xref: /xnu-11215.61.5/osfmk/vm/vm_pageout.c (revision 4f1223e81cd707a65cc109d0b8ad6653699da3c4)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_pageout.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	The proverbial page-out daemon.
64  */
65 
66 #include "mach/kern_return.h"
67 #include <stdint.h>
68 #include <ptrauth.h>
69 
70 #include <debug.h>
71 
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/mach_host_server.h>
75 #include <mach/upl.h>
76 #include <mach/vm_map.h>
77 #include <mach/vm_param.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/sdt.h>
80 
81 #include <kern/kern_types.h>
82 #include <kern/counter.h>
83 #include <kern/host_statistics.h>
84 #include <kern/machine.h>
85 #include <kern/misc_protos.h>
86 #include <kern/sched.h>
87 #include <kern/thread.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90 #include <kern/policy_internal.h>
91 #include <kern/thread_group.h>
92 
93 #include <os/log.h>
94 
95 #include <sys/kdebug_triage.h>
96 
97 #include <machine/vm_tuning.h>
98 #include <machine/commpage.h>
99 
100 #include <vm/pmap.h>
101 #include <vm/vm_compressor_pager_internal.h>
102 #include <vm/vm_fault_internal.h>
103 #include <vm/vm_map_internal.h>
104 #include <vm/vm_object_internal.h>
105 #include <vm/vm_page_internal.h>
106 #include <vm/vm_pageout_internal.h>
107 #include <vm/vm_protos_internal.h> /* must be last */
108 #include <vm/memory_object.h>
109 #include <vm/vm_purgeable_internal.h>
110 #include <vm/vm_shared_region.h>
111 #include <vm/vm_compressor_internal.h>
112 #include <vm/vm_kern_xnu.h>
113 #include <vm/vm_iokit.h>
114 #include <vm/vm_ubc.h>
115 
116 #include <san/kasan.h>
117 #include <sys/kern_memorystatus_xnu.h>
118 
119 #if CONFIG_PHANTOM_CACHE
120 #include <vm/vm_phantom_cache_internal.h>
121 #endif
122 
123 #if UPL_DEBUG
124 #include <libkern/OSDebug.h>
125 #endif
126 
127 extern int cs_debug;
128 
129 #if CONFIG_MBUF_MCACHE
130 extern void mbuf_drain(boolean_t);
131 #endif /* CONFIG_MBUF_MCACHE */
132 
133 #if CONFIG_FREEZE
134 extern unsigned int memorystatus_frozen_count;
135 extern unsigned int memorystatus_suspended_count;
136 #endif /* CONFIG_FREEZE */
137 extern vm_pressure_level_t memorystatus_vm_pressure_level;
138 
139 extern lck_mtx_t memorystatus_jetsam_broadcast_lock;
140 extern uint32_t memorystatus_jetsam_fg_band_waiters;
141 extern uint32_t memorystatus_jetsam_bg_band_waiters;
142 
143 void vm_pressure_response(void);
144 extern void consider_vm_pressure_events(void);
145 
146 #define MEMORYSTATUS_SUSPENDED_THRESHOLD  4
147 
148 SECURITY_READ_ONLY_LATE(thread_t) vm_pageout_scan_thread;
149 SECURITY_READ_ONLY_LATE(thread_t) vm_pageout_gc_thread;
150 #if CONFIG_VPS_DYNAMIC_PRIO
151 TUNABLE(bool, vps_dynamic_priority_enabled, "vps_dynamic_priority_enabled", false);
152 #else
153 const bool vps_dynamic_priority_enabled = false;
154 #endif
155 boolean_t vps_yield_for_pgqlockwaiters = TRUE;
156 
157 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
158 #if !XNU_TARGET_OS_OSX
159 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
160 #else /* !XNU_TARGET_OS_OSX */
161 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
162 #endif /* !XNU_TARGET_OS_OSX */
163 #endif
164 
165 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
166 #define VM_PAGEOUT_DEADLOCK_RELIEF 100  /* number of pages to move to break deadlock */
167 #endif
168 
169 #ifndef VM_PAGE_LAUNDRY_MAX
170 #define VM_PAGE_LAUNDRY_MAX     128UL   /* maximum pageouts on a given pageout queue */
#endif  /* VM_PAGE_LAUNDRY_MAX */
172 
173 #ifndef VM_PAGEOUT_BURST_WAIT
174 #define VM_PAGEOUT_BURST_WAIT   1       /* milliseconds */
175 #endif  /* VM_PAGEOUT_BURST_WAIT */
176 
177 #ifndef VM_PAGEOUT_EMPTY_WAIT
178 #define VM_PAGEOUT_EMPTY_WAIT   50      /* milliseconds */
179 #endif  /* VM_PAGEOUT_EMPTY_WAIT */
180 
181 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
182 #define VM_PAGEOUT_DEADLOCK_WAIT 100    /* milliseconds */
183 #endif  /* VM_PAGEOUT_DEADLOCK_WAIT */
184 
185 #ifndef VM_PAGEOUT_IDLE_WAIT
186 #define VM_PAGEOUT_IDLE_WAIT    10      /* milliseconds */
187 #endif  /* VM_PAGEOUT_IDLE_WAIT */
188 
189 #ifndef VM_PAGEOUT_SWAP_WAIT
190 #define VM_PAGEOUT_SWAP_WAIT    10      /* milliseconds */
191 #endif  /* VM_PAGEOUT_SWAP_WAIT */
192 
193 /*
194  * vm_page_max_speculative_age_q should be less than or equal to
195  * VM_PAGE_RESERVED_SPECULATIVE_AGE_Q which is number of allocated
196  * vm_page_queue_speculative entries.
197  */
198 
199 TUNABLE_DEV_WRITEABLE(unsigned int, vm_page_max_speculative_age_q, "vm_page_max_speculative_age_q", VM_PAGE_DEFAULT_MAX_SPECULATIVE_AGE_Q);
200 #ifndef VM_PAGE_SPECULATIVE_TARGET
201 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage))
202 #endif /* VM_PAGE_SPECULATIVE_TARGET */
203 
204 
205 /*
206  *	To obtain a reasonable LRU approximation, the inactive queue
207  *	needs to be large enough to give pages on it a chance to be
208  *	referenced a second time.  This macro defines the fraction
209  *	of active+inactive pages that should be inactive.
210  *	The pageout daemon uses it to update vm_page_inactive_target.
211  *
212  *	If vm_page_free_count falls below vm_page_free_target and
213  *	vm_page_inactive_count is below vm_page_inactive_target,
214  *	then the pageout daemon starts running.
215  */
216 
217 #ifndef VM_PAGE_INACTIVE_TARGET
218 #define VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 1 / 2)
219 #endif  /* VM_PAGE_INACTIVE_TARGET */
220 
221 /*
222  *	Once the pageout daemon starts running, it keeps going
223  *	until vm_page_free_count meets or exceeds vm_page_free_target.
224  */
225 
226 #ifndef VM_PAGE_FREE_TARGET
227 #if !XNU_TARGET_OS_OSX
228 #define VM_PAGE_FREE_TARGET(free)       (15 + (free) / 100)
229 #else /* !XNU_TARGET_OS_OSX */
230 #define VM_PAGE_FREE_TARGET(free)       (15 + (free) / 80)
231 #endif /* !XNU_TARGET_OS_OSX */
232 #endif  /* VM_PAGE_FREE_TARGET */
233 
234 
235 /*
236  *	The pageout daemon always starts running once vm_page_free_count
237  *	falls below vm_page_free_min.
238  */
239 
240 #ifndef VM_PAGE_FREE_MIN
241 #if !XNU_TARGET_OS_OSX
242 #define VM_PAGE_FREE_MIN(free)          (10 + (free) / 200)
243 #else /* !XNU_TARGET_OS_OSX */
244 #define VM_PAGE_FREE_MIN(free)          (10 + (free) / 100)
245 #endif /* !XNU_TARGET_OS_OSX */
246 #endif  /* VM_PAGE_FREE_MIN */
247 
248 #if !XNU_TARGET_OS_OSX
249 #define VM_PAGE_FREE_RESERVED_LIMIT     100
250 #define VM_PAGE_FREE_MIN_LIMIT          1500
251 #define VM_PAGE_FREE_TARGET_LIMIT       2000
252 #else /* !XNU_TARGET_OS_OSX */
253 #define VM_PAGE_FREE_RESERVED_LIMIT     1700
254 #define VM_PAGE_FREE_MIN_LIMIT          3500
255 #define VM_PAGE_FREE_TARGET_LIMIT       4000
256 #endif /* !XNU_TARGET_OS_OSX */
257 
258 /*
259  *	When vm_page_free_count falls below vm_page_free_reserved,
260  *	only vm-privileged threads can allocate pages.  vm-privilege
261  *	allows the pageout daemon and default pager (and any other
262  *	associated threads needed for default pageout) to continue
263  *	operation by dipping into the reserved pool of pages.
264  */
265 
266 #ifndef VM_PAGE_FREE_RESERVED
267 #define VM_PAGE_FREE_RESERVED(n)        \
268 	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
269 #endif  /* VM_PAGE_FREE_RESERVED */
270 
271 /*
272  *	When we dequeue pages from the inactive list, they are
273  *	reactivated (ie, put back on the active queue) if referenced.
274  *	However, it is possible to starve the free list if other
275  *	processors are referencing pages faster than we can turn off
276  *	the referenced bit.  So we limit the number of reactivations
277  *	we will make per call of vm_pageout_scan().
278  */
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000

#ifndef VM_PAGE_REACTIVATE_LIMIT
#if !XNU_TARGET_OS_OSX
#define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
#else /* !XNU_TARGET_OS_OSX */
/*
 * NOTE(review): MAX() makes the per-scan reactivation budget a *floor* of
 * VM_PAGE_REACTIVATE_LIMIT_MAX (20000) on macOS, never below it.  Given the
 * "..._LIMIT_MAX" name and the "we limit the number of reactivations"
 * comment above, MIN() (a cap) may have been intended -- confirm intent
 * before changing; this matches shipping behavior.
 */
#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif /* !XNU_TARGET_OS_OSX */
#endif  /* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM       1000
289 
290 int vm_pageout_protect_realtime = true;
291 
292 extern boolean_t hibernate_cleaning_in_progress;
293 
294 struct pgo_iothread_state pgo_iothread_internal_state[MAX_COMPRESSOR_THREAD_COUNT];
295 struct pgo_iothread_state pgo_iothread_external_state;
296 
297 #if VM_PRESSURE_EVENTS
298 void vm_pressure_thread(void);
299 
300 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
301 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
302 
303 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
304 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
305 #endif
306 
307 static void vm_pageout_iothread_external(struct pgo_iothread_state *, wait_result_t);
308 static void vm_pageout_iothread_internal(struct pgo_iothread_state *, wait_result_t);
309 static void vm_pageout_adjust_eq_iothrottle(struct pgo_iothread_state *, boolean_t);
310 
311 extern void vm_pageout_continue(void);
312 extern void vm_pageout_scan(void);
313 
314 boolean_t vm_pageout_running = FALSE;
315 
316 uint32_t vm_page_upl_tainted = 0;
317 uint32_t vm_page_iopl_tainted = 0;
318 
319 #if XNU_TARGET_OS_OSX
320 static boolean_t vm_pageout_waiter  = FALSE;
321 #endif /* XNU_TARGET_OS_OSX */
322 
323 
324 #if DEVELOPMENT || DEBUG
325 struct vm_pageout_debug vm_pageout_debug;
326 #endif
327 struct vm_pageout_vminfo vm_pageout_vminfo;
328 struct vm_pageout_state  vm_pageout_state;
329 struct vm_config         vm_config;
330 
331 struct  vm_pageout_queue vm_pageout_queue_internal VM_PAGE_PACKED_ALIGNED;
332 struct  vm_pageout_queue vm_pageout_queue_external VM_PAGE_PACKED_ALIGNED;
333 #if DEVELOPMENT || DEBUG
334 struct vm_pageout_queue vm_pageout_queue_benchmark VM_PAGE_PACKED_ALIGNED;
335 #endif /* DEVELOPMENT || DEBUG */
336 
337 int         vm_upl_wait_for_pages = 0;
338 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
339 
340 boolean_t(*volatile consider_buffer_cache_collect)(int) = NULL;
341 
342 int     vm_debug_events = 0;
343 
344 LCK_GRP_DECLARE(vm_pageout_lck_grp, "vm_pageout");
345 
346 #if CONFIG_MEMORYSTATUS
347 extern void memorystatus_kill_on_vps_starvation(void);
348 
349 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
350 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
351 
352 #endif
353 
354 #if __AMP__
355 
356 
357 /*
358  * Bind compressor threads to e-cores unless there are multiple non-e clusters
359  */
360 #if (MAX_CPU_CLUSTERS > 2)
361 #define VM_COMPRESSOR_EBOUND_DEFAULT false
362 #elif defined(XNU_TARGET_OS_XR)
363 #define VM_COMPRESSOR_EBOUND_DEFAULT false
364 #else
365 #define VM_COMPRESSOR_EBOUND_DEFAULT true
366 #endif
367 
368 TUNABLE(bool, vm_compressor_ebound, "vmcomp_ecluster", VM_COMPRESSOR_EBOUND_DEFAULT);
369 int vm_pgo_pbound = 0;
370 extern void thread_bind_cluster_type(thread_t, char, bool);
371 
372 #endif /* __AMP__ */
373 
374 
/*
 *	Routine:	vm_pageout_object_terminate
 *	Purpose:
 *		Destroy the pageout_object, and perform all of the
 *		required cleanup actions.
 *
 *	In/Out conditions:
 *		The object must be locked, and will be returned locked.
 */
void
vm_pageout_object_terminate(
	vm_object_t     object)
{
	vm_object_t     shadow_object;

	/*
	 * Deal with the deallocation (last reference) of a pageout object
	 * (used for cleaning-in-place) by dropping the paging references/
	 * freeing pages in the original object.
	 */

	assert(object->pageout);
	shadow_object = object->shadow;
	vm_object_lock(shadow_object);

	while (!vm_page_queue_empty(&object->memq)) {
		vm_page_t               p, m;
		vm_object_offset_t      offset;

		/* p: the pageout object's private placeholder page */
		p = (vm_page_t) vm_page_queue_first(&object->memq);

		assert(p->vmp_private);
		assert(p->vmp_free_when_done);
		p->vmp_free_when_done = FALSE;
		assert(!p->vmp_cleaning);
		assert(!p->vmp_laundry);

		offset = p->vmp_offset;
		VM_PAGE_FREE(p);
		p = VM_PAGE_NULL;

		/* m: the real page being cleaned, resident in the shadow object */
		m = vm_page_lookup(shadow_object,
		    offset + object->vo_shadow_offset);

		if (m == VM_PAGE_NULL) {
			continue;
		}

		assert((m->vmp_dirty) || (m->vmp_precious) ||
		    (m->vmp_busy && m->vmp_cleaning));

		/*
		 * Handle the trusted pager throttle.
		 * Also decrement the burst throttle (if external).
		 */
		vm_page_lock_queues();
		if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
			vm_pageout_throttle_up(m);
		}

		/*
		 * Handle the "target" page(s). These pages are to be freed if
		 * successfully cleaned. Target pages are always busy, and are
		 * wired exactly once. The initial target pages are not mapped,
		 * (so cannot be referenced or modified) but converted target
		 * pages may have been modified between the selection as an
		 * adjacent page and conversion to a target.
		 */
		if (m->vmp_free_when_done) {
			assert(m->vmp_busy);
			assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
			assert(m->vmp_wire_count == 1);
			m->vmp_cleaning = FALSE;
			m->vmp_free_when_done = FALSE;
			/*
			 * Revoke all access to the page. Since the object is
			 * locked, and the page is busy, this prevents the page
			 * from being dirtied after the pmap_disconnect() call
			 * returns.
			 *
			 * Since the page is left "dirty" but "not modified", we
			 * can detect whether the page was redirtied during
			 * pageout by checking the modify state.
			 */
			if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			} else {
				m->vmp_dirty = FALSE;
			}

			if (m->vmp_dirty) {
				/* page was redirtied during pageout: keep it */
				vm_page_unwire(m, TRUE);        /* reactivates */
				counter_inc(&vm_statistics_reactivations);
				vm_page_wakeup_done(object, m);
			} else {
				vm_page_free(m);  /* clears busy, etc. */
			}
			vm_page_unlock_queues();
			continue;
		}
		/*
		 * Handle the "adjacent" pages. These pages were cleaned in
		 * place, and should be left alone.
		 * If prep_pin_count is nonzero, then someone is using the
		 * page, so make it active.
		 */
		if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) {
			if (m->vmp_reference) {
				vm_page_activate(m);
			} else {
				vm_page_deactivate(m);
			}
		}
		if (m->vmp_overwriting) {
			/*
			 * the (COPY_OUT_FROM == FALSE) request_page_list case
			 */
			if (m->vmp_busy) {
				/*
				 * We do not re-set m->vmp_dirty !
				 * The page was busy so no extraneous activity
				 * could have occurred. COPY_INTO is a read into the
				 * new pages. CLEAN_IN_PLACE does actually write
				 * out the pages but handling outside of this code
				 * will take care of resetting dirty. We clear the
				 * modify however for the Programmed I/O case.
				 */
				pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));

				m->vmp_busy = FALSE;
				m->vmp_absent = FALSE;
			} else {
				/*
				 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
				 * Occurs when the original page was wired
				 * at the time of the list request
				 */
				assert(VM_PAGE_WIRED(m));
				vm_page_unwire(m, TRUE);        /* reactivates */
			}
			m->vmp_overwriting = FALSE;
		} else {
			/* cleaned in place: the backing copy is now up to date */
			m->vmp_dirty = FALSE;
		}
		m->vmp_cleaning = FALSE;

		/*
		 * Wakeup any thread waiting for the page to be un-cleaning.
		 */
		vm_page_wakeup(object, m);
		vm_page_unlock_queues();
	}
	/*
	 * Account for the paging reference taken in vm_paging_object_allocate.
	 */
	vm_object_activity_end(shadow_object);
	vm_object_unlock(shadow_object);

	assert(os_ref_get_count_raw(&object->ref_count) == 0);
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);
	assert(object->resident_page_count == 0);
	return;
}
539 
/*
 * Routine:	vm_pageclean_setup
 *
 * Purpose:	setup a page to be cleaned (made non-dirty), but not
 *		necessarily flushed from the VM page cache.
 *		This is accomplished by cleaning in place.
 *
 *		The page must not be busy, and new_object
 *		must be locked.
 *
 */
static void
vm_pageclean_setup(
	vm_page_t               m,
	vm_page_t               new_m,
	vm_object_t             new_object,
	vm_object_offset_t      new_offset)
{
	assert(!m->vmp_busy);
#if 0
	assert(!m->vmp_cleaning);
#endif

	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));

	/*
	 * Mark original page as cleaning in place.
	 */
	m->vmp_cleaning = TRUE;
	SET_PAGE_DIRTY(m, FALSE);
	m->vmp_precious = FALSE;

	/*
	 * Convert the fictitious page to a private shadow of
	 * the real page.
	 */
	assert(new_m->vmp_fictitious);
	assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
	new_m->vmp_fictitious = FALSE;
	new_m->vmp_private = TRUE;
	new_m->vmp_free_when_done = TRUE;
	/* alias the real page's physical frame into the shadow page */
	VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));

	vm_page_lockspin_queues();
	vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
	vm_page_unlock_queues();

	vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
	assert(!new_m->vmp_wanted);
	new_m->vmp_busy = FALSE;
}
591 
/*
 *	Routine:	vm_pageout_initialize_page
 *	Purpose:
 *		Causes the specified page to be initialized in
 *		the appropriate memory object. This routine is used to push
 *		pages into a copy-object when they are modified in the
 *		permanent object.
 *
 *		The page is moved to a temporary object and paged out.
 *
 *	In/out conditions:
 *		The page in question must not be on any pageout queues.
 *		The object to which it belongs must be locked.
 *		The page must be busy, but not hold a paging reference.
 *
 *	Implementation:
 *		Move this page to a completely new object.
 */
void
vm_pageout_initialize_page(
	vm_page_t       m)
{
	vm_object_t             object;
	vm_object_offset_t      paging_offset;
	memory_object_t         pager;

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	object = VM_PAGE_OBJECT(m);

	assert(m->vmp_busy);
	assert(object->internal);

	/*
	 *	Verify that we really want to clean this page
	 */
	assert(!m->vmp_absent);
	assert(m->vmp_dirty);

	/*
	 *	Create a paging reference to let us play with the object.
	 */
	paging_offset = m->vmp_offset + object->paging_offset;

	if (m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_restart || (!m->vmp_dirty && !m->vmp_precious)) {
		panic("reservation without pageout?"); /* alan */

		/* NOTREACHED -- panic() does not return */
		VM_PAGE_FREE(m);
		vm_object_unlock(object);

		return;
	}

	/*
	 * If there's no pager, then we can't clean the page.  This should
	 * never happen since this should be a copy object and therefore not
	 * an external object, so the pager should always be there.
	 */

	pager = object->pager;

	if (pager == MEMORY_OBJECT_NULL) {
		panic("missing pager for copy object");

		/* NOTREACHED -- panic() does not return */
		VM_PAGE_FREE(m);
		return;
	}

	/*
	 * set the page for future call to vm_fault_list_request
	 */
	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
	SET_PAGE_DIRTY(m, FALSE);

	/*
	 * keep the object from collapsing or terminating
	 */
	vm_object_paging_begin(object);
	vm_object_unlock(object);

	/*
	 *	Write the data to its pager.
	 *	Note that the data is passed by naming the new object,
	 *	not a virtual address; the pager interface has been
	 *	manipulated to use the "internal memory" data type.
	 *	[The object reference from its allocation is donated
	 *	to the eventual recipient.]
	 */
	memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);

	vm_object_lock(object);
	vm_object_paging_end(object);
}
685 
686 
687 /*
688  * vm_pageout_cluster:
689  *
690  * Given a page, queue it to the appropriate I/O thread,
691  * which will page it out and attempt to clean adjacent pages
692  * in the same operation.
693  *
694  * The object and queues must be locked. We will take a
695  * paging reference to prevent deallocation or collapse when we
696  * release the object lock back at the call site.  The I/O thread
697  * is responsible for consuming this reference
698  *
699  * The page must not be on any pageout queue.
700  */
701 #if DEVELOPMENT || DEBUG
702 vmct_stats_t vmct_stats;
703 
704 int32_t vmct_active = 0;
705 uint64_t vm_compressor_epoch_start = 0;
706 uint64_t vm_compressor_epoch_stop = 0;
707 
708 typedef enum vmct_state_t {
709 	VMCT_IDLE,
710 	VMCT_AWAKENED,
711 	VMCT_ACTIVE,
712 } vmct_state_t;
713 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
714 #endif
715 
716 
717 
/*
 * Queue page "m" on pageout queue "q" for laundering.
 * Caller holds the page-queues lock and the page's object lock
 * (exclusive); see the block comment above for the full contract.
 */
static void
vm_pageout_cluster_to_queue(vm_page_t m, struct vm_pageout_queue *q)
{
	vm_object_t object = VM_PAGE_OBJECT(m);

	VM_PAGE_CHECK(m);
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(object);

	/*
	 * Make sure it's OK to page this out.
	 */
	assert((m->vmp_dirty || m->vmp_precious) && (!VM_PAGE_WIRED(m)));
	assert(!m->vmp_cleaning && !m->vmp_laundry);
	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);

	/*
	 * protect the object from collapse or termination
	 */
	vm_object_activity_begin(object);


	/*
	 * pgo_laundry count is tied to the laundry bit
	 */
	m->vmp_laundry = TRUE;
	q->pgo_laundry++;

	m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q;
	vm_page_queue_enter(&q->pgo_pending, m, vmp_pageq);

	if (object->internal == TRUE) {
		assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
		m->vmp_busy = TRUE;
#if DEVELOPMENT || DEBUG
		/*
		 * The benchmark queue will be woken up independently by the benchmark
		 * itself.
		 */
		if (q != &vm_pageout_queue_benchmark) {
#else /* DEVELOPMENT || DEBUG */
		if (true) {
#endif /* DEVELOPMENT || DEBUG */
			/*
			 * Wake up the first compressor thread. It will wake subsequent
			 * threads if necessary.
			 */
			sched_cond_signal(&pgo_iothread_internal_state[0].pgo_wakeup,
			    pgo_iothread_internal_state[0].pgo_iothread);
		}
	} else {
		/* file-backed page: wake the external pageout iothread */
		sched_cond_signal(&pgo_iothread_external_state.pgo_wakeup, pgo_iothread_external_state.pgo_iothread);
	}
	VM_PAGE_CHECK(m);
}
773 
774 void
775 vm_pageout_cluster(vm_page_t m)
776 {
777 	struct          vm_pageout_queue *q;
778 	vm_object_t     object = VM_PAGE_OBJECT(m);
779 	if (object->internal) {
780 		q = &vm_pageout_queue_internal;
781 	} else {
782 		q = &vm_pageout_queue_external;
783 	}
784 	vm_pageout_cluster_to_queue(m, q);
785 }
786 
787 
788 /*
789  * A page is back from laundry or we are stealing it back from
790  * the laundering state.  See if there are some pages waiting to
791  * go to laundry and if we can let some of them go now.
792  *
793  * Object and page queues must be locked.
794  */
795 void
796 vm_pageout_throttle_up(
797 	vm_page_t       m)
798 {
799 	struct vm_pageout_queue *q;
800 	vm_object_t      m_object;
801 
802 	m_object = VM_PAGE_OBJECT(m);
803 
804 	assert(m_object != VM_OBJECT_NULL);
805 	assert(!is_kernel_object(m_object));
806 
807 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
808 	vm_object_lock_assert_exclusive(m_object);
809 
810 	if (m_object->internal == TRUE) {
811 		q = &vm_pageout_queue_internal;
812 	} else {
813 		q = &vm_pageout_queue_external;
814 	}
815 
816 	if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
817 		vm_page_queue_remove(&q->pgo_pending, m, vmp_pageq);
818 		m->vmp_q_state = VM_PAGE_NOT_ON_Q;
819 
820 		VM_PAGE_ZERO_PAGEQ_ENTRY(m);
821 
822 		vm_object_activity_end(m_object);
823 
824 		VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, 1);
825 	}
826 	if (m->vmp_laundry == TRUE) {
827 		m->vmp_laundry = FALSE;
828 		q->pgo_laundry--;
829 
830 		if (q->pgo_throttled == TRUE) {
831 			q->pgo_throttled = FALSE;
832 			thread_wakeup((event_t) &q->pgo_laundry);
833 		}
834 		if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
835 			q->pgo_draining = FALSE;
836 			thread_wakeup((event_t) (&q->pgo_laundry + 1));
837 		}
838 		VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, 1);
839 	}
840 }
841 
842 
843 static void
844 vm_pageout_throttle_up_batch(
845 	struct vm_pageout_queue *q,
846 	int             batch_cnt)
847 {
848 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
849 
850 	VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt);
851 
852 	q->pgo_laundry -= batch_cnt;
853 
854 	if (q->pgo_throttled == TRUE) {
855 		q->pgo_throttled = FALSE;
856 		thread_wakeup((event_t) &q->pgo_laundry);
857 	}
858 	if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
859 		q->pgo_draining = FALSE;
860 		thread_wakeup((event_t) (&q->pgo_laundry + 1));
861 	}
862 }
863 
864 
865 
866 /*
867  * VM memory pressure monitoring.
868  *
869  * vm_pageout_scan() keeps track of the number of pages it considers and
870  * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
871  *
872  * compute_memory_pressure() is called every second from compute_averages()
873  * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 * of reclaimed pages in a new vm_pageout_stat[] bucket.
875  *
876  * mach_vm_pressure_monitor() collects past statistics about memory pressure.
877  * The caller provides the number of seconds ("nsecs") worth of statistics
878  * it wants, up to 30 seconds.
879  * It computes the number of pages reclaimed in the past "nsecs" seconds and
880  * also returns the number of pages the system still needs to reclaim at this
881  * moment in time.
882  */
/*
 * Ring buffer of per-sample pageout statistics; vm_pageout_stat_now
 * indexes the bucket currently being accumulated (advanced by
 * record_memory_pressure()).
 *
 * Sized as (N * 8) + 1 buckets -- presumably 8 samples per second of
 * history (TODO confirm against compute_averages()), plus one spare
 * bucket so the "now" bucket never overlaps the oldest sample.
 *
 * Fix: the expansion is now fully parenthesized.  The old form
 * "(30 * 8) + 1" was a macro-hygiene hazard: any use inside a larger
 * expression (e.g. "2 * VM_PAGEOUT_STAT_SIZE") would bind incorrectly.
 */
#if DEVELOPMENT || DEBUG
#define VM_PAGEOUT_STAT_SIZE    ((30 * 8) + 1)
#else
#define VM_PAGEOUT_STAT_SIZE    ((1 * 8) + 1)
#endif
struct vm_pageout_stat {
	/* queue population snapshot */
	unsigned long vm_page_active_count;
	unsigned long vm_page_speculative_count;
	unsigned long vm_page_inactive_count;
	unsigned long vm_page_anonymous_count;

	unsigned long vm_page_free_count;
	unsigned long vm_page_wire_count;
	unsigned long vm_page_compressor_count;

	unsigned long vm_page_pages_compressed;
	unsigned long vm_page_pageable_internal_count;
	unsigned long vm_page_pageable_external_count;
	unsigned long vm_page_xpmapped_external_count;

	/* per-sample activity counters */
	unsigned int pages_grabbed;
	unsigned int pages_freed;

	unsigned int pages_compressed;
	unsigned int pages_grabbed_by_compressor;
	unsigned int failed_compressions;

	unsigned int pages_evicted;
	unsigned int pages_purged;

	unsigned int considered;
	unsigned int considered_bq_internal;
	unsigned int considered_bq_external;

	unsigned int skipped_external;
	unsigned int skipped_internal;
	unsigned int filecache_min_reactivations;

	unsigned int freed_speculative;
	unsigned int freed_cleaned;
	unsigned int freed_internal;
	unsigned int freed_external;

	unsigned int cleaned_dirty_external;
	unsigned int cleaned_dirty_internal;

	unsigned int inactive_referenced;
	unsigned int inactive_nolock;
	unsigned int reactivation_limit_exceeded;
	unsigned int forced_inactive_reclaim;

	unsigned int throttled_internal_q;
	unsigned int throttled_external_q;

	unsigned int phantom_ghosts_found;
	unsigned int phantom_ghosts_added;

	unsigned int vm_page_realtime_count;
	unsigned int forcereclaimed_sharedcache;
	unsigned int forcereclaimed_realtime;
	unsigned int protected_sharedcache;
	unsigned int protected_realtime;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE];

unsigned int vm_pageout_stat_now = 0;

/* circular predecessor / successor of a bucket index */
#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)

#if VM_PAGE_BUCKETS_CHECK
int vm_page_buckets_check_interval = 80; /* in eighths of a second */
#endif /* VM_PAGE_BUCKETS_CHECK */
957 
958 
959 void
960 record_memory_pressure(void);
961 void
962 record_memory_pressure(void)
963 {
964 	unsigned int vm_pageout_next;
965 
966 #if VM_PAGE_BUCKETS_CHECK
967 	/* check the consistency of VM page buckets at regular interval */
968 	static int counter = 0;
969 	if ((++counter % vm_page_buckets_check_interval) == 0) {
970 		vm_page_buckets_check();
971 	}
972 #endif /* VM_PAGE_BUCKETS_CHECK */
973 
974 	vm_pageout_state.vm_memory_pressure =
975 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative +
976 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned +
977 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal +
978 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external;
979 
980 	commpage_set_memory_pressure((unsigned int)vm_pageout_state.vm_memory_pressure );
981 
982 	/* move "now" forward */
983 	vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
984 
985 	bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
986 
987 	vm_pageout_stat_now = vm_pageout_next;
988 }
989 
990 
991 /*
992  * IMPORTANT
993  * mach_vm_ctl_page_free_wanted() is called indirectly, via
994  * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
995  * it must be safe in the restricted stackshot context. Locks and/or
996  * blocking are not allowable.
997  */
998 unsigned int
999 mach_vm_ctl_page_free_wanted(void)
1000 {
1001 	unsigned int page_free_target, page_free_count, page_free_wanted;
1002 
1003 	page_free_target = vm_page_free_target;
1004 	page_free_count = vm_page_free_count;
1005 	if (page_free_target > page_free_count) {
1006 		page_free_wanted = page_free_target - page_free_count;
1007 	} else {
1008 		page_free_wanted = 0;
1009 	}
1010 
1011 	return page_free_wanted;
1012 }
1013 
1014 
1015 /*
1016  * IMPORTANT:
1017  * mach_vm_pressure_monitor() is called when taking a stackshot, with
1018  * wait_for_pressure FALSE, so that code path must remain safe in the
1019  * restricted stackshot context. No blocking or locks are allowable.
1020  * on that code path.
1021  */
1022 
1023 kern_return_t
1024 mach_vm_pressure_monitor(
1025 	boolean_t       wait_for_pressure,
1026 	unsigned int    nsecs_monitored,
1027 	unsigned int    *pages_reclaimed_p,
1028 	unsigned int    *pages_wanted_p)
1029 {
1030 	wait_result_t   wr;
1031 	unsigned int    vm_pageout_then, vm_pageout_now;
1032 	unsigned int    pages_reclaimed;
1033 	unsigned int    units_of_monitor;
1034 
1035 	units_of_monitor = 8 * nsecs_monitored;
1036 	/*
1037 	 * We don't take the vm_page_queue_lock here because we don't want
1038 	 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1039 	 * thread when it's trying to reclaim memory.  We don't need fully
1040 	 * accurate monitoring anyway...
1041 	 */
1042 
1043 	if (wait_for_pressure) {
1044 		/* wait until there's memory pressure */
1045 		while (vm_page_free_count >= vm_page_free_target) {
1046 			wr = assert_wait((event_t) &vm_page_free_wanted,
1047 			    THREAD_INTERRUPTIBLE);
1048 			if (wr == THREAD_WAITING) {
1049 				wr = thread_block(THREAD_CONTINUE_NULL);
1050 			}
1051 			if (wr == THREAD_INTERRUPTED) {
1052 				return KERN_ABORTED;
1053 			}
1054 			if (wr == THREAD_AWAKENED) {
1055 				/*
1056 				 * The memory pressure might have already
1057 				 * been relieved but let's not block again
1058 				 * and let's report that there was memory
1059 				 * pressure at some point.
1060 				 */
1061 				break;
1062 			}
1063 		}
1064 	}
1065 
1066 	/* provide the number of pages the system wants to reclaim */
1067 	if (pages_wanted_p != NULL) {
1068 		*pages_wanted_p = mach_vm_ctl_page_free_wanted();
1069 	}
1070 
1071 	if (pages_reclaimed_p == NULL) {
1072 		return KERN_SUCCESS;
1073 	}
1074 
1075 	/* provide number of pages reclaimed in the last "nsecs_monitored" */
1076 	vm_pageout_now = vm_pageout_stat_now;
1077 	pages_reclaimed = 0;
1078 	for (vm_pageout_then =
1079 	    VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1080 	    vm_pageout_then != vm_pageout_now &&
1081 	    units_of_monitor-- != 0;
1082 	    vm_pageout_then =
1083 	    VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1084 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative;
1085 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned;
1086 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal;
1087 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external;
1088 	}
1089 	*pages_reclaimed_p = pages_reclaimed;
1090 
1091 	return KERN_SUCCESS;
1092 }
1093 
1094 
1095 
#if DEVELOPMENT || DEBUG

static void
vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);

/*
 * Flag (tested and set under the page queues lock) used to make sure
 * there is only a single disconnect-all sweep going on at a time.
 */
bool vm_pageout_disconnect_all_pages_active = false;
1106 
1107 void
1108 vm_pageout_disconnect_all_pages()
1109 {
1110 	vm_page_lock_queues();
1111 
1112 	if (vm_pageout_disconnect_all_pages_active) {
1113 		vm_page_unlock_queues();
1114 		return;
1115 	}
1116 	vm_pageout_disconnect_all_pages_active = true;
1117 
1118 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled,
1119 	    vm_page_throttled_count);
1120 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous,
1121 	    vm_page_anonymous_count);
1122 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_inactive,
1123 	    (vm_page_inactive_count - vm_page_anonymous_count));
1124 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active,
1125 	    vm_page_active_count);
1126 #ifdef CONFIG_SECLUDED_MEMORY
1127 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_secluded,
1128 	    vm_page_secluded_count);
1129 #endif /* CONFIG_SECLUDED_MEMORY */
1130 	vm_page_unlock_queues();
1131 
1132 	vm_pageout_disconnect_all_pages_active = false;
1133 }
1134 
/*
 * Visit up to "qcount" pages starting at the head of page queue "q" and
 * call pmap_disconnect() on each page that still has pmap mappings.
 * Every visited page — processed or skipped — is removed from the queue
 * and re-entered, so the walk always makes forward progress.  Object
 * locks are only "try"-acquired (the page queues lock is held); a
 * contended object is retried with mutex_pause() up to 20 times before
 * its page is skipped.  The page queues lock is yielded every ~128
 * iterations to limit hold time.
 *
 * NB: assumes the page_queues lock is held on entry, returns with page queue lock held
 */
void
vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
{
	vm_page_t       m;
	vm_object_t     t_object = NULL;    /* last object whose try-lock failed */
	vm_object_t     l_object = NULL;    /* object we currently hold locked */
	vm_object_t     m_object = NULL;
	int             delayed_unlock = 0;
	int             try_failed_count = 0;
	int             disconnected_count = 0;
	int             paused_count = 0;
	int             object_locked_count = 0;

	KDBG((MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS) |
	    DBG_FUNC_START),
	    q, qcount);

	while (qcount && !vm_page_queue_empty(q)) {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

		m = (vm_page_t) vm_page_queue_first(q);
		m_object = VM_PAGE_OBJECT(m);

		if (m_object == VM_OBJECT_NULL) {
			/*
			 * Bumped into a free page. This should only happen on the
			 * secluded queue
			 */
#if CONFIG_SECLUDED_MEMORY
			assert(q == &vm_page_queue_secluded);
#endif /* CONFIG_SECLUDED_MEMORY */
			goto reenter_pg_on_q;
		}

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/* new object: start its try-fail count from scratch */
			if (m_object != t_object) {
				try_failed_count = 0;
			}

			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if (!vm_object_lock_try_scan(m_object)) {
				/* give up on this object after 20 failed tries */
				if (try_failed_count > 20) {
					goto reenter_pg_on_q;
				}
				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();
				delayed_unlock = 0;

				paused_count++;

				t_object = m_object;
				continue;
			}
			object_locked_count++;

			l_object = m_object;
		}
		if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry ||
		    m->vmp_busy || m->vmp_absent || VMP_ERROR_GET(m) ||
		    m->vmp_free_when_done) {
			/*
			 * page is in a transient or unusable state...
			 * put it back on its queue
			 */
			goto reenter_pg_on_q;
		}
		if (m->vmp_pmapped == TRUE) {
			/* tear down all pmap mappings of this page */
			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

			disconnected_count++;
		}
reenter_pg_on_q:
		vm_page_queue_remove(q, m, vmp_pageq);
		vm_page_queue_enter(q, m, vmp_pageq);

		qcount--;
		try_failed_count = 0;

		/* don't monopolize the page queues lock */
		if (delayed_unlock++ > 128) {
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			lck_mtx_yield(&vm_page_queue_lock);
			delayed_unlock = 0;
		}
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}

	KDBG((MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS) |
	    DBG_FUNC_END),
	    q, disconnected_count, object_locked_count, paused_count);
}
1252 
1253 extern const char *proc_best_name(struct proc* proc);
1254 
1255 int
1256 vm_toggle_task_selfdonate_pages(task_t task)
1257 {
1258 	int state = 0;
1259 	if (vm_page_donate_mode == VM_PAGE_DONATE_DISABLED) {
1260 		printf("VM Donation mode is OFF on the system\n");
1261 		return state;
1262 	}
1263 	if (task != kernel_task) {
1264 		task_lock(task);
1265 		if (!task->donates_own_pages) {
1266 			printf("SELF DONATE for %s ON\n", proc_best_name(get_bsdtask_info(task)));
1267 			task->donates_own_pages = true;
1268 			state = 1;
1269 		} else if (task->donates_own_pages) {
1270 			printf("SELF DONATE for %s OFF\n", proc_best_name(get_bsdtask_info(task)));
1271 			task->donates_own_pages = false;
1272 			state = 0;
1273 		}
1274 		task_unlock(task);
1275 	}
1276 	return state;
1277 }
1278 #endif /* DEVELOPMENT || DEBUG */
1279 
/*
 * Unconditionally set or clear "task"'s donates_own_pages flag under
 * the task lock.  Callers must not pass the kernel task, and the
 * system-wide donate mode must not be disabled (both asserted).
 */
void
vm_task_set_selfdonate_pages(task_t task, bool donate)
{
	assert(vm_page_donate_mode != VM_PAGE_DONATE_DISABLED);
	assert(task != kernel_task);

	task_lock(task);
	task->donates_own_pages = donate;
	task_unlock(task);
}
1290 
1291 
1292 
static size_t
vm_pageout_page_queue(vm_page_queue_head_t *, size_t, bool);

/*
 * Flag (tested and set under the page queues lock) used to make sure
 * there is only a single vm_pageout_anonymous_pages() sweep going on
 * at a time.
 */
boolean_t       vm_pageout_anonymous_pages_active = FALSE;
1301 
1302 
1303 kern_return_t
1304 vm_pageout_anonymous_pages()
1305 {
1306 	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1307 		size_t throttled_pages_moved, anonymous_pages_moved, active_pages_moved;
1308 		vm_page_lock_queues();
1309 
1310 		if (vm_pageout_anonymous_pages_active == TRUE) {
1311 			vm_page_unlock_queues();
1312 			return KERN_RESOURCE_SHORTAGE;
1313 		}
1314 		vm_pageout_anonymous_pages_active = TRUE;
1315 		vm_page_unlock_queues();
1316 
1317 		throttled_pages_moved = vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count, false);
1318 		anonymous_pages_moved = vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count, false);
1319 		active_pages_moved = vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count, false);
1320 
1321 		os_log(OS_LOG_DEFAULT,
1322 		    "%s: throttled pages moved: %zu, anonymous pages moved: %zu, active pages moved: %zu",
1323 		    __func__, throttled_pages_moved, anonymous_pages_moved, active_pages_moved);
1324 
1325 		if (VM_CONFIG_SWAP_IS_PRESENT) {
1326 			vm_consider_swapping();
1327 		}
1328 
1329 		vm_page_lock_queues();
1330 		vm_pageout_anonymous_pages_active = FALSE;
1331 		vm_page_unlock_queues();
1332 		return KERN_SUCCESS;
1333 	} else {
1334 		return KERN_NOT_SUPPORTED;
1335 	}
1336 }
1337 
1338 
/*
 * Sweep up to "qcount" pages from the head of page queue "q": each
 * dirty or precious page of an internal object is handed to pageout
 * queue "iq" (the compressor's internal queue, or the benchmark queue
 * when "perf_test" is set) via vm_pageout_cluster_to_queue(); clean,
 * non-precious pages are freed outright.  Pages that can't be handled
 * (external objects, busy/cleaning/laundry pages, recently referenced
 * pages, contended object locks) are rotated back onto "q".
 *
 * Acquires the page queues lock on entry and drops it before
 * returning; the lock is periodically dropped/yielded during the walk.
 *
 * Returns the number of pages handed to the pageout queue.
 */
size_t
vm_pageout_page_queue(vm_page_queue_head_t *q, size_t qcount, bool perf_test)
{
	vm_page_t       m;
	vm_object_t     t_object = NULL;    /* last object whose try-lock failed */
	vm_object_t     l_object = NULL;    /* object we currently hold locked */
	vm_object_t     m_object = NULL;
	int             delayed_unlock = 0;
	int             try_failed_count = 0;
	int             refmod_state;
	int             pmap_options;
	struct          vm_pageout_queue *iq;
	ppnum_t         phys_page;
	size_t          pages_moved = 0;


	iq = &vm_pageout_queue_internal;

	vm_page_lock_queues();

#if DEVELOPMENT || DEBUG
	if (perf_test) {
		iq = &vm_pageout_queue_benchmark;
		// ensure the benchmark queue isn't throttled
		iq->pgo_maxlaundry = (unsigned int) qcount;
	}
#endif /* DEVELOPMENT ||DEBUG */

	while (qcount && !vm_page_queue_empty(q)) {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

		if (VM_PAGE_Q_THROTTLED(iq)) {
			/*
			 * the target pageout queue is full: ask for a wakeup
			 * when it drains and block until then
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			iq->pgo_draining = TRUE;

			assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
			vm_page_unlock_queues();

			thread_block(THREAD_CONTINUE_NULL);

			vm_page_lock_queues();
			delayed_unlock = 0;
			continue;
		}
		m = (vm_page_t) vm_page_queue_first(q);
		m_object = VM_PAGE_OBJECT(m);

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/* only internal (anonymous) objects are eligible */
			if (!m_object->internal) {
				goto reenter_pg_on_q;
			}

			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/* new object: start its try-fail count from scratch */
			if (m_object != t_object) {
				try_failed_count = 0;
			}

			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if (!vm_object_lock_try_scan(m_object)) {
				/* give up on this object after 20 failed tries */
				if (try_failed_count > 20) {
					goto reenter_pg_on_q;
				}
				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();
				delayed_unlock = 0;

				t_object = m_object;
				continue;
			}
			l_object = m_object;
		}
		if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_free_when_done) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			goto reenter_pg_on_q;
		}
		phys_page = VM_PAGE_GET_PHYS_PAGE(m);

		/* pull the hardware referenced/modified state into the page */
		if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
			refmod_state = pmap_get_refmod(phys_page);

			if (refmod_state & VM_MEM_REFERENCED) {
				m->vmp_reference = TRUE;
			}
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}
		/* recently referenced: clear the bit and give it another pass */
		if (m->vmp_reference == TRUE) {
			m->vmp_reference = FALSE;
			pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
			goto reenter_pg_on_q;
		}
		if (m->vmp_pmapped == TRUE) {
			/*
			 * disconnect all mappings; tell the pmap layer the
			 * page is headed to the compressor so it can move
			 * the data (or only if it was modified, when the
			 * page isn't already known dirty/precious)
			 */
			if (m->vmp_dirty || m->vmp_precious) {
				pmap_options = PMAP_OPTIONS_COMPRESSOR;
			} else {
				pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
			}
			refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/* clean and not precious: nothing to compress, just free it */
		if (!m->vmp_dirty && !m->vmp_precious) {
			vm_page_unlock_queues();
			VM_PAGE_FREE(m);
			vm_page_lock_queues();
			delayed_unlock = 0;

			goto next_pg;
		}
		/* make sure the object has a compressor pager to page into */
		if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
			if (!m_object->pager_initialized) {
				vm_page_unlock_queues();

				vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);

				if (!m_object->pager_initialized) {
					vm_object_compressor_pager_create(m_object);
				}

				vm_page_lock_queues();
				delayed_unlock = 0;
			}
			if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
				/*
				 * We dropped the page queues lock above, so
				 * "m" might no longer be on this queue...
				 */
				if (m != (vm_page_t) vm_page_queue_first(q)) {
					continue;
				}
				goto reenter_pg_on_q;
			}
			/*
			 * vm_object_compressor_pager_create will drop the object lock
			 * which means 'm' may no longer be valid to use
			 */
			continue;
		}

		if (!perf_test) {
			/*
			 * we've already factored out pages in the laundry which
			 * means this page can't be on the pageout queue so it's
			 * safe to do the vm_page_queues_remove
			 */
			bool donate = (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
			vm_page_queues_remove(m, TRUE);
			if (donate) {
				/*
				 * The compressor needs to see this bit to know
				 * where this page needs to land. Also if stolen,
				 * this bit helps put the page back in the right
				 * special queue where it belongs.
				 */
				m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
			}
		} else {
			vm_page_queue_remove(q, m, vmp_pageq);
		}

		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

		/* hand the page to the pageout (compressor/benchmark) queue */
		vm_pageout_cluster_to_queue(m, iq);

		pages_moved++;
		goto next_pg;

reenter_pg_on_q:
		vm_page_queue_remove(q, m, vmp_pageq);
		vm_page_queue_enter(q, m, vmp_pageq);
next_pg:
		qcount--;
		try_failed_count = 0;

		/* don't monopolize the page queues lock */
		if (delayed_unlock++ > 128) {
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			lck_mtx_yield(&vm_page_queue_lock);
			delayed_unlock = 0;
		}
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_page_unlock_queues();
	return pages_moved;
}
1558 
1559 
1560 
1561 /*
1562  * function in BSD to apply I/O throttle to the pageout thread
1563  */
1564 extern void vm_pageout_io_throttle(void);
1565 
1566 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj)                    \
1567 	MACRO_BEGIN                                                     \
1568 	/* \
1569 	 * If a "reusable" page somehow made it back into \
1570 	 * the active queue, it's been re-used and is not \
1571 	 * quite re-usable. \
1572 	 * If the VM object was "all_reusable", consider it \
1573 	 * as "all re-used" instead of converting it to \
1574 	 * "partially re-used", which could be expensive. \
1575 	 */                                                             \
1576 	assert(VM_PAGE_OBJECT((m)) == (obj));                           \
1577 	if ((m)->vmp_reusable ||                                        \
1578 	    (obj)->all_reusable) {                                      \
1579 	        vm_object_reuse_pages((obj),                            \
1580 	                              (m)->vmp_offset,                  \
1581 	                              (m)->vmp_offset + PAGE_SIZE_64,   \
1582 	                              FALSE);                           \
1583 	}                                                               \
1584 	MACRO_END
1585 
1586 
1587 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT         64
1588 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX     1024
1589 
1590 #define FCS_IDLE                0
1591 #define FCS_DELAYED             1
1592 #define FCS_DEADLOCK_DETECTED   2
1593 
1594 struct flow_control {
1595 	int             state;
1596 	mach_timespec_t ts;
1597 };
1598 
1599 
1600 uint64_t vm_pageout_rejected_bq_internal = 0;
1601 uint64_t vm_pageout_rejected_bq_external = 0;
1602 uint64_t vm_pageout_skipped_bq_internal = 0;
1603 uint64_t vm_pageout_skipped_bq_external = 0;
1604 
1605 #define ANONS_GRABBED_LIMIT     2
1606 
1607 
1608 #if 0
1609 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1610 #endif
1611 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1612 
1613 #define VM_PAGEOUT_PB_NO_ACTION                         0
1614 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1615 #define VM_PAGEOUT_PB_THREAD_YIELD                      2
1616 
1617 
#if 0
/*
 * Currently compiled out (#if 0); kept for reference.  Returns the
 * locally batched free pages to the free list — or simply yields the
 * page queues lock when the batch is empty — and resets the caller's
 * delayed_unlock counter.
 */
static void
vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
{
	if (*local_freeq) {
		vm_page_unlock_queues();

		VM_DEBUG_CONSTANT_EVENT(
			vm_pageout_freelist, DBG_VM_PAGEOUT_FREELIST, DBG_FUNC_START,
			vm_page_free_count, 0, 0, 1);

		vm_page_free_list(*local_freeq, TRUE);

		VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist, DBG_VM_PAGEOUT_FREELIST, DBG_FUNC_END,
		    vm_page_free_count, *local_freed, 0, 1);

		*local_freeq = NULL;
		*local_freed = 0;

		vm_page_lock_queues();
	} else {
		lck_mtx_yield(&vm_page_queue_lock);
	}
	*delayed_unlock = 1;
}
#endif
1644 
1645 
1646 static void
1647 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1648     vm_page_t *local_freeq, int *local_freed, int action)
1649 {
1650 	vm_page_unlock_queues();
1651 
1652 	if (*object != NULL) {
1653 		vm_object_unlock(*object);
1654 		*object = NULL;
1655 	}
1656 	if (*local_freeq) {
1657 		vm_page_free_list(*local_freeq, TRUE);
1658 
1659 		*local_freeq = NULL;
1660 		*local_freed = 0;
1661 	}
1662 	*delayed_unlock = 1;
1663 
1664 	switch (action) {
1665 	case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1666 		vm_consider_waking_compactor_swapper();
1667 		break;
1668 	case VM_PAGEOUT_PB_THREAD_YIELD:
1669 		thread_yield_internal(1);
1670 		break;
1671 	case VM_PAGEOUT_PB_NO_ACTION:
1672 	default:
1673 		break;
1674 	}
1675 	vm_page_lock_queues();
1676 }
1677 
1678 
/* snapshot of the cumulative pageout counters taken by the previous
 * update_vm_info() pass; used to compute per-interval deltas */
static struct vm_pageout_vminfo last;

/* previous value of the cumulative vm_page_grab_count counter */
uint64_t last_vm_page_pages_grabbed = 0;

extern  uint32_t c_segment_pages_compressed;

extern uint64_t shared_region_pager_reclaimed;
extern struct memory_object_pager_ops shared_region_pager_ops;
1687 
1688 void
1689 update_vm_info(void)
1690 {
1691 	unsigned long tmp;
1692 	uint64_t tmp64;
1693 
1694 	vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
1695 	vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
1696 	vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
1697 	vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
1698 
1699 	vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
1700 	vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
1701 	vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
1702 
1703 	vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
1704 	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
1705 	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
1706 	vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
1707 	vm_pageout_stats[vm_pageout_stat_now].vm_page_realtime_count = vm_page_realtime_count;
1708 
1709 	tmp = vm_pageout_vminfo.vm_pageout_considered_page;
1710 	vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
1711 	last.vm_pageout_considered_page = tmp;
1712 
1713 	tmp64 = vm_pageout_vminfo.vm_pageout_compressions;
1714 	vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp64 - last.vm_pageout_compressions);
1715 	last.vm_pageout_compressions = tmp64;
1716 
1717 	tmp = vm_pageout_vminfo.vm_compressor_failed;
1718 	vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
1719 	last.vm_compressor_failed = tmp;
1720 
1721 	tmp64 = vm_pageout_vminfo.vm_compressor_pages_grabbed;
1722 	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp64 - last.vm_compressor_pages_grabbed);
1723 	last.vm_compressor_pages_grabbed = tmp64;
1724 
1725 	tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
1726 	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
1727 	last.vm_phantom_cache_found_ghost = tmp;
1728 
1729 	tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
1730 	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
1731 	last.vm_phantom_cache_added_ghost = tmp;
1732 
1733 	tmp64 = counter_load(&vm_page_grab_count);
1734 	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp64 - last_vm_page_pages_grabbed);
1735 	last_vm_page_pages_grabbed = tmp64;
1736 
1737 	tmp = vm_pageout_vminfo.vm_page_pages_freed;
1738 	vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
1739 	last.vm_page_pages_freed = tmp;
1740 
1741 	if (vm_pageout_stats[vm_pageout_stat_now].considered) {
1742 		tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
1743 		vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
1744 		last.vm_pageout_pages_evicted = tmp;
1745 
1746 		tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
1747 		vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
1748 		last.vm_pageout_pages_purged = tmp;
1749 
1750 		tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
1751 		vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
1752 		last.vm_pageout_freed_speculative = tmp;
1753 
1754 		tmp = vm_pageout_vminfo.vm_pageout_freed_external;
1755 		vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
1756 		last.vm_pageout_freed_external = tmp;
1757 
1758 		tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
1759 		vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
1760 		last.vm_pageout_inactive_referenced = tmp;
1761 
1762 		tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
1763 		vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
1764 		last.vm_pageout_scan_inactive_throttled_external = tmp;
1765 
1766 		tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
1767 		vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
1768 		last.vm_pageout_inactive_dirty_external = tmp;
1769 
1770 		tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
1771 		vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
1772 		last.vm_pageout_freed_cleaned = tmp;
1773 
1774 		tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
1775 		vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
1776 		last.vm_pageout_inactive_nolock = tmp;
1777 
1778 		tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
1779 		vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
1780 		last.vm_pageout_scan_inactive_throttled_internal = tmp;
1781 
1782 		tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
1783 		vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
1784 		last.vm_pageout_skipped_external = tmp;
1785 
1786 		tmp = vm_pageout_vminfo.vm_pageout_skipped_internal;
1787 		vm_pageout_stats[vm_pageout_stat_now].skipped_internal = (unsigned int)(tmp - last.vm_pageout_skipped_internal);
1788 		last.vm_pageout_skipped_internal = tmp;
1789 
1790 		tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
1791 		vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
1792 		last.vm_pageout_reactivation_limit_exceeded = tmp;
1793 
1794 		tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
1795 		vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
1796 		last.vm_pageout_inactive_force_reclaim = tmp;
1797 
1798 		tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
1799 		vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
1800 		last.vm_pageout_freed_internal = tmp;
1801 
1802 		tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
1803 		vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
1804 		last.vm_pageout_considered_bq_internal = tmp;
1805 
1806 		tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
1807 		vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
1808 		last.vm_pageout_considered_bq_external = tmp;
1809 
1810 		tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
1811 		vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
1812 		last.vm_pageout_filecache_min_reactivated = tmp;
1813 
1814 		tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
1815 		vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
1816 		last.vm_pageout_inactive_dirty_internal = tmp;
1817 
1818 		tmp = vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache;
1819 		vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_sharedcache = (unsigned int)(tmp - last.vm_pageout_forcereclaimed_sharedcache);
1820 		last.vm_pageout_forcereclaimed_sharedcache = tmp;
1821 
1822 		tmp = vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime;
1823 		vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_realtime = (unsigned int)(tmp - last.vm_pageout_forcereclaimed_realtime);
1824 		last.vm_pageout_forcereclaimed_realtime = tmp;
1825 
1826 		tmp = vm_pageout_vminfo.vm_pageout_protected_sharedcache;
1827 		vm_pageout_stats[vm_pageout_stat_now].protected_sharedcache = (unsigned int)(tmp - last.vm_pageout_protected_sharedcache);
1828 		last.vm_pageout_protected_sharedcache = tmp;
1829 
1830 		tmp = vm_pageout_vminfo.vm_pageout_protected_realtime;
1831 		vm_pageout_stats[vm_pageout_stat_now].protected_realtime = (unsigned int)(tmp - last.vm_pageout_protected_realtime);
1832 		last.vm_pageout_protected_realtime = tmp;
1833 	}
1834 
1835 	KDBG((VMDBG_CODE(DBG_VM_INFO1)) | DBG_FUNC_NONE,
1836 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
1837 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
1838 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
1839 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count);
1840 
1841 	KDBG((VMDBG_CODE(DBG_VM_INFO2)) | DBG_FUNC_NONE,
1842 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
1843 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
1844 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count);
1845 
1846 	KDBG((VMDBG_CODE(DBG_VM_INFO3)) | DBG_FUNC_NONE,
1847 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
1848 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
1849 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
1850 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count);
1851 
1852 	if (vm_pageout_stats[vm_pageout_stat_now].considered ||
1853 	    vm_pageout_stats[vm_pageout_stat_now].pages_compressed ||
1854 	    vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
1855 		KDBG((VMDBG_CODE(DBG_VM_INFO4)) | DBG_FUNC_NONE,
1856 		    vm_pageout_stats[vm_pageout_stat_now].considered,
1857 		    vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
1858 		    vm_pageout_stats[vm_pageout_stat_now].freed_external,
1859 		    vm_pageout_stats[vm_pageout_stat_now].inactive_referenced);
1860 
1861 		KDBG((VMDBG_CODE(DBG_VM_INFO5)) | DBG_FUNC_NONE,
1862 		    vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
1863 		    vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
1864 		    vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
1865 		    vm_pageout_stats[vm_pageout_stat_now].inactive_nolock);
1866 
1867 		KDBG((VMDBG_CODE(DBG_VM_INFO6)) | DBG_FUNC_NONE,
1868 		    vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
1869 		    vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
1870 		    vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
1871 		    vm_pageout_stats[vm_pageout_stat_now].skipped_external);
1872 
1873 		KDBG((VMDBG_CODE(DBG_VM_INFO7)) | DBG_FUNC_NONE,
1874 		    vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
1875 		    vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
1876 		    vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
1877 		    vm_pageout_stats[vm_pageout_stat_now].freed_internal);
1878 
1879 		KDBG((VMDBG_CODE(DBG_VM_INFO8)) | DBG_FUNC_NONE,
1880 		    vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
1881 		    vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
1882 		    vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
1883 		    vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal);
1884 
1885 		KDBG((VMDBG_CODE(DBG_VM_INFO10)) | DBG_FUNC_NONE,
1886 		    vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_sharedcache,
1887 		    vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_realtime,
1888 		    vm_pageout_stats[vm_pageout_stat_now].protected_sharedcache,
1889 		    vm_pageout_stats[vm_pageout_stat_now].protected_realtime);
1890 	}
1891 	KDBG((VMDBG_CODE(DBG_VM_INFO9)) | DBG_FUNC_NONE,
1892 	    vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
1893 	    vm_pageout_stats[vm_pageout_stat_now].pages_freed,
1894 	    vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
1895 	    vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added);
1896 
1897 	record_memory_pressure();
1898 }
1899 
1900 extern boolean_t hibernation_vmqueues_inspection;
1901 
1902 /*
1903  * Return values for functions called by vm_pageout_scan
1904  * that control its flow.
1905  *
1906  * PROCEED -- vm_pageout_scan will keep making forward progress.
1907  * DONE_RETURN -- page demand satisfied, work is done -> vm_pageout_scan returns.
1908  * NEXT_ITERATION -- restart the 'for' loop in vm_pageout_scan aka continue.
1909  */
1910 
1911 #define VM_PAGEOUT_SCAN_PROCEED                 (0)
1912 #define VM_PAGEOUT_SCAN_DONE_RETURN             (1)
1913 #define VM_PAGEOUT_SCAN_NEXT_ITERATION          (2)
1914 
1915 /*
1916  * This function is called only from vm_pageout_scan and
1917  * it moves overflow secluded pages (one-at-a-time) to the
1918  * batched 'local' free Q or active Q.
1919  */
static void
vps_deal_with_secluded_page_overflow(vm_page_t *local_freeq, int *local_freed)
{
#if CONFIG_SECLUDED_MEMORY
	/*
	 * Deal with secluded_q overflow: if the secluded pool has grown
	 * past its target, demote one page per call back into the
	 * regular page lifecycle.
	 */
	if (vm_page_secluded_count > vm_page_secluded_target) {
		vm_page_t secluded_page;

		/*
		 * SECLUDED_AGING_BEFORE_ACTIVE:
		 * Excess secluded pages go to the active queue and
		 * will later go to the inactive queue.
		 */
		/* the free + in-use counts must account for every secluded page */
		assert((vm_page_secluded_count_free +
		    vm_page_secluded_count_inuse) ==
		    vm_page_secluded_count);
		/* take the oldest page from the secluded queue */
		secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
		assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);

		vm_page_queues_remove(secluded_page, FALSE);
		assert(!secluded_page->vmp_fictitious);
		assert(!VM_PAGE_WIRED(secluded_page));

		if (secluded_page->vmp_object == 0) {
			/*
			 * transfer to free queue: no owning object, so push
			 * onto the caller's local free list (singly linked
			 * through vmp_snext) for batched freeing
			 */
			assert(secluded_page->vmp_busy);
			secluded_page->vmp_snext = *local_freeq;
			*local_freeq = secluded_page;
			*local_freed += 1;
		} else {
			/* transfer to head of active queue (in-use page) */
			vm_page_enqueue_active(secluded_page, FALSE);
			secluded_page = VM_PAGE_NULL;
		}
	}
#else /* CONFIG_SECLUDED_MEMORY */

#pragma unused(local_freeq)
#pragma unused(local_freed)

	return;

#endif /* CONFIG_SECLUDED_MEMORY */
}
1966 
1967 /*
1968  * This function is called only from vm_pageout_scan and
1969  * it initializes the loop targets for vm_pageout_scan().
1970  */
1971 static void
1972 vps_init_page_targets(void)
1973 {
1974 	/*
1975 	 * LD TODO: Other page targets should be calculated here too.
1976 	 */
1977 	vm_page_anonymous_min = vm_page_inactive_target / 20;
1978 
1979 	if (vm_pageout_state.vm_page_speculative_percentage > 50) {
1980 		vm_pageout_state.vm_page_speculative_percentage = 50;
1981 	} else if (vm_pageout_state.vm_page_speculative_percentage <= 0) {
1982 		vm_pageout_state.vm_page_speculative_percentage = 1;
1983 	}
1984 
1985 	vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1986 	    vm_page_inactive_count);
1987 }
1988 
1989 /*
1990  * This function is called only from vm_pageout_scan and
1991  * it purges a single VM object at-a-time and will either
 * make vm_pageout_scan() restart the loop or keep moving forward.
1993  */
1994 static int
1995 vps_purge_object()
1996 {
1997 	int             force_purge;
1998 
1999 	assert(available_for_purge >= 0);
2000 	force_purge = 0; /* no force-purging */
2001 
2002 #if VM_PRESSURE_EVENTS
2003 	vm_pressure_level_t pressure_level;
2004 
2005 	pressure_level = memorystatus_vm_pressure_level;
2006 
2007 	if (pressure_level > kVMPressureNormal) {
2008 		if (pressure_level >= kVMPressureCritical) {
2009 			force_purge = vm_pageout_state.memorystatus_purge_on_critical;
2010 		} else if (pressure_level >= kVMPressureUrgent) {
2011 			force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
2012 		} else if (pressure_level >= kVMPressureWarning) {
2013 			force_purge = vm_pageout_state.memorystatus_purge_on_warning;
2014 		}
2015 	}
2016 #endif /* VM_PRESSURE_EVENTS */
2017 
2018 	if (available_for_purge || force_purge) {
2019 		memoryshot(DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2020 
2021 		VM_DEBUG_EVENT(vm_pageout_purgeone, DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2022 		if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2023 			VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
2024 			VM_DEBUG_EVENT(vm_pageout_purgeone, DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2025 			memoryshot(DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2026 
2027 			return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2028 		}
2029 		VM_DEBUG_EVENT(vm_pageout_purgeone, DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2030 		memoryshot(DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2031 	}
2032 
2033 	return VM_PAGEOUT_SCAN_PROCEED;
2034 }
2035 
2036 /*
2037  * This function is called only from vm_pageout_scan and
2038  * it will try to age the next speculative Q if the oldest
2039  * one is empty.
2040  */
static int
vps_age_speculative_queue(boolean_t force_speculative_aging)
{
#define DELAY_SPECULATIVE_AGE   1000

	/*
	 * try to pull pages from the aging bins...
	 * see vm_page_internal.h for an explanation of how
	 * this mechanism works
	 */
	boolean_t                       can_steal = FALSE;
	int                             num_scanned_queues;
	static int                      delay_speculative_age = 0; /* depends on the # of times we go through the main pageout_scan loop.*/
	mach_timespec_t                 ts;
	struct vm_speculative_age_q     *aq;
	struct vm_speculative_age_q     *sq;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	/* resume scanning from the bin we last stole from */
	aq = &vm_page_queue_speculative[speculative_steal_index];

	/*
	 * advance around the ring of aging bins (at most one full lap)
	 * until a non-empty bin is found
	 */
	num_scanned_queues = 0;
	while (vm_page_queue_empty(&aq->age_q) &&
	    num_scanned_queues++ != vm_page_max_speculative_age_q) {
		speculative_steal_index++;

		if (speculative_steal_index > vm_page_max_speculative_age_q) {
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
		}

		aq = &vm_page_queue_speculative[speculative_steal_index];
	}

	if (num_scanned_queues == vm_page_max_speculative_age_q + 1) {
		/*
		 * XXX We've scanned all the speculative
		 * queues but still haven't found one
		 * that is not empty, even though
		 * vm_page_speculative_count is not 0.
		 */
		if (!vm_page_queue_empty(&sq->age_q)) {
			/* the already-aged queue still has pages to steal from */
			return VM_PAGEOUT_SCAN_NEXT_ITERATION;
		}
#if DEVELOPMENT || DEBUG
		panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
#endif
		/* readjust... */
		vm_page_speculative_count = 0;
		/* ... and continue */
		return VM_PAGEOUT_SCAN_NEXT_ITERATION;
	}

	/* over target (or caller insists): age the bin unconditionally */
	if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE) {
		can_steal = TRUE;
	} else {
		if (!delay_speculative_age) {
			mach_timespec_t ts_fully_aged;

			/*
			 * compute when this bin becomes fully aged:
			 * its birth timestamp plus (number of bins *
			 * per-bin age in milliseconds)
			 */
			ts_fully_aged.tv_sec = (vm_page_max_speculative_age_q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
			ts_fully_aged.tv_nsec = ((vm_page_max_speculative_age_q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
			    * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);

			clock_sec_t sec;
			clock_nsec_t nsec;
			clock_get_system_nanotime(&sec, &nsec);
			ts.tv_sec = (unsigned int) sec;
			ts.tv_nsec = nsec;

			if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) {
				can_steal = TRUE;
			} else {
				/* not aged yet: back off re-checking the clock for a while */
				delay_speculative_age++;
			}
		} else {
			/* skip the clock check for DELAY_SPECULATIVE_AGE calls, then re-enable it */
			delay_speculative_age++;
			if (delay_speculative_age == DELAY_SPECULATIVE_AGE) {
				delay_speculative_age = 0;
			}
		}
	}
	if (can_steal == TRUE) {
		/* move this bin's pages onto the aged queue so they can be reclaimed */
		vm_page_speculate_ageit(aq);
	}

	return VM_PAGEOUT_SCAN_PROCEED;
}
2129 
2130 /*
2131  * This function is called only from vm_pageout_scan and
2132  * it evicts a single VM object from the cache.
2133  */
2134 static int inline
2135 vps_object_cache_evict(vm_object_t *object_to_unlock)
2136 {
2137 	static int                      cache_evict_throttle = 0;
2138 	struct vm_speculative_age_q     *sq;
2139 
2140 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2141 
2142 	if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
2143 		int     pages_evicted;
2144 
2145 		if (*object_to_unlock != NULL) {
2146 			vm_object_unlock(*object_to_unlock);
2147 			*object_to_unlock = NULL;
2148 		}
2149 		KDBG(0x13001ec | DBG_FUNC_START);
2150 
2151 		pages_evicted = vm_object_cache_evict(100, 10);
2152 
2153 		KDBG(0x13001ec | DBG_FUNC_END, pages_evicted);
2154 
2155 		if (pages_evicted) {
2156 			vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
2157 
2158 			VM_DEBUG_EVENT(vm_pageout_cache_evict, DBG_VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2159 			    vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
2160 			memoryshot(DBG_VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2161 
2162 			/*
2163 			 * we just freed up to 100 pages,
2164 			 * so go back to the top of the main loop
2165 			 * and re-evaulate the memory situation
2166 			 */
2167 			return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2168 		} else {
2169 			cache_evict_throttle = 1000;
2170 		}
2171 	}
2172 	if (cache_evict_throttle) {
2173 		cache_evict_throttle--;
2174 	}
2175 
2176 	return VM_PAGEOUT_SCAN_PROCEED;
2177 }
2178 
2179 
2180 /*
2181  * This function is called only from vm_pageout_scan and
2182  * it calculates the filecache min. that needs to be maintained
2183  * as we start to steal pages.
2184  */
static void
vps_calculate_filecache_min(void)
{
	/* divisor == 0 disables the filecache-protection filter entirely */
	int divisor = vm_pageout_state.vm_page_filecache_min_divisor;

#if CONFIG_JETSAM
	/*
	 * don't let the filecache_min fall below 15% of available memory
	 * on systems with an active compressor that isn't nearing its
	 * limits w/r to accepting new data
	 *
	 * on systems w/o the compressor/swapper, the filecache is always
	 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
	 * since most (if not all) of the anonymous pages are in the
	 * throttled queue (which isn't counted as available) which
	 * effectively disables this filter
	 */
	if (vm_compressor_low_on_space() || divisor == 0) {
		/* compressor nearly full (or filter disabled): stop protecting the filecache */
		vm_pageout_state.vm_page_filecache_min = 0;
	} else {
		/* min = available * (10 / divisor) pages */
		vm_pageout_state.vm_page_filecache_min =
		    ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
	}
#else
	if (vm_compressor_out_of_space() || divisor == 0) {
		vm_pageout_state.vm_page_filecache_min = 0;
	} else {
		/*
		 * don't let the filecache_min fall below the specified critical level
		 */
		vm_pageout_state.vm_page_filecache_min =
		    ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
	}
#endif
	/* when free memory is critically low, drop filecache protection entirely */
	if (vm_page_free_count < (vm_page_free_reserved / 4)) {
		vm_pageout_state.vm_page_filecache_min = 0;
	}
}
2223 
2224 /*
2225  * This function is called only from vm_pageout_scan and
2226  * it updates the flow control time to detect if VM pageoutscan
2227  * isn't making progress.
2228  */
2229 static void
2230 vps_flow_control_reset_deadlock_timer(struct flow_control *flow_control)
2231 {
2232 	mach_timespec_t ts;
2233 	clock_sec_t sec;
2234 	clock_nsec_t nsec;
2235 
2236 	ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
2237 	ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2238 	clock_get_system_nanotime(&sec, &nsec);
2239 	flow_control->ts.tv_sec = (unsigned int) sec;
2240 	flow_control->ts.tv_nsec = nsec;
2241 	ADD_MACH_TIMESPEC(&flow_control->ts, &ts);
2242 
2243 	flow_control->state = FCS_DELAYED;
2244 
2245 	vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
2246 }
2247 
2248 /*
2249  * This function is called only from vm_pageout_scan and
2250  * it is the flow control logic of VM pageout scan which
2251  * controls if it should block and for how long.
2252  * Any blocking of vm_pageout_scan happens ONLY in this function.
2253  */
static int
vps_flow_control(struct flow_control *flow_control, int *anons_grabbed, vm_object_t *object, int *delayed_unlock,
    vm_page_t *local_freeq, int *local_freed, int *vm_pageout_deadlock_target, unsigned int inactive_burst_count)
{
	boolean_t       exceeded_burst_throttle = FALSE;
	unsigned int    msecs = 0;
	uint32_t        inactive_external_count;
	mach_timespec_t ts;
	struct  vm_pageout_queue *iq;
	struct  vm_pageout_queue *eq;
	struct  vm_speculative_age_q *sq;

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;
	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	/*
	 * Sometimes we have to pause:
	 *	1) No inactive pages - nothing to do.
	 *	2) Loop control - no acceptable pages found on the inactive queue
	 *         within the last vm_pageout_burst_inactive_throttle iterations
	 *	3) Flow control - default pageout queue is full
	 */
	if (vm_page_queue_empty(&vm_page_queue_inactive) &&
	    vm_page_queue_empty(&vm_page_queue_anonymous) &&
	    vm_page_queue_empty(&vm_page_queue_cleaned) &&
	    vm_page_queue_empty(&sq->age_q)) {
		/* case 1: every reclaimable queue is empty */
		VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
		msecs = vm_pageout_state.vm_pageout_empty_wait;
	} else if (inactive_burst_count >=
	    MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
	    (vm_page_inactive_count +
	    vm_page_speculative_count))) {
		/* case 2: too many consecutive unusable inactive pages */
		VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
		msecs = vm_pageout_state.vm_pageout_burst_wait;

		exceeded_burst_throttle = TRUE;
	} else if (VM_PAGE_Q_THROTTLED(iq) &&
	    VM_DYNAMIC_PAGING_ENABLED()) {
		/* case 3: the internal (compressor) pageout queue is full */
		clock_sec_t sec;
		clock_nsec_t nsec;

		switch (flow_control->state) {
		case FCS_IDLE:
			if ((vm_page_free_count + *local_freed) < vm_page_free_target &&
			    vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
				/*
				 * since the compressor is running independently of vm_pageout_scan
				 * let's not wait for it just yet... as long as we have a healthy supply
				 * of filecache pages to work with, let's keep stealing those.
				 */
				inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;

				if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
				    (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
					/* force the scan to consider only file-backed pages for a while */
					*anons_grabbed = ANONS_GRABBED_LIMIT;
					VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
					return VM_PAGEOUT_SCAN_PROCEED;
				}
			}

			/* start the deadlock-detection clock and wait */
			vps_flow_control_reset_deadlock_timer(flow_control);
			msecs = vm_pageout_state.vm_pageout_deadlock_wait;

			break;

		case FCS_DELAYED:
			clock_get_system_nanotime(&sec, &nsec);
			ts.tv_sec = (unsigned int) sec;
			ts.tv_nsec = nsec;

			if (CMP_MACH_TIMESPEC(&ts, &flow_control->ts) >= 0) {
				/*
				 * the pageout thread for the default pager is potentially
				 * deadlocked since the
				 * default pager queue has been throttled for more than the
				 * allowable time... we need to move some clean pages or dirty
				 * pages belonging to the external pagers if they aren't throttled
				 * vm_page_free_wanted represents the number of threads currently
				 * blocked waiting for pages... we'll move one page for each of
				 * these plus a fixed amount to break the logjam... once we're done
				 * moving this number of pages, we'll re-enter the FCS_DELAYED state
				 * with a new timeout target since we have no way of knowing
				 * whether we've broken the deadlock except through observation
				 * of the queue associated with the default pager... we need to
				 * stop moving pages and allow the system to run to see what
				 * state it settles into.
				 */

				*vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
				    vm_page_free_wanted + vm_page_free_wanted_privileged;
				VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
				flow_control->state = FCS_DEADLOCK_DETECTED;
				thread_wakeup(VM_PAGEOUT_GC_EVENT);
				return VM_PAGEOUT_SCAN_PROCEED;
			}
			/*
			 * just resniff instead of trying
			 * to compute a new delay time... we're going to be
			 * awakened immediately upon a laundry completion,
			 * so we won't wait any longer than necessary
			 */
			msecs = vm_pageout_state.vm_pageout_idle_wait;
			break;

		case FCS_DEADLOCK_DETECTED:
			if (*vm_pageout_deadlock_target) {
				/* still pages left to force-move; keep going */
				return VM_PAGEOUT_SCAN_PROCEED;
			}

			/* relief quota exhausted: re-arm the timer and observe */
			vps_flow_control_reset_deadlock_timer(flow_control);
			msecs = vm_pageout_state.vm_pageout_deadlock_wait;

			break;
		}
	} else {
		/*
		 * No need to pause...
		 */
		return VM_PAGEOUT_SCAN_PROCEED;
	}

	/* we're going to block: release our claim on any wanted object */
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_pageout_prepare_to_block(object, delayed_unlock, local_freeq, local_freed,
	    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);

	if (vm_page_free_count >= vm_page_free_target) {
		/*
		 * we're here because
		 *  1) someone else freed up some pages while we had
		 *     the queues unlocked above
		 * and we've hit one of the 3 conditions that
		 * cause us to pause the pageout scan thread
		 *
		 * since we already have enough free pages,
		 * let's avoid stalling and return normally
		 *
		 * before we return, make sure the pageout I/O threads
		 * are running throttled in case there are still requests
		 * in the laundry... since we have enough free pages
		 * we don't need the laundry to be cleaned in a timely
		 * fashion... so let's avoid interfering with foreground
		 * activity
		 *
		 * we don't want to hold vm_page_queue_free_lock when
		 * calling vm_pageout_adjust_eq_iothrottle (since it
		 * may cause other locks to be taken), we do the intitial
		 * check outside of the lock.  Once we take the lock,
		 * we recheck the condition since it may have changed.
		 * if it has, no problem, we will make the threads
		 * non-throttled before actually blocking
		 */
		vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, TRUE);
	}
	vm_free_page_lock();

	if (vm_page_free_count >= vm_page_free_target &&
	    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
		/*
		 * NOTE(review): this path returns with the free-page lock
		 * still held -- presumably the DONE_RETURN contract in
		 * vm_pageout_scan expects that; confirm against the caller.
		 */
		return VM_PAGEOUT_SCAN_DONE_RETURN;
	}
	vm_free_page_unlock();

	if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
		/*
		 * we're most likely about to block due to one of
		 * the 3 conditions that cause vm_pageout_scan to
		 * not be able to make forward progress w/r
		 * to providing new pages to the free queue,
		 * so unthrottle the I/O threads in case we
		 * have laundry to be cleaned... it needs
		 * to be completed ASAP.
		 *
		 * even if we don't block, we want the io threads
		 * running unthrottled since the sum of free +
		 * clean pages is still under our free target
		 */
		vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, FALSE);
	}
	if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
		/*
		 * if we get here we're below our free target and
		 * we're stalling due to a full laundry queue or
		 * we don't have any inactive pages other then
		 * those in the clean queue...
		 * however, we have pages on the clean queue that
		 * can be moved to the free queue, so let's not
		 * stall the pageout scan
		 */
		flow_control->state = FCS_IDLE;
		return VM_PAGEOUT_SCAN_PROCEED;
	}
	if (flow_control->state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
		/* the internal queue drained while we were getting ready to block */
		flow_control->state = FCS_IDLE;
		return VM_PAGEOUT_SCAN_PROCEED;
	}

	VM_CHECK_MEMORYSTATUS;

	if (flow_control->state != FCS_IDLE) {
		VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
	}

	/* block for up to "msecs", or until a laundry completion wakes us */
	iq->pgo_throttled = TRUE;
	assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000 * NSEC_PER_USEC);

	vm_page_unlock_queues();

	assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);

	VM_DEBUG_EVENT(vm_pageout_thread_block, DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
	    iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
	memoryshot(DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);

	thread_block(THREAD_CONTINUE_NULL);

	VM_DEBUG_EVENT(vm_pageout_thread_block, DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
	    iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
	memoryshot(DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);

	vm_page_lock_queues();

	iq->pgo_throttled = FALSE;

	/* conditions may have shifted while we slept: recompute targets */
	vps_init_page_targets();

	return VM_PAGEOUT_SCAN_NEXT_ITERATION;
}
2482 
2483 extern boolean_t vm_darkwake_mode;
2484 /*
2485  * This function is called only from vm_pageout_scan and
2486  * it will find and return the most appropriate page to be
2487  * reclaimed.
2488  */
2489 static int
2490 vps_choose_victim_page(vm_page_t *victim_page, int *anons_grabbed, boolean_t *grab_anonymous, boolean_t force_anonymous,
2491     boolean_t *is_page_from_bg_q, unsigned int *reactivated_this_call)
2492 {
2493 	vm_page_t                       m = NULL;
2494 	vm_object_t                     m_object = VM_OBJECT_NULL;
2495 	uint32_t                        inactive_external_count;
2496 	struct vm_speculative_age_q     *sq;
2497 	struct vm_pageout_queue         *iq;
2498 	int                             retval = VM_PAGEOUT_SCAN_PROCEED;
2499 
2500 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2501 	iq = &vm_pageout_queue_internal;
2502 
2503 	*is_page_from_bg_q = FALSE;
2504 
2505 	m = NULL;
2506 	m_object = VM_OBJECT_NULL;
2507 
2508 	if (VM_DYNAMIC_PAGING_ENABLED()) {
2509 		assert(vm_page_throttled_count == 0);
2510 		assert(vm_page_queue_empty(&vm_page_queue_throttled));
2511 	}
2512 
2513 	/*
2514 	 * Try for a clean-queue inactive page.
2515 	 * These are pages that vm_pageout_scan tried to steal earlier, but
2516 	 * were dirty and had to be cleaned.  Pick them up now that they are clean.
2517 	 */
2518 	if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2519 		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2520 
2521 		assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2522 
2523 		goto found_page;
2524 	}
2525 
2526 	/*
2527 	 * The next most eligible pages are ones we paged in speculatively,
2528 	 * but which have not yet been touched and have been aged out.
2529 	 */
2530 	if (!vm_page_queue_empty(&sq->age_q)) {
2531 		m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2532 
2533 		assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2534 
2535 		if (!m->vmp_dirty || force_anonymous == FALSE) {
2536 			goto found_page;
2537 		} else {
2538 			m = NULL;
2539 		}
2540 	}
2541 
2542 #if !CONFIG_JETSAM
2543 	if (vm_page_donate_mode != VM_PAGE_DONATE_DISABLED) {
2544 		if (vm_page_donate_queue_ripe && !vm_page_queue_empty(&vm_page_queue_donate)) {
2545 			m = (vm_page_t) vm_page_queue_first(&vm_page_queue_donate);
2546 			assert(m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
2547 			goto found_page;
2548 		}
2549 	}
2550 #endif /* !CONFIG_JETSAM */
2551 
2552 	if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2553 		vm_object_t     bg_m_object = NULL;
2554 
2555 		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2556 
2557 		bg_m_object = VM_PAGE_OBJECT(m);
2558 
2559 		if (!VM_PAGE_PAGEABLE(m) || (vm_darkwake_mode && m->vmp_busy)) {
2560 			/*
2561 			 * This page is on the background queue
2562 			 * but not on a pageable queue OR is busy during
2563 			 * darkwake mode when the target is artificially lowered.
2564 			 * If it is busy during darkwake mode, and we don't skip it,
2565 			 * we will just swing back around and try again with the same
2566 			 * queue and might hit the same page or its neighbor in a
2567 			 * similar state. Both of these are transient states and will
2568 			 * get resolved, but, at this point let's ignore this page.
2569 			 */
2570 			if (vm_darkwake_mode && m->vmp_busy) {
2571 				if (bg_m_object->internal) {
2572 					vm_pageout_skipped_bq_internal++;
2573 				} else {
2574 					vm_pageout_skipped_bq_external++;
2575 				}
2576 			}
2577 		} else if (force_anonymous == FALSE || bg_m_object->internal) {
2578 			if (bg_m_object->internal &&
2579 			    (VM_PAGE_Q_THROTTLED(iq) ||
2580 			    vm_compressor_out_of_space() == TRUE ||
2581 			    vm_page_free_count < (vm_page_free_reserved / 4))) {
2582 				vm_pageout_skipped_bq_internal++;
2583 			} else {
2584 				*is_page_from_bg_q = TRUE;
2585 
2586 				if (bg_m_object->internal) {
2587 					vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
2588 				} else {
2589 					vm_pageout_vminfo.vm_pageout_considered_bq_external++;
2590 				}
2591 				goto found_page;
2592 			}
2593 		}
2594 	}
2595 
2596 	inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2597 
2598 	if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
2599 	    (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2600 		*grab_anonymous = TRUE;
2601 		*anons_grabbed = 0;
2602 
2603 		if (VM_CONFIG_SWAP_IS_ACTIVE) {
2604 			vm_pageout_vminfo.vm_pageout_skipped_external++;
2605 		} else {
2606 			if (vm_page_free_count < (COMPRESSOR_FREE_RESERVED_LIMIT * 2)) {
2607 				/*
2608 				 * No swap and we are in dangerously low levels of free memory.
2609 				 * If we keep going ahead with anonymous pages, we are going to run into a situation
2610 				 * where the compressor will be stuck waiting for free pages (if it isn't already).
2611 				 *
2612 				 * So, pick a file backed page...
2613 				 */
2614 				*grab_anonymous = FALSE;
2615 				*anons_grabbed = ANONS_GRABBED_LIMIT;
2616 				vm_pageout_vminfo.vm_pageout_skipped_internal++;
2617 			}
2618 		}
2619 		goto want_anonymous;
2620 	}
2621 	*grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2622 
2623 #if CONFIG_JETSAM
2624 	/* If the file-backed pool has accumulated
2625 	 * significantly more pages than the jetsam
2626 	 * threshold, prefer to reclaim those
2627 	 * inline to minimise compute overhead of reclaiming
2628 	 * anonymous pages.
2629 	 * This calculation does not account for the CPU local
2630 	 * external page queues, as those are expected to be
2631 	 * much smaller relative to the global pools.
2632 	 */
2633 
2634 	struct vm_pageout_queue *eq = &vm_pageout_queue_external;
2635 
2636 	if (*grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2637 		if (vm_page_pageable_external_count >
2638 		    vm_pageout_state.vm_page_filecache_min) {
2639 			if ((vm_page_pageable_external_count *
2640 			    vm_pageout_memorystatus_fb_factor_dr) >
2641 			    (memorystatus_get_critical_page_shortage_threshold() *
2642 			    vm_pageout_memorystatus_fb_factor_nr)) {
2643 				*grab_anonymous = FALSE;
2644 
2645 				VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
2646 			}
2647 		}
2648 		if (*grab_anonymous) {
2649 			VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
2650 		}
2651 	}
2652 #endif /* CONFIG_JETSAM */
2653 
2654 want_anonymous:
2655 	if (*grab_anonymous == FALSE || *anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2656 		if (!vm_page_queue_empty(&vm_page_queue_inactive)) {
2657 			m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2658 
2659 			assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2660 			*anons_grabbed = 0;
2661 
2662 			if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
2663 				if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
2664 					if ((++(*reactivated_this_call) % 100)) {
2665 						vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
2666 
2667 						vm_page_activate(m);
2668 						counter_inc(&vm_statistics_reactivations);
2669 #if DEVELOPMENT || DEBUG
2670 						if (*is_page_from_bg_q == TRUE) {
2671 							if (m_object->internal) {
2672 								vm_pageout_rejected_bq_internal++;
2673 							} else {
2674 								vm_pageout_rejected_bq_external++;
2675 							}
2676 						}
2677 #endif /* DEVELOPMENT || DEBUG */
2678 						vm_pageout_state.vm_pageout_inactive_used++;
2679 
2680 						m = NULL;
2681 						retval = VM_PAGEOUT_SCAN_NEXT_ITERATION;
2682 
2683 						goto found_page;
2684 					}
2685 
2686 					/*
2687 					 * steal 1 of the file backed pages even if
2688 					 * we are under the limit that has been set
2689 					 * for a healthy filecache
2690 					 */
2691 				}
2692 			}
2693 			goto found_page;
2694 		}
2695 	}
2696 	if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
2697 		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2698 
2699 		assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2700 		*anons_grabbed += 1;
2701 
2702 		goto found_page;
2703 	}
2704 
2705 	m = NULL;
2706 
2707 found_page:
2708 	*victim_page = m;
2709 
2710 	return retval;
2711 }
2712 
2713 /*
2714  * This function is called only from vm_pageout_scan and
2715  * it will put a page back on the active/inactive queue
2716  * if we can't reclaim it for some reason.
2717  */
2718 static void
2719 vps_requeue_page(vm_page_t m, int page_prev_q_state, __unused boolean_t page_from_bg_q)
2720 {
2721 	if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
2722 		vm_page_enqueue_inactive(m, FALSE);
2723 	} else {
2724 		vm_page_activate(m);
2725 	}
2726 
2727 #if DEVELOPMENT || DEBUG
2728 	vm_object_t m_object = VM_PAGE_OBJECT(m);
2729 
2730 	if (page_from_bg_q == TRUE) {
2731 		if (m_object->internal) {
2732 			vm_pageout_rejected_bq_internal++;
2733 		} else {
2734 			vm_pageout_rejected_bq_external++;
2735 		}
2736 	}
2737 #endif /* DEVELOPMENT || DEBUG */
2738 }
2739 
2740 /*
2741  * This function is called only from vm_pageout_scan and
2742  * it will try to grab the victim page's VM object (m_object)
2743  * which differs from the previous victim page's object (object).
2744  */
static int
vps_switch_object(vm_page_t m, vm_object_t m_object, vm_object_t *object, int page_prev_q_state, boolean_t avoid_anon_pages, boolean_t page_from_bg_q)
{
	/*
	 * Called with the page queues lock held; 'object' is the lock we
	 * currently own (or NULL), 'm_object' is the victim page's object.
	 * Returns VM_PAGEOUT_SCAN_PROCEED with *object == m_object locked,
	 * or VM_PAGEOUT_SCAN_NEXT_ITERATION after requeueing 'm' when the
	 * try-lock on m_object fails.
	 */
	struct vm_speculative_age_q *sq;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	/*
	 * the object associated with candidate page is
	 * different from the one we were just working
	 * with... dump the lock if we still own it
	 */
	if (*object != NULL) {
		vm_object_unlock(*object);
		*object = NULL;
	}
	/*
	 * Try to lock object; since we've already got the
	 * page queues lock, we can only 'try' for this one.
	 * if the 'try' fails, we need to do a mutex_pause
	 * to allow the owner of the object lock a chance to
	 * run... otherwise, we're likely to trip over this
	 * object in the same state as we work our way through
	 * the queue... clumps of pages associated with the same
	 * object are fairly typical on the inactive and active queues
	 */
	if (!vm_object_lock_try_scan(m_object)) {
		vm_page_t m_want = NULL;

		vm_pageout_vminfo.vm_pageout_inactive_nolock++;

		if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
			VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
		}

		/*
		 * Clear the hardware reference bit so the page doesn't look
		 * "used" the next time the scan reaches it after requeueing.
		 */
		pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));

		m->vmp_reference = FALSE;

		if (!m_object->object_is_shared_cache) {
			/*
			 * don't apply this optimization if this is the shared cache
			 * object, it's too easy to get rid of very hot and important
			 * pages...
			 * m->vmp_object must be stable since we hold the page queues lock...
			 * we can update the scan_collisions field sans the object lock
			 * since it is a separate field and this is the only spot that does
			 * a read-modify-write operation and it is never executed concurrently...
			 * we can asynchronously set this field to 0 when creating a UPL, so it
			 * is possible for the value to be a bit non-deterministic, but that's ok
			 * since it's only used as a hint
			 */
			m_object->scan_collisions = 1;
		}
		/*
		 * Peek at the queue we will most likely pull the next victim
		 * from, in the same priority order used by victim selection:
		 * background, cleaned, aged speculative, then inactive vs.
		 * anonymous depending on avoid_anon_pages.
		 */
		if (page_from_bg_q) {
			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
		} else if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
		} else if (!vm_page_queue_empty(&sq->age_q)) {
			m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
		} else if ((avoid_anon_pages || vm_page_queue_empty(&vm_page_queue_anonymous)) &&
		    !vm_page_queue_empty(&vm_page_queue_inactive)) {
			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
		} else if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
		}

		/*
		 * this is the next object we're going to be interested in
		 * try to make sure it's available after the mutex_pause
		 * returns control
		 */
		if (m_want) {
			vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
		}

		vps_requeue_page(m, page_prev_q_state, page_from_bg_q);

		return VM_PAGEOUT_SCAN_NEXT_ITERATION;
	} else {
		/* try-lock succeeded: hand the locked object back to the caller */
		*object = m_object;
		vm_pageout_scan_wants_object = VM_OBJECT_NULL;
	}

	return VM_PAGEOUT_SCAN_PROCEED;
}
2831 
2832 /*
2833  * This function is called only from vm_pageout_scan and
2834  * it notices that pageout scan may be rendered ineffective
2835  * due to a FS deadlock and will jetsam a process if possible.
2836  * If jetsam isn't supported, it'll move the page to the active
2837  * queue to try and get some different pages pushed onwards so
2838  * we can try to get out of this scenario.
2839  */
static void
vps_deal_with_throttled_queues(vm_page_t m, vm_object_t *object, uint32_t *vm_pageout_inactive_external_forced_reactivate_limit,
    boolean_t *force_anonymous, __unused boolean_t is_page_from_bg_q)
{
	/*
	 * Called with the page queues lock held and *object locked.
	 * 'm' is a dirty page whose pageout queue is throttled; either
	 * force it back onto the active queue (external pages, possibly
	 * triggering jetsam) or reactivate it (internal pages).
	 */
	struct  vm_pageout_queue *eq;
	vm_object_t cur_object = VM_OBJECT_NULL;

	cur_object = *object;

	eq = &vm_pageout_queue_external;

	if (cur_object->internal == FALSE) {
		/*
		 * we need to break up the following potential deadlock case...
		 *  a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
		 *  b) The thread doing the writing is waiting for pages while holding the truncate lock
		 *  c) Most of the pages in the inactive queue belong to this file.
		 *
		 * we are potentially in this deadlock because...
		 *  a) the external pageout queue is throttled
		 *  b) we're done with the active queue and moved on to the inactive queue
		 *  c) we've got a dirty external page
		 *
		 * since we don't know the reason for the external pageout queue being throttled we
		 * must suspect that we are deadlocked, so move the current page onto the active queue
		 * in an effort to cause a page from the active queue to 'age' to the inactive queue
		 *
		 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
		 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
		 * pool the next time we select a victim page... if we can make enough new free pages,
		 * the deadlock will break, the external pageout queue will empty and it will no longer
		 * be throttled
		 *
		 * if we have jetsam configured, keep a count of the pages reactivated this way so
		 * that we can try to find clean pages in the active/inactive queues before
		 * deciding to jetsam a process
		 */
		vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;

		/* manually enqueue on the active queue and fix up the counters */
		vm_page_check_pageable_safe(m);
		assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
		vm_page_queue_enter(&vm_page_queue_active, m, vmp_pageq);
		m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
		vm_page_active_count++;
		vm_page_pageable_external_count++;

		vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, FALSE);

#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM

#pragma unused(force_anonymous)

		/*
		 * NOTE(review): the limit is uint32_t, so the '<= 0' test below
		 * only fires on exact zero; a decrement from 0 would wrap.
		 * The reset below makes that unlikely — confirm it cannot occur.
		 */
		*vm_pageout_inactive_external_forced_reactivate_limit -= 1;

		if (*vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
			*vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
			/*
			 * Possible deadlock scenario so request jetsam action
			 */
			memorystatus_kill_on_vps_starvation();
			VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, DBG_VM_PAGEOUT_JETSAM, DBG_FUNC_NONE,
			    vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
		}
#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */

#pragma unused(vm_pageout_inactive_external_forced_reactivate_limit)

		/* no jetsam: steer the next victim selection toward anonymous pages */
		*force_anonymous = TRUE;
#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
	} else {
		/* internal page: just reactivate it and count the reactivation */
		vm_page_activate(m);
		counter_inc(&vm_statistics_reactivations);

#if DEVELOPMENT || DEBUG
		if (is_page_from_bg_q == TRUE) {
			if (cur_object->internal) {
				vm_pageout_rejected_bq_internal++;
			} else {
				vm_pageout_rejected_bq_external++;
			}
		}
#endif /* DEVELOPMENT || DEBUG */

		vm_pageout_state.vm_pageout_inactive_used++;
	}
}
2926 
2927 
void
vm_page_balance_inactive(int max_to_move)
{
	/*
	 * Move up to 'max_to_move' pages from the head of the active
	 * queue to the inactive queue until the inactive+speculative
	 * total reaches vm_page_inactive_target.  Caller must hold the
	 * page queues lock.
	 */
	vm_page_t m;

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) {
		/*
		 * It is likely that the hibernation code path is
		 * dealing with these very queues as we are about
		 * to move pages around in/from them and completely
		 * change the linkage of the pages.
		 *
		 * And so we skip the rebalancing of these queues.
		 */
		return;
	}
	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
	    vm_page_inactive_count +
	    vm_page_speculative_count);

	while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
		VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);

		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);

		assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
		assert(!m->vmp_laundry);
		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
		assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);

		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);

		/*
		 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
		 *
		 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
		 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
		 * new reference happens. If no further references happen on the page after that remote TLB flushes
		 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
		 * by pageout_scan, which is just fine since the last reference would have happened quite far
		 * in the past (TLB caches don't hang around for very long), and of course could just as easily
		 * have happened before we moved the page
		 */
		if (m->vmp_pmapped == TRUE) {
			/*
			 * We might be holding the page queue lock as a
			 * spin lock and clearing the "referenced" bit could
			 * take a while if there are lots of mappings of
			 * that page, so make sure we acquire the lock as
			 * a mutex to avoid a spinlock timeout.
			 */
			vm_page_lockconvert_queues();
			pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
		}

		/*
		 * The page might be absent or busy,
		 * but vm_page_deactivate can handle that.
		 * FALSE indicates that we don't want a H/W clear reference
		 */
		vm_page_deactivate_internal(m, FALSE);
	}
}
2993 
2994 /*
2995  *	vm_pageout_scan does the dirty work for the pageout daemon.
2996  *	It returns with both vm_page_queue_free_lock and vm_page_queue_lock
2997  *	held and vm_page_free_wanted == 0.
2998  */
2999 void
3000 vm_pageout_scan(void)
3001 {
3002 	unsigned int loop_count = 0;
3003 	unsigned int inactive_burst_count = 0;
3004 	unsigned int reactivated_this_call;
3005 	unsigned int reactivate_limit;
3006 	vm_page_t   local_freeq = NULL;
3007 	int         local_freed = 0;
3008 	int         delayed_unlock;
3009 	int         delayed_unlock_limit = 0;
3010 	int         refmod_state = 0;
3011 	int     vm_pageout_deadlock_target = 0;
3012 	struct  vm_pageout_queue *iq;
3013 	struct  vm_pageout_queue *eq;
3014 	struct  vm_speculative_age_q *sq;
3015 	struct  flow_control    flow_control = { .state = 0, .ts = { .tv_sec = 0, .tv_nsec = 0 } };
3016 	boolean_t inactive_throttled = FALSE;
3017 	vm_object_t     object = NULL;
3018 	uint32_t        inactive_reclaim_run;
3019 	boolean_t       grab_anonymous = FALSE;
3020 	boolean_t       force_anonymous = FALSE;
3021 	boolean_t       force_speculative_aging = FALSE;
3022 	int             anons_grabbed = 0;
3023 	int             page_prev_q_state = 0;
3024 	boolean_t       page_from_bg_q = FALSE;
3025 	uint32_t        vm_pageout_inactive_external_forced_reactivate_limit = 0;
3026 	vm_object_t     m_object = VM_OBJECT_NULL;
3027 	int             retval = 0;
3028 	boolean_t       lock_yield_check = FALSE;
3029 
3030 
3031 	VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, DBG_VM_PAGEOUT_SCAN, DBG_FUNC_START,
3032 	    vm_pageout_vminfo.vm_pageout_freed_speculative,
3033 	    vm_pageout_state.vm_pageout_inactive_clean,
3034 	    vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
3035 	    vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
3036 
3037 	flow_control.state = FCS_IDLE;
3038 	iq = &vm_pageout_queue_internal;
3039 	eq = &vm_pageout_queue_external;
3040 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3041 
3042 	/* Ask the pmap layer to return any pages it no longer needs. */
3043 	pmap_release_pages_fast();
3044 
3045 	vm_page_lock_queues();
3046 
3047 	delayed_unlock = 1;
3048 
3049 	/*
3050 	 *	Calculate the max number of referenced pages on the inactive
3051 	 *	queue that we will reactivate.
3052 	 */
3053 	reactivated_this_call = 0;
3054 	reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
3055 	    vm_page_inactive_count);
3056 	inactive_reclaim_run = 0;
3057 
3058 	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3059 
3060 	/*
3061 	 *	We must limit the rate at which we send pages to the pagers
3062 	 *	so that we don't tie up too many pages in the I/O queues.
3063 	 *	We implement a throttling mechanism using the laundry count
3064 	 *      to limit the number of pages outstanding to the default
3065 	 *	and external pagers.  We can bypass the throttles and look
3066 	 *	for clean pages if the pageout queues don't drain in a timely
3067 	 *	fashion since this may indicate that the pageout paths are
3068 	 *	stalled waiting for memory, which only we can provide.
3069 	 */
3070 
3071 	vps_init_page_targets();
3072 	assert(object == NULL);
3073 	assert(delayed_unlock != 0);
3074 
3075 	for (;;) {
3076 		vm_page_t m;
3077 
3078 		DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
3079 
3080 		if (lock_yield_check) {
3081 			lock_yield_check = FALSE;
3082 
3083 			if (delayed_unlock++ > delayed_unlock_limit) {
3084 				vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3085 				    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3086 			} else if (vm_pageout_scan_wants_object) {
3087 				vm_page_unlock_queues();
3088 				mutex_pause(0);
3089 				vm_page_lock_queues();
3090 			} else if (vps_yield_for_pgqlockwaiters && lck_mtx_yield(&vm_page_queue_lock)) {
3091 				VM_PAGEOUT_DEBUG(vm_pageout_yield_for_free_pages, 1);
3092 			}
3093 		}
3094 
3095 		if (vm_upl_wait_for_pages < 0) {
3096 			vm_upl_wait_for_pages = 0;
3097 		}
3098 
3099 		delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
3100 
3101 		if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) {
3102 			delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
3103 		}
3104 
3105 		vps_deal_with_secluded_page_overflow(&local_freeq, &local_freed);
3106 
3107 		assert(delayed_unlock);
3108 
3109 		/*
3110 		 * maintain our balance
3111 		 */
3112 		vm_page_balance_inactive(1);
3113 
3114 
3115 		/**********************************************************************
3116 		* above this point we're playing with the active and secluded queues
3117 		* below this point we're playing with the throttling mechanisms
3118 		* and the inactive queue
3119 		**********************************************************************/
3120 
3121 		if (vm_page_free_count + local_freed >= vm_page_free_target) {
3122 			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
3123 
3124 			vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3125 			    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3126 			/*
3127 			 * make sure the pageout I/O threads are running
3128 			 * throttled in case there are still requests
3129 			 * in the laundry... since we have met our targets
3130 			 * we don't need the laundry to be cleaned in a timely
3131 			 * fashion... so let's avoid interfering with foreground
3132 			 * activity
3133 			 */
3134 			vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, TRUE);
3135 
3136 			vm_free_page_lock();
3137 
3138 			if ((vm_page_free_count >= vm_page_free_target) &&
3139 			    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
3140 				/*
3141 				 * done - we have met our target *and*
3142 				 * there is no one waiting for a page.
3143 				 */
3144 return_from_scan:
3145 				assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
3146 
3147 				VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, DBG_VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
3148 				    vm_pageout_state.vm_pageout_inactive,
3149 				    vm_pageout_state.vm_pageout_inactive_used, 0, 0);
3150 				VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, DBG_VM_PAGEOUT_SCAN, DBG_FUNC_END,
3151 				    vm_pageout_vminfo.vm_pageout_freed_speculative,
3152 				    vm_pageout_state.vm_pageout_inactive_clean,
3153 				    vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
3154 				    vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
3155 
3156 				return;
3157 			}
3158 			vm_free_page_unlock();
3159 		}
3160 
3161 		/*
3162 		 * Before anything, we check if we have any ripe volatile
3163 		 * objects around. If so, try to purge the first object.
3164 		 * If the purge fails, fall through to reclaim a page instead.
3165 		 * If the purge succeeds, go back to the top and reevalute
3166 		 * the new memory situation.
3167 		 */
3168 		retval = vps_purge_object();
3169 
3170 		if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3171 			/*
3172 			 * Success
3173 			 */
3174 			if (object != NULL) {
3175 				vm_object_unlock(object);
3176 				object = NULL;
3177 			}
3178 
3179 			lock_yield_check = FALSE;
3180 			continue;
3181 		}
3182 
3183 		/*
3184 		 * If our 'aged' queue is empty and we have some speculative pages
3185 		 * in the other queues, let's go through and see if we need to age
3186 		 * them.
3187 		 *
3188 		 * If we succeeded in aging a speculative Q or just that everything
3189 		 * looks normal w.r.t queue age and queue counts, we keep going onward.
3190 		 *
3191 		 * If, for some reason, we seem to have a mismatch between the spec.
3192 		 * page count and the page queues, we reset those variables and
3193 		 * restart the loop (LD TODO: Track this better?).
3194 		 */
3195 		if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
3196 			retval = vps_age_speculative_queue(force_speculative_aging);
3197 
3198 			if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3199 				lock_yield_check = FALSE;
3200 				continue;
3201 			}
3202 		}
3203 		force_speculative_aging = FALSE;
3204 
3205 		/*
3206 		 * Check to see if we need to evict objects from the cache.
3207 		 *
3208 		 * Note: 'object' here doesn't have anything to do with
3209 		 * the eviction part. We just need to make sure we have dropped
3210 		 * any object lock we might be holding if we need to go down
3211 		 * into the eviction logic.
3212 		 */
3213 		retval = vps_object_cache_evict(&object);
3214 
3215 		if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3216 			lock_yield_check = FALSE;
3217 			continue;
3218 		}
3219 
3220 
3221 		/*
3222 		 * Calculate our filecache_min that will affect the loop
3223 		 * going forward.
3224 		 */
3225 		vps_calculate_filecache_min();
3226 
3227 		/*
3228 		 * LD TODO: Use a structure to hold all state variables for a single
3229 		 * vm_pageout_scan iteration and pass that structure to this function instead.
3230 		 */
3231 		retval = vps_flow_control(&flow_control, &anons_grabbed, &object,
3232 		    &delayed_unlock, &local_freeq, &local_freed,
3233 		    &vm_pageout_deadlock_target, inactive_burst_count);
3234 
3235 		if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3236 			if (loop_count >= vm_page_inactive_count) {
3237 				loop_count = 0;
3238 			}
3239 
3240 			inactive_burst_count = 0;
3241 
3242 			assert(object == NULL);
3243 			assert(delayed_unlock != 0);
3244 
3245 			lock_yield_check = FALSE;
3246 			continue;
3247 		} else if (retval == VM_PAGEOUT_SCAN_DONE_RETURN) {
3248 			goto return_from_scan;
3249 		}
3250 
3251 		flow_control.state = FCS_IDLE;
3252 
3253 		vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
3254 		    vm_pageout_inactive_external_forced_reactivate_limit);
3255 		loop_count++;
3256 		inactive_burst_count++;
3257 		vm_pageout_state.vm_pageout_inactive++;
3258 
3259 		/*
3260 		 * Choose a victim.
3261 		 */
3262 
3263 		m = NULL;
3264 		retval = vps_choose_victim_page(&m, &anons_grabbed, &grab_anonymous, force_anonymous, &page_from_bg_q, &reactivated_this_call);
3265 
3266 		if (m == NULL) {
3267 			if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3268 				inactive_burst_count = 0;
3269 
3270 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3271 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
3272 				}
3273 
3274 				lock_yield_check = TRUE;
3275 				continue;
3276 			}
3277 
3278 			/*
3279 			 * if we've gotten here, we have no victim page.
3280 			 * check to see if we've not finished balancing the queues
3281 			 * or we have a page on the aged speculative queue that we
3282 			 * skipped due to force_anonymous == TRUE.. or we have
3283 			 * speculative  pages that we can prematurely age... if
3284 			 * one of these cases we'll keep going, else panic
3285 			 */
3286 			force_anonymous = FALSE;
3287 			VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1);
3288 
3289 			if (!vm_page_queue_empty(&sq->age_q)) {
3290 				lock_yield_check = TRUE;
3291 				continue;
3292 			}
3293 
3294 			if (vm_page_speculative_count) {
3295 				force_speculative_aging = TRUE;
3296 				lock_yield_check = TRUE;
3297 				continue;
3298 			}
3299 			panic("vm_pageout: no victim");
3300 
3301 			/* NOTREACHED */
3302 		}
3303 
3304 		assert(VM_PAGE_PAGEABLE(m));
3305 		m_object = VM_PAGE_OBJECT(m);
3306 		force_anonymous = FALSE;
3307 
3308 		page_prev_q_state = m->vmp_q_state;
3309 		/*
3310 		 * we just found this page on one of our queues...
3311 		 * it can't also be on the pageout queue, so safe
3312 		 * to call vm_page_queues_remove
3313 		 */
3314 		bool donate = (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
3315 		vm_page_queues_remove(m, TRUE);
3316 		if (donate) {
3317 			/*
3318 			 * The compressor needs to see this bit to know
3319 			 * where this page needs to land. Also if stolen,
3320 			 * this bit helps put the page back in the right
3321 			 * special queue where it belongs.
3322 			 */
3323 			m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
3324 		}
3325 
3326 		assert(!m->vmp_laundry);
3327 		assert(!m->vmp_private);
3328 		assert(!m->vmp_fictitious);
3329 		assert(!is_kernel_object(m_object));
3330 		assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
3331 
3332 		vm_pageout_vminfo.vm_pageout_considered_page++;
3333 
3334 		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
3335 
3336 		/*
3337 		 * check to see if we currently are working
3338 		 * with the same object... if so, we've
3339 		 * already got the lock
3340 		 */
3341 		if (m_object != object) {
3342 			boolean_t avoid_anon_pages = (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT);
3343 
3344 			/*
3345 			 * vps_switch_object() will always drop the 'object' lock first
3346 			 * and then try to acquire the 'm_object' lock. So 'object' has to point to
3347 			 * either 'm_object' or NULL.
3348 			 */
3349 			retval = vps_switch_object(m, m_object, &object, page_prev_q_state, avoid_anon_pages, page_from_bg_q);
3350 
3351 			if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3352 				lock_yield_check = TRUE;
3353 				continue;
3354 			}
3355 		}
3356 		assert(m_object == object);
3357 		assert(VM_PAGE_OBJECT(m) == m_object);
3358 
3359 		if (m->vmp_busy) {
3360 			/*
3361 			 *	Somebody is already playing with this page.
3362 			 *	Put it back on the appropriate queue
3363 			 *
3364 			 */
3365 			VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, 1);
3366 
3367 			if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3368 				VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1);
3369 			}
3370 
3371 			vps_requeue_page(m, page_prev_q_state, page_from_bg_q);
3372 
3373 			lock_yield_check = TRUE;
3374 			continue;
3375 		}
3376 
3377 		/*
3378 		 *   if (m->vmp_cleaning && !m->vmp_free_when_done)
3379 		 *	If already cleaning this page in place
3380 		 *	just leave if off the paging queues.
3381 		 *	We can leave the page mapped, and upl_commit_range
3382 		 *	will put it on the clean queue.
3383 		 *
3384 		 *   if (m->vmp_free_when_done && !m->vmp_cleaning)
3385 		 *	an msync INVALIDATE is in progress...
3386 		 *	this page has been marked for destruction
3387 		 *      after it has been cleaned,
3388 		 *      but not yet gathered into a UPL
3389 		 *	where 'cleaning' will be set...
3390 		 *	just leave it off the paging queues
3391 		 *
3392 		 *   if (m->vmp_free_when_done && m->vmp_clenaing)
3393 		 *	an msync INVALIDATE is in progress
3394 		 *	and the UPL has already gathered this page...
3395 		 *	just leave it off the paging queues
3396 		 */
3397 		if (m->vmp_free_when_done || m->vmp_cleaning) {
3398 			lock_yield_check = TRUE;
3399 			continue;
3400 		}
3401 
3402 
3403 		/*
3404 		 *	If it's absent, in error or the object is no longer alive,
3405 		 *	we can reclaim the page... in the no longer alive case,
3406 		 *	there are 2 states the page can be in that preclude us
3407 		 *	from reclaiming it - busy or cleaning - that we've already
3408 		 *	dealt with
3409 		 */
3410 		if (m->vmp_absent || VMP_ERROR_GET(m) || !object->alive ||
3411 		    (!object->internal && object->pager == MEMORY_OBJECT_NULL)) {
3412 			if (m->vmp_absent) {
3413 				VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, 1);
3414 			} else if (!object->alive ||
3415 			    (!object->internal &&
3416 			    object->pager == MEMORY_OBJECT_NULL)) {
3417 				VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, 1);
3418 			} else {
3419 				VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, 1);
3420 			}
3421 reclaim_page:
3422 			if (vm_pageout_deadlock_target) {
3423 				VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, 1);
3424 				vm_pageout_deadlock_target--;
3425 			}
3426 
3427 			DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
3428 
3429 			if (object->internal) {
3430 				DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
3431 			} else {
3432 				DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
3433 			}
3434 			assert(!m->vmp_cleaning);
3435 			assert(!m->vmp_laundry);
3436 
3437 			if (!object->internal &&
3438 			    object->pager != NULL &&
3439 			    object->pager->mo_pager_ops == &shared_region_pager_ops) {
3440 				shared_region_pager_reclaimed++;
3441 			}
3442 
3443 			m->vmp_busy = TRUE;
3444 
3445 			/*
3446 			 * remove page from object here since we're already
3447 			 * behind the object lock... defer the rest of the work
3448 			 * we'd normally do in vm_page_free_prepare_object
3449 			 * until 'vm_page_free_list' is called
3450 			 */
3451 			if (m->vmp_tabled) {
3452 				vm_page_remove(m, TRUE);
3453 			}
3454 
3455 			assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
3456 			m->vmp_snext = local_freeq;
3457 			local_freeq = m;
3458 			local_freed++;
3459 
3460 			if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
3461 				vm_pageout_vminfo.vm_pageout_freed_speculative++;
3462 			} else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3463 				vm_pageout_vminfo.vm_pageout_freed_cleaned++;
3464 			} else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) {
3465 				vm_pageout_vminfo.vm_pageout_freed_internal++;
3466 			} else {
3467 				vm_pageout_vminfo.vm_pageout_freed_external++;
3468 			}
3469 
3470 			inactive_burst_count = 0;
3471 
3472 			lock_yield_check = TRUE;
3473 			continue;
3474 		}
3475 		if (object->vo_copy == VM_OBJECT_NULL) {
3476 			/*
3477 			 * No one else can have any interest in this page.
3478 			 * If this is an empty purgable object, the page can be
3479 			 * reclaimed even if dirty.
3480 			 * If the page belongs to a volatile purgable object, we
3481 			 * reactivate it if the compressor isn't active.
3482 			 */
3483 			if (object->purgable == VM_PURGABLE_EMPTY) {
3484 				if (m->vmp_pmapped == TRUE) {
3485 					/* unmap the page */
3486 					refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
3487 					if (refmod_state & VM_MEM_MODIFIED) {
3488 						SET_PAGE_DIRTY(m, FALSE);
3489 					}
3490 				}
3491 				if (m->vmp_dirty || m->vmp_precious) {
3492 					/* we saved the cost of cleaning this page ! */
3493 					vm_page_purged_count++;
3494 				}
3495 				goto reclaim_page;
3496 			}
3497 
3498 			if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
3499 				/*
3500 				 * With the VM compressor, the cost of
3501 				 * reclaiming a page is much lower (no I/O),
3502 				 * so if we find a "volatile" page, it's better
3503 				 * to let it get compressed rather than letting
3504 				 * it occupy a full page until it gets purged.
3505 				 * So no need to check for "volatile" here.
3506 				 */
3507 			} else if (object->purgable == VM_PURGABLE_VOLATILE) {
3508 				/*
3509 				 * Avoid cleaning a "volatile" page which might
3510 				 * be purged soon.
3511 				 */
3512 
3513 				/* if it's wired, we can't put it on our queue */
3514 				assert(!VM_PAGE_WIRED(m));
3515 
3516 				/* just stick it back on! */
3517 				reactivated_this_call++;
3518 
3519 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3520 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, 1);
3521 				}
3522 
3523 				goto reactivate_page;
3524 			}
3525 		} /* vo_copy NULL */
3526 		/*
3527 		 *	If it's being used, reactivate.
3528 		 *	(Fictitious pages are either busy or absent.)
3529 		 *	First, update the reference and dirty bits
3530 		 *	to make sure the page is unreferenced.
3531 		 */
3532 		refmod_state = -1;
3533 
3534 		if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
3535 			refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3536 
3537 			if (refmod_state & VM_MEM_REFERENCED) {
3538 				m->vmp_reference = TRUE;
3539 			}
3540 			if (refmod_state & VM_MEM_MODIFIED) {
3541 				SET_PAGE_DIRTY(m, FALSE);
3542 			}
3543 		}
3544 
3545 		if (m->vmp_reference || m->vmp_dirty) {
3546 			/* deal with a rogue "reusable" page */
3547 			VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
3548 		}
3549 
3550 		if (vm_pageout_state.vm_page_xpmapped_min_divisor == 0) {
3551 			vm_pageout_state.vm_page_xpmapped_min = 0;
3552 		} else {
3553 			vm_pageout_state.vm_page_xpmapped_min = (vm_page_pageable_external_count * 10) /
3554 			    vm_pageout_state.vm_page_xpmapped_min_divisor;
3555 		}
3556 
3557 		if (!m->vmp_no_cache &&
3558 		    page_from_bg_q == FALSE &&
3559 		    (m->vmp_reference || (m->vmp_xpmapped && !object->internal &&
3560 		    (vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
3561 			/*
3562 			 * The page we pulled off the inactive list has
3563 			 * been referenced.  It is possible for other
3564 			 * processors to be touching pages faster than we
3565 			 * can clear the referenced bit and traverse the
3566 			 * inactive queue, so we limit the number of
3567 			 * reactivations.
3568 			 */
3569 			if (++reactivated_this_call >= reactivate_limit &&
3570 			    !object->object_is_shared_cache &&
3571 			    !((m->vmp_realtime ||
3572 			    object->for_realtime) &&
3573 			    vm_pageout_protect_realtime)) {
3574 				vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++;
3575 			} else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
3576 				vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++;
3577 				if (object->object_is_shared_cache) {
3578 					vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache++;
3579 				} else if (m->vmp_realtime ||
3580 				    object->for_realtime) {
3581 					vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime++;
3582 				}
3583 			} else {
3584 				uint32_t isinuse;
3585 
3586 				if (reactivated_this_call >= reactivate_limit) {
3587 					if (object->object_is_shared_cache) {
3588 						vm_pageout_vminfo.vm_pageout_protected_sharedcache++;
3589 					} else if ((m->vmp_realtime ||
3590 					    object->for_realtime) &&
3591 					    vm_pageout_protect_realtime) {
3592 						vm_pageout_vminfo.vm_pageout_protected_realtime++;
3593 					}
3594 				}
3595 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3596 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, 1);
3597 				}
3598 
3599 				vm_pageout_vminfo.vm_pageout_inactive_referenced++;
3600 reactivate_page:
3601 				if (!object->internal && object->pager != MEMORY_OBJECT_NULL &&
3602 				    vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
3603 					/*
3604 					 * no explict mappings of this object exist
3605 					 * and it's not open via the filesystem
3606 					 */
3607 					vm_page_deactivate(m);
3608 					VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1);
3609 				} else {
3610 					/*
3611 					 * The page was/is being used, so put back on active list.
3612 					 */
3613 					vm_page_activate(m);
3614 					counter_inc(&vm_statistics_reactivations);
3615 					inactive_burst_count = 0;
3616 				}
3617 #if DEVELOPMENT || DEBUG
3618 				if (page_from_bg_q == TRUE) {
3619 					if (m_object->internal) {
3620 						vm_pageout_rejected_bq_internal++;
3621 					} else {
3622 						vm_pageout_rejected_bq_external++;
3623 					}
3624 				}
3625 #endif /* DEVELOPMENT || DEBUG */
3626 
3627 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3628 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
3629 				}
3630 				vm_pageout_state.vm_pageout_inactive_used++;
3631 
3632 				lock_yield_check = TRUE;
3633 				continue;
3634 			}
3635 			/*
3636 			 * Make sure we call pmap_get_refmod() if it
3637 			 * wasn't already called just above, to update
3638 			 * the dirty bit.
3639 			 */
3640 			if ((refmod_state == -1) && !m->vmp_dirty && m->vmp_pmapped) {
3641 				refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3642 				if (refmod_state & VM_MEM_MODIFIED) {
3643 					SET_PAGE_DIRTY(m, FALSE);
3644 				}
3645 			}
3646 		}
3647 
3648 		/*
3649 		 * we've got a candidate page to steal...
3650 		 *
3651 		 * m->vmp_dirty is up to date courtesy of the
3652 		 * preceding check for m->vmp_reference... if
3653 		 * we get here, then m->vmp_reference had to be
3654 		 * FALSE (or possibly "reactivate_limit" was
3655 		 * exceeded), but in either case we called
3656 		 * pmap_get_refmod() and updated both
3657 		 * m->vmp_reference and m->vmp_dirty
3658 		 *
3659 		 * if it's dirty or precious we need to
3660 		 * see if the target queue is throtttled
3661 		 * it if is, we need to skip over it by moving it back
3662 		 * to the end of the inactive queue
3663 		 */
3664 
3665 		inactive_throttled = FALSE;
3666 
3667 		if (m->vmp_dirty || m->vmp_precious) {
3668 			if (object->internal) {
3669 				if (VM_PAGE_Q_THROTTLED(iq)) {
3670 					inactive_throttled = TRUE;
3671 				}
3672 			} else if (VM_PAGE_Q_THROTTLED(eq)) {
3673 				inactive_throttled = TRUE;
3674 			}
3675 		}
3676 throttle_inactive:
3677 		if (!VM_DYNAMIC_PAGING_ENABLED() &&
3678 		    object->internal && m->vmp_dirty &&
3679 		    (object->purgable == VM_PURGABLE_DENY ||
3680 		    object->purgable == VM_PURGABLE_NONVOLATILE ||
3681 		    object->purgable == VM_PURGABLE_VOLATILE)) {
3682 			vm_page_check_pageable_safe(m);
3683 			assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3684 			vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
3685 			m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
3686 			vm_page_throttled_count++;
3687 
3688 			VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1);
3689 
3690 			inactive_burst_count = 0;
3691 
3692 			lock_yield_check = TRUE;
3693 			continue;
3694 		}
3695 		if (inactive_throttled == TRUE) {
3696 			vps_deal_with_throttled_queues(m, &object, &vm_pageout_inactive_external_forced_reactivate_limit,
3697 			    &force_anonymous, page_from_bg_q);
3698 
3699 			inactive_burst_count = 0;
3700 
3701 			if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3702 				VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
3703 			}
3704 
3705 			lock_yield_check = TRUE;
3706 			continue;
3707 		}
3708 
3709 		/*
3710 		 * we've got a page that we can steal...
3711 		 * eliminate all mappings and make sure
3712 		 * we have the up-to-date modified state
3713 		 *
3714 		 * if we need to do a pmap_disconnect then we
3715 		 * need to re-evaluate m->vmp_dirty since the pmap_disconnect
3716 		 * provides the true state atomically... the
3717 		 * page was still mapped up to the pmap_disconnect
3718 		 * and may have been dirtied at the last microsecond
3719 		 *
3720 		 * Note that if 'pmapped' is FALSE then the page is not
3721 		 * and has not been in any map, so there is no point calling
3722 		 * pmap_disconnect().  m->vmp_dirty could have been set in anticipation
3723 		 * of likely usage of the page.
3724 		 */
3725 		if (m->vmp_pmapped == TRUE) {
3726 			int pmap_options;
3727 
3728 			/*
3729 			 * Don't count this page as going into the compressor
3730 			 * if any of these are true:
3731 			 * 1) compressed pager isn't enabled
3732 			 * 2) Freezer enabled device with compressed pager
3733 			 *    backend (exclusive use) i.e. most of the VM system
3734 			 *    (including vm_pageout_scan) has no knowledge of
3735 			 *    the compressor
3736 			 * 3) This page belongs to a file and hence will not be
3737 			 *    sent into the compressor
3738 			 */
3739 			if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3740 			    object->internal == FALSE) {
3741 				pmap_options = 0;
3742 			} else if (m->vmp_dirty || m->vmp_precious) {
3743 				/*
3744 				 * VM knows that this page is dirty (or
3745 				 * precious) and needs to be compressed
3746 				 * rather than freed.
3747 				 * Tell the pmap layer to count this page
3748 				 * as "compressed".
3749 				 */
3750 				pmap_options = PMAP_OPTIONS_COMPRESSOR;
3751 			} else {
3752 				/*
3753 				 * VM does not know if the page needs to
3754 				 * be preserved but the pmap layer might tell
3755 				 * us if any mapping has "modified" it.
3756 				 * Let's the pmap layer to count this page
3757 				 * as compressed if and only if it has been
3758 				 * modified.
3759 				 */
3760 				pmap_options =
3761 				    PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3762 			}
3763 			refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3764 			    pmap_options,
3765 			    NULL);
3766 			if (refmod_state & VM_MEM_MODIFIED) {
3767 				SET_PAGE_DIRTY(m, FALSE);
3768 			}
3769 		}
3770 
3771 		/*
3772 		 * reset our count of pages that have been reclaimed
3773 		 * since the last page was 'stolen'
3774 		 */
3775 		inactive_reclaim_run = 0;
3776 
3777 		/*
3778 		 *	If it's clean and not precious, we can free the page.
3779 		 */
3780 		if (!m->vmp_dirty && !m->vmp_precious) {
3781 			vm_pageout_state.vm_pageout_inactive_clean++;
3782 
3783 			/*
3784 			 * OK, at this point we have found a page we are going to free.
3785 			 */
3786 #if CONFIG_PHANTOM_CACHE
3787 			if (!object->internal) {
3788 				vm_phantom_cache_add_ghost(m);
3789 			}
3790 #endif
3791 			goto reclaim_page;
3792 		}
3793 
3794 		/*
3795 		 * The page may have been dirtied since the last check
3796 		 * for a throttled target queue (which may have been skipped
3797 		 * if the page was clean then).  With the dirty page
3798 		 * disconnected here, we can make one final check.
3799 		 */
3800 		if (object->internal) {
3801 			if (VM_PAGE_Q_THROTTLED(iq)) {
3802 				inactive_throttled = TRUE;
3803 			}
3804 		} else if (VM_PAGE_Q_THROTTLED(eq)) {
3805 			inactive_throttled = TRUE;
3806 		}
3807 
3808 		if (inactive_throttled == TRUE) {
3809 			goto throttle_inactive;
3810 		}
3811 #if !CONFIG_JETSAM
3812 		memorystatus_update_available_page_count(AVAILABLE_NON_COMPRESSED_MEMORY);
3813 #endif /* !CONFIG_JETSAM */
3814 
3815 		if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
3816 			VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, 1);
3817 		}
3818 
3819 		if (object->internal) {
3820 			vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++;
3821 		} else {
3822 			vm_pageout_vminfo.vm_pageout_inactive_dirty_external++;
3823 		}
3824 
3825 		/*
3826 		 * internal pages will go to the compressor...
3827 		 * external pages will go to the appropriate pager to be cleaned
3828 		 * and upon completion will end up on 'vm_page_queue_cleaned' which
3829 		 * is a preferred queue to steal from
3830 		 */
3831 		vm_pageout_cluster(m);
3832 		inactive_burst_count = 0;
3833 
3834 		/*
3835 		 * back to top of pageout scan loop
3836 		 */
3837 	}
3838 }
3839 
3840 
3841 void
3842 vm_page_free_reserve(
3843 	int pages)
3844 {
3845 	int             free_after_reserve;
3846 
3847 	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3848 		if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) {
3849 			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3850 		} else {
3851 			vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3852 		}
3853 	} else {
3854 		if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) {
3855 			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3856 		} else {
3857 			vm_page_free_reserved += pages;
3858 		}
3859 	}
3860 	free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved;
3861 
3862 	vm_page_free_min = vm_page_free_reserved +
3863 	    VM_PAGE_FREE_MIN(free_after_reserve);
3864 
3865 	if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) {
3866 		vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3867 	}
3868 
3869 	vm_page_free_target = vm_page_free_reserved +
3870 	    VM_PAGE_FREE_TARGET(free_after_reserve);
3871 
3872 	if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) {
3873 		vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3874 	}
3875 
3876 	if (vm_page_free_target < vm_page_free_min + 5) {
3877 		vm_page_free_target = vm_page_free_min + 5;
3878 	}
3879 
3880 	vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3881 }
3882 
3883 /*
3884  *	vm_pageout is the high level pageout daemon.
3885  */
3886 
/*
 * Continuation for the pageout daemon thread: run one pageout pass via
 * vm_pageout_scan(), then block waiting on vm_page_free_wanted and
 * re-enter here when more free pages are needed.  Never returns.
 */
void
vm_pageout_continue(void)
{
	DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
	VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1);

	/* advertise that a pageout pass is in flight (see vm_pageout_wait) */
	vm_free_page_lock();
	vm_pageout_running = TRUE;
	vm_free_page_unlock();

	vm_pageout_scan();
	/*
	 * we hold both the vm_page_queue_free_lock
	 * and the vm_page_queues_lock at this point
	 */
	assert(vm_page_free_wanted == 0);
	assert(vm_page_free_wanted_privileged == 0);
	assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);

	vm_pageout_running = FALSE;
#if XNU_TARGET_OS_OSX
	/* release anyone blocked in vm_pageout_wait() on this pass */
	if (vm_pageout_waiter) {
		vm_pageout_waiter = FALSE;
		thread_wakeup((event_t)&vm_pageout_waiter);
	}
#endif /* XNU_TARGET_OS_OSX */

	vm_free_page_unlock();
	vm_page_unlock_queues();

	/* sleep until free pages are wanted, then restart from the top */
	thread_block((thread_continue_t)vm_pageout_continue);
	/*NOTREACHED*/
}
3920 
3921 #if XNU_TARGET_OS_OSX
3922 kern_return_t
3923 vm_pageout_wait(uint64_t deadline)
3924 {
3925 	kern_return_t kr;
3926 
3927 	vm_free_page_lock();
3928 	for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr);) {
3929 		vm_pageout_waiter = TRUE;
3930 		if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3931 			    &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3932 			    (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3933 			kr = KERN_OPERATION_TIMED_OUT;
3934 		}
3935 	}
3936 	vm_free_page_unlock();
3937 
3938 	return kr;
3939 }
3940 #endif /* XNU_TARGET_OS_OSX */
3941 
/*
 * Continuation for the external (file-backed) pageout I/O thread: pull
 * pages off its pageout queue one at a time, re-look each page up under
 * the object lock, and push its contents to the object's pager via
 * memory_object_data_return(), throttling between I/Os.  Sleeps on its
 * condition when the queue drains and restarts here when signalled.
 * Never returns.
 */
OS_NORETURN
static void
vm_pageout_iothread_external_continue(struct pgo_iothread_state *ethr, __unused wait_result_t w)
{
	vm_page_t       m = NULL;
	vm_object_t     object;
	vm_object_offset_t offset;
	memory_object_t pager;
	struct vm_pageout_queue *q = ethr->q;

	/* On systems with a compressor, the external IO thread clears its
	 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
	 * creation)
	 */
	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
		current_thread()->options &= ~TH_OPT_VMPRIV;
	}

	sched_cond_ack(&(ethr->pgo_wakeup));

	while (true) {
		vm_page_lockspin_queues();

		while (!vm_page_queue_empty(&q->pgo_pending)) {
			q->pgo_busy = TRUE;
			vm_page_queue_remove_first(&q->pgo_pending, m, vmp_pageq);

			assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
			VM_PAGE_CHECK(m);
			/*
			 * grab a snapshot of the object and offset this
			 * page is tabled in so that we can relookup this
			 * page after we've taken the object lock - these
			 * fields are stable while we hold the page queues lock
			 * but as soon as we drop it, there is nothing to keep
			 * this page in this object... we hold an activity_in_progress
			 * on this object which will keep it from terminating
			 */
			object = VM_PAGE_OBJECT(m);
			offset = m->vmp_offset;

			m->vmp_q_state = VM_PAGE_NOT_ON_Q;
			VM_PAGE_ZERO_PAGEQ_ENTRY(m);

			vm_page_unlock_queues();

			vm_object_lock(object);

			/* revalidate: the page may have changed while unlocked */
			m = vm_page_lookup(object, offset);

			if (m == NULL || m->vmp_busy || m->vmp_cleaning ||
			    !m->vmp_laundry || (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) {
				/*
				 * it's either the same page that someone else has
				 * started cleaning (or it's finished cleaning or
				 * been put back on the pageout queue), or
				 * the page has been freed or we have found a
				 * new page at this offset... in all of these cases
				 * we merely need to release the activity_in_progress
				 * we took when we put the page on the pageout queue
				 */
				vm_object_activity_end(object);
				vm_object_unlock(object);

				vm_page_lockspin_queues();
				continue;
			}
			pager = object->pager;

			if (pager == MEMORY_OBJECT_NULL) {
				/*
				 * This pager has been destroyed by either
				 * memory_object_destroy or vm_object_destroy, and
				 * so there is nowhere for the page to go.
				 */
				if (m->vmp_free_when_done) {
					/*
					 * Just free the page... VM_PAGE_FREE takes
					 * care of cleaning up all the state...
					 * including doing the vm_pageout_throttle_up
					 */
					VM_PAGE_FREE(m);
				} else {
					vm_page_lockspin_queues();

					vm_pageout_throttle_up(m);
					vm_page_activate(m);

					vm_page_unlock_queues();

					/*
					 *	And we are done with it.
					 */
				}
				vm_object_activity_end(object);
				vm_object_unlock(object);

				vm_page_lockspin_queues();
				continue;
			}
	#if 0
			/*
			 * we don't hold the page queue lock
			 * so this check isn't safe to make
			 */
			VM_PAGE_CHECK(m);
	#endif
			/*
			 * give back the activity_in_progress reference we
			 * took when we queued up this page and replace it
			 * it with a paging_in_progress reference that will
			 * also hold the paging offset from changing and
			 * prevent the object from terminating
			 */
			vm_object_activity_end(object);
			vm_object_paging_begin(object);
			vm_object_unlock(object);

			/*
			 * Send the data to the pager.
			 * any pageout clustering happens there
			 */
			memory_object_data_return(pager,
			    m->vmp_offset + object->paging_offset,
			    PAGE_SIZE,
			    NULL,
			    NULL,
			    FALSE,
			    FALSE,
			    0);

			vm_object_lock(object);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			/* pace the I/O so we don't saturate the paging device */
			vm_pageout_io_throttle();

			vm_page_lockspin_queues();
		}
		q->pgo_busy = FALSE;

		vm_page_unlock_queues();
		/* queue drained: sleep until more work arrives, restarting here */
		sched_cond_wait_parameter(&(ethr->pgo_wakeup), THREAD_UNINT, (thread_continue_t)vm_pageout_iothread_external_continue, ethr);
	}
	/*NOTREACHED*/
}
4088 
4089 uint32_t vm_compressor_time_thread; /* Set via sysctl 'vm.compressor_timing_enabled' to record time accrued by this thread. */
4090 
4091 #if DEVELOPMENT || DEBUG
4092 static void
4093 vm_pageout_record_thread_time(int cqid, int ncomps)
4094 {
4095 	if (__improbable(vm_compressor_time_thread)) {
4096 		vmct_stats.vmct_runtimes[cqid] = thread_get_runtime_self();
4097 		vmct_stats.vmct_pages[cqid] += ncomps;
4098 		vmct_stats.vmct_iterations[cqid]++;
4099 		if (ncomps > vmct_stats.vmct_maxpages[cqid]) {
4100 			vmct_stats.vmct_maxpages[cqid] = ncomps;
4101 		}
4102 		if (ncomps < vmct_stats.vmct_minpages[cqid]) {
4103 			vmct_stats.vmct_minpages[cqid] = ncomps;
4104 		}
4105 	}
4106 }
4107 #endif
4108 
4109 static void *
4110 vm_pageout_select_filling_chead(struct pgo_iothread_state *cq, vm_page_t m)
4111 {
4112 	/*
4113 	 * Technically we need the pageq locks to manipulate the vmp_on_specialq field.
4114 	 * However, this page has been removed from all queues and is only
4115 	 * known to this compressor thread dealing with this local queue.
4116 	 *
4117 	 * TODO: Add a second localq that is the early localq and
4118 	 * put special pages like this one on that queue in the block above
4119 	 * under the pageq lock to avoid this 'works but not clean' logic.
4120 	 */
4121 	void *donate_queue_head;
4122 #if XNU_TARGET_OS_OSX /* tag:DONATE */
4123 	donate_queue_head = &cq->current_early_swapout_chead;
4124 #else /* XNU_TARGET_OS_OSX */
4125 	donate_queue_head = &cq->current_late_swapout_chead;
4126 #endif /* XNU_TARGET_OS_OSX */
4127 	if (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE) {
4128 		m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
4129 		return donate_queue_head;
4130 	} else {
4131 		return &cq->current_regular_swapout_chead;
4132 	}
4133 }
4134 
4135 #define         MAX_FREE_BATCH          32
4136 
/*
 * Continuation for a compressor ("internal pageout") thread: drain
 * batches of up to local_batch_size pages from its pending queue,
 * compress each via vm_pageout_compress_page(), free successfully
 * compressed pages in batches of MAX_FREE_BATCH, then sleep on its
 * condition until signalled again.  Never returns.
 */
OS_NORETURN
static void
vm_pageout_iothread_internal_continue(struct pgo_iothread_state *cq, __unused wait_result_t w)
{
	struct vm_pageout_queue *q;
	vm_page_t       m = NULL;
	boolean_t       pgo_draining;
	vm_page_t   local_q;
	int         local_cnt;
	vm_page_t   local_freeq = NULL;
	int         local_freed = 0;
	int         local_batch_size;
#if DEVELOPMENT || DEBUG
	int       ncomps = 0;
	boolean_t marked_active = FALSE;
	int       num_pages_processed = 0;
#endif
	void *chead = NULL;

	KDBG_FILTERED(0xe040000c | DBG_FUNC_END);

	sched_cond_ack(&(cq->pgo_wakeup));

	q = cq->q;

	while (true) { /* this top loop is for the compressor_running_perf_test running a full speed without blocking */
#if DEVELOPMENT || DEBUG
		bool benchmark_accounting = false;
		/* If we're running the compressor perf test, only process the benchmark pages.
		 * We'll get back to our regular queue once the benchmark is done */
		if (compressor_running_perf_test) {
			q = cq->benchmark_q;
			if (!vm_page_queue_empty(&q->pgo_pending)) {
				benchmark_accounting = true;
			} else {
				q = cq->q;
				benchmark_accounting = false;
			}
		}
#endif /* DEVELOPMENT || DEBUG */

#if __AMP__
		if (vm_compressor_ebound && (vm_pageout_state.vm_compressor_thread_count > 1)) {
			local_batch_size = (q->pgo_maxlaundry >> 3);
			local_batch_size = MAX(local_batch_size, 16);
		} else {
			local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
		}
#else
		local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
#endif

#if RECORD_THE_COMPRESSED_DATA
		if (q->pgo_laundry) {
			c_compressed_record_init();
		}
#endif
		while (true) { /* this loop is for working though all the pages in the pending queue */
			int     pages_left_on_q = 0;

			local_cnt = 0;
			local_q = NULL;

			KDBG_FILTERED(0xe0400014 | DBG_FUNC_START);

			vm_page_lock_queues();
#if DEVELOPMENT || DEBUG
			if (marked_active == FALSE) {
				vmct_active++;
				vmct_state[cq->id] = VMCT_ACTIVE;
				marked_active = TRUE;
				if (vmct_active == 1) {
					vm_compressor_epoch_start = mach_absolute_time();
				}
			}
#endif
			KDBG_FILTERED(0xe0400014 | DBG_FUNC_END);

			KDBG_FILTERED(0xe0400018 | DBG_FUNC_START, q->pgo_laundry);

			/* empty the entire content of the thread input q to local_q, but not more than local_batch_size pages */
			while (!vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
				vm_page_queue_remove_first(&q->pgo_pending, m, vmp_pageq);
				assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
				VM_PAGE_CHECK(m);

				m->vmp_q_state = VM_PAGE_NOT_ON_Q;
				VM_PAGE_ZERO_PAGEQ_ENTRY(m);
				m->vmp_laundry = FALSE;

				m->vmp_snext = local_q;
				local_q = m;
				local_cnt++;
			}
			/* input queue empty: exit drain loop with page-queues lock still held */
			if (local_q == NULL) {
				break;
			}

			q->pgo_busy = TRUE;

			if ((pgo_draining = q->pgo_draining) == FALSE) {
				vm_pageout_throttle_up_batch(q, local_cnt);
				pages_left_on_q = q->pgo_laundry;
			} else {
				pages_left_on_q = q->pgo_laundry - local_cnt;
			}

			vm_page_unlock_queues();

#if !RECORD_THE_COMPRESSED_DATA
			/* if we have lots to compress, wake up the other thread to help.
			 * disabled when recording data since record data is not protected with a mutex so this may cause races */
			if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - 1)) {
				// wake up the next compressor thread
				sched_cond_signal(&pgo_iothread_internal_state[cq->id + 1].pgo_wakeup,
				    pgo_iothread_internal_state[cq->id + 1].pgo_iothread);
			}
#endif
			KDBG_FILTERED(0xe0400018 | DBG_FUNC_END, q->pgo_laundry);

			/* compress each page on the local list, no locks held */
			while (local_q) {
				KDBG_FILTERED(0xe0400024 | DBG_FUNC_START, local_cnt);

				m = local_q;
				local_q = m->vmp_snext;
				m->vmp_snext = NULL;


				chead = vm_pageout_select_filling_chead(cq, m);

				if (vm_pageout_compress_page(chead, cq->scratch_buf, m) == KERN_SUCCESS) {
#if DEVELOPMENT || DEBUG
					ncomps++;
#endif
					KDBG_FILTERED(0xe0400024 | DBG_FUNC_END, local_cnt);

					m->vmp_snext = local_freeq;
					local_freeq = m;
					local_freed++;

					/* if we gathered enough free pages, free them now */
					if (local_freed >= MAX_FREE_BATCH) {
						OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);

						vm_page_free_list(local_freeq, TRUE);

						local_freeq = NULL;
						local_freed = 0;
					}
				}
#if DEVELOPMENT || DEBUG
				num_pages_processed++;
#endif /* DEVELOPMENT || DEBUG */
#if !CONFIG_JETSAM /* Maybe: if there's no JETSAM, be more proactive in waking up anybody that needs free pages */
				while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
					kern_return_t   wait_result;
					int             need_wakeup = 0;

					/* flush our batched free pages before blocking */
					if (local_freeq) {
						OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);

						vm_page_free_list(local_freeq, TRUE);
						local_freeq = NULL;
						local_freed = 0;

						continue;
					}
					vm_free_page_lock_spin();

					if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
						if (vm_page_free_wanted_privileged++ == 0) {
							need_wakeup = 1;
						}
						wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);

						vm_free_page_unlock();

						if (need_wakeup) {
							thread_wakeup((event_t)&vm_page_free_wanted);
						}

						if (wait_result == THREAD_WAITING) {
							thread_block(THREAD_CONTINUE_NULL);
						}
					} else {
						vm_free_page_unlock();
					}
				}
#endif
			}  /* while (local_q) */
			/* free any leftovers in the freeq */
			if (local_freeq) {
				OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);

				vm_page_free_list(local_freeq, TRUE);
				local_freeq = NULL;
				local_freed = 0;
			}
			if (pgo_draining == TRUE) {
				vm_page_lockspin_queues();
				vm_pageout_throttle_up_batch(q, local_cnt);
				vm_page_unlock_queues();
			}
		}
		KDBG_FILTERED(0xe040000c | DBG_FUNC_START);

		/*
		 * queue lock is held and our q is empty
		 */
		q->pgo_busy = FALSE;
#if DEVELOPMENT || DEBUG
		if (marked_active == TRUE) {
			vmct_active--;
			vmct_state[cq->id] = VMCT_IDLE;

			if (vmct_active == 0) {
				vm_compressor_epoch_stop = mach_absolute_time();
				assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start,
				    "Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
				    vm_compressor_epoch_start, vm_compressor_epoch_stop);
				/* This interval includes intervals where one or more
				 * compressor threads were pre-empted
				 */
				vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start;
			}
		}
		if (compressor_running_perf_test && benchmark_accounting) {
			/*
			 * We could turn ON compressor_running_perf_test while still processing
			 * regular non-benchmark pages. We shouldn't count them here else we
			 * could overshoot. We might also still be populating that benchmark Q
			 * and be under pressure. So we will go back to the regular queues. And
			 * benchmark accounting will be off for that case too.
			 */
			compressor_perf_test_pages_processed += num_pages_processed;
			thread_wakeup(&compressor_perf_test_pages_processed);
		}
#endif
		vm_page_unlock_queues();
#if DEVELOPMENT || DEBUG
		vm_pageout_record_thread_time(cq->id, ncomps);
#endif

		KDBG_FILTERED(0xe0400018 | DBG_FUNC_END);
#if DEVELOPMENT || DEBUG
		if (compressor_running_perf_test && benchmark_accounting) {
			/*
			 * We've been exclusively compressing pages from the benchmark queue,
			 * do 1 pass over the internal queue before blocking.
			 */
			continue;
		}
#endif

		/* no work left: sleep until signalled, restarting here */
		sched_cond_wait_parameter(&(cq->pgo_wakeup), THREAD_UNINT, (thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
	}
	/*NOTREACHED*/
}
4395 
/*
 * Compress a single page into the current compression chead.
 *
 * Resolves (creating if necessary) the compressor pager for the page's
 * object and hands the page's contents to vm_compressor_pager_put(),
 * maintaining compression statistics in the pager and in the vm_object.
 *
 * On success the page is removed from its object (if tabled) and left
 * for the caller to free; on failure the page is reactivated.  In both
 * cases the object's activity_in_progress reference (taken when the
 * page was queued for pageout) is released before returning.
 *
 * Returns KERN_SUCCESS or a failure code from vm_compressor_pager_put()
 * (KERN_FAILURE if no pager could be created).
 */
kern_return_t
vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m)
{
	vm_object_t     object;
	memory_object_t pager;
	int             compressed_count_delta;
	kern_return_t   retval;

	object = VM_PAGE_OBJECT(m);

	assert(!m->vmp_free_when_done);
	assert(!m->vmp_laundry);

	pager = object->pager;

	if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
		KDBG_FILTERED(0xe0400010 | DBG_FUNC_START, object, pager);

		vm_object_lock(object);

		/*
		 * If there is no memory object for the page, create
		 * one and hand it to the compression pager.
		 */

		if (!object->pager_initialized) {
			vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
		}
		if (!object->pager_initialized) {
			vm_object_compressor_pager_create(object);
		}

		pager = object->pager;

		if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
			/*
			 * Still no pager for the object,
			 * or the pager has been destroyed.
			 * Reactivate the page.
			 *
			 * Should only happen if there is no
			 * compression pager
			 */
			vm_page_wakeup_done(object, m);

			vm_page_lockspin_queues();
			vm_page_activate(m);
			VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, 1);
			vm_page_unlock_queues();

			/*
			 *	And we are done with it.
			 */
			vm_object_activity_end(object);
			vm_object_unlock(object);

			return KERN_FAILURE;
		}
		vm_object_unlock(object);

		KDBG_FILTERED(0xe0400010 | DBG_FUNC_END, object, pager);
	}
	assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
	assert(object->activity_in_progress > 0);

#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
	if (m->vmp_unmodified_ro == true) {
		os_atomic_inc(&compressor_ro_uncompressed_total_returned, relaxed);
	}
#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */

	vm_compressor_options_t flags = 0;

#if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
	if (m->vmp_unmodified_ro) {
		flags |= C_PAGE_UNMODIFIED;
	}
#endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */


	retval = vm_compressor_pager_put(
		pager,
		m->vmp_offset + object->paging_offset,
		VM_PAGE_GET_PHYS_PAGE(m),
		current_chead,
		scratch_buf,
		&compressed_count_delta,
		flags);

	vm_object_lock(object);

	assert(object->activity_in_progress > 0);
	assert(VM_PAGE_OBJECT(m) == object);
	assert( !VM_PAGE_WIRED(m));

	vm_compressor_pager_count(pager,
	    compressed_count_delta,
	    FALSE,                       /* shared_lock */
	    object);

	if (retval == KERN_SUCCESS) {
		/*
		 * If the object is purgeable, its owner's
		 * purgeable ledgers will be updated in
		 * vm_page_remove() but the page still
		 * contributes to the owner's memory footprint,
		 * so account for it as such.
		 */
		if (m->vmp_tabled) {
			vm_page_remove(m, TRUE);
		}
		if ((object->purgable != VM_PURGABLE_DENY ||
		    object->vo_ledger_tag) &&
		    object->vo_owner != NULL) {
			/* one more compressed purgeable/tagged page */
			vm_object_owner_compressed_update(object,
			    compressed_count_delta);
		}
		counter_inc(&vm_statistics_compressions);
	} else {
		/* compression failed: wake any waiters and reactivate the page */
		vm_page_wakeup_done(object, m);

		vm_page_lockspin_queues();

		vm_page_activate(m);
		vm_pageout_vminfo.vm_compressor_failed++;

		vm_page_unlock_queues();
	}
	vm_object_activity_end(object);
	vm_object_unlock(object);

	return retval;
}
4531 
4532 
/*
 * Adjust the I/O throttle level of an external pageout I/O thread.
 *
 * Caller holds the vm_page queues lock (we drop it around the
 * proc_set_thread_policy() call, which may block, and reacquire it
 * before publishing the new pgo_lowpriority state).
 *
 * While hibernation cleaning is in progress the thread is always
 * forced to the unthrottled level, regardless of what was requested.
 */
static void
vm_pageout_adjust_eq_iothrottle(struct pgo_iothread_state *ethr, boolean_t req_lowpriority)
{
	uint32_t        policy;

	if (hibernate_cleaning_in_progress == TRUE) {
		req_lowpriority = FALSE;
	}

	/* no-op until the queue is initialized or if already at the requested level */
	if (ethr->q->pgo_inited == TRUE && ethr->q->pgo_lowpriority != req_lowpriority) {
		vm_page_unlock_queues();

		if (req_lowpriority == TRUE) {
			policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
			DTRACE_VM(laundrythrottle);
		} else {
			policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
			DTRACE_VM(laundryunthrottle);
		}
		proc_set_thread_policy(ethr->pgo_iothread,
		    TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);

		vm_page_lock_queues();
		ethr->q->pgo_lowpriority = req_lowpriority;
	}
}
4559 
/*
 * Startup continuation for the external pageout I/O thread.
 *
 * Performs the one-time setup -- marks the thread VM-privileged, starts
 * it at the throttled I/O level, publishes the external pageout queue as
 * initialized -- then enters vm_pageout_iothread_external_continue(),
 * which never returns.
 */
OS_NORETURN
static void
vm_pageout_iothread_external(struct pgo_iothread_state *ethr, __unused wait_result_t w)
{
	thread_t        self = current_thread();

	/* allow this thread to allocate from the VM-privileged reserves */
	self->options |= TH_OPT_VMPRIV;

	DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);

	/* external pageout starts life I/O-throttled; adjusted later as needed */
	proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
	    TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);

	vm_page_lock_queues();

	vm_pageout_queue_external.pgo_lowpriority = TRUE;
	vm_pageout_queue_external.pgo_inited = TRUE;

	vm_page_unlock_queues();

#if CONFIG_THREAD_GROUPS
	thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */

	vm_pageout_iothread_external_continue(ethr, 0);
	/*NOTREACHED*/
}
4587 
4588 
/*
 * Startup continuation for a compressor ("internal" pageout) I/O thread.
 *
 * Performs the one-time setup -- marks the thread VM-privileged,
 * publishes the internal pageout queue as initialized, optionally binds
 * the thread (single-processor group and/or E-cluster on AMP systems) --
 * then enters vm_pageout_iothread_internal_continue(), which never
 * returns.
 */
OS_NORETURN
static void
vm_pageout_iothread_internal(struct pgo_iothread_state *cthr, __unused wait_result_t w)
{
	thread_t        self = current_thread();

	/* allow this thread to allocate from the VM-privileged reserves */
	self->options |= TH_OPT_VMPRIV;

	vm_page_lock_queues();

	vm_pageout_queue_internal.pgo_lowpriority = TRUE;
	vm_pageout_queue_internal.pgo_inited = TRUE;

#if DEVELOPMENT || DEBUG
	/* the benchmark queue mirrors the internal queue's initial state */
	vm_pageout_queue_benchmark.pgo_lowpriority = vm_pageout_queue_internal.pgo_lowpriority;
	vm_pageout_queue_benchmark.pgo_inited = vm_pageout_queue_internal.pgo_inited;
	vm_pageout_queue_benchmark.pgo_busy = FALSE;
#endif /* DEVELOPMENT || DEBUG */

	vm_page_unlock_queues();

	if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
		/* see vm_set_restrictions(): keep the major VM threads on one CPU */
		thread_vm_bind_group_add();
	}

#if CONFIG_THREAD_GROUPS
	thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */

#if __AMP__
	if (vm_compressor_ebound) {
		/*
		 * Use the soft bound option for vm_compressor to allow it to run on
		 * P-cores if E-cluster is unavailable.
		 */
		thread_bind_cluster_type(self, 'E', true);
	}
#endif /* __AMP__ */

	thread_set_thread_name(current_thread(), "VM_compressor");
#if DEVELOPMENT || DEBUG
	/* start the per-thread minimum-pages statistic at "no pages seen yet" */
	vmct_stats.vmct_minpages[cthr->id] = INT32_MAX;
#endif
	vm_pageout_iothread_internal_continue(cthr, 0);

	/*NOTREACHED*/
}
4636 
4637 kern_return_t
4638 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
4639 {
4640 	if (OSCompareAndSwapPtr(NULL, ptrauth_nop_cast(void *, func), (void * volatile *) &consider_buffer_cache_collect)) {
4641 		return KERN_SUCCESS;
4642 	} else {
4643 		return KERN_FAILURE; /* Already set */
4644 	}
4645 }
4646 
4647 extern boolean_t        memorystatus_manual_testing_on;
4648 extern unsigned int     memorystatus_level;
4649 
4650 
4651 #if VM_PRESSURE_EVENTS
4652 
4653 boolean_t vm_pressure_events_enabled = FALSE;
4654 
4655 extern uint64_t next_warning_notification_sent_at_ts;
4656 extern uint64_t next_critical_notification_sent_at_ts;
4657 
4658 #define PRESSURE_LEVEL_STUCK_THRESHOLD_MINS    (30)    /* 30 minutes. */
4659 
4660 /*
4661  * The last time there was change in pressure level OR we forced a check
4662  * because the system is stuck in a non-normal pressure level.
4663  */
4664 uint64_t  vm_pressure_last_level_transition_abs = 0;
4665 
4666 /*
4667  *  This is how the long the system waits 'stuck' in an unchanged non-normal pressure
4668  * level before resending out notifications for that level again.
4669  */
4670 int  vm_pressure_level_transition_threshold = PRESSURE_LEVEL_STUCK_THRESHOLD_MINS;
4671 
/*
 * Recompute the system memory-pressure level and, when it changes (or the
 * system has been stuck at a non-normal level past the transition
 * threshold), wake the pressure thread so notifications are sent.
 *
 * Also refreshes memorystatus_level, the percentage of memory currently
 * available.  No-op while pressure events are disabled; the level update
 * alone is performed when manual testing is active.
 */
void
vm_pressure_response(void)
{
	vm_pressure_level_t     old_level = kVMPressureNormal;
	int                     new_level = -1;   /* -1 == no level change decided yet */
	unsigned int            total_pages;
	uint64_t                available_memory = 0;
	uint64_t                curr_ts, abs_time_since_level_transition, time_in_ns;
	bool                    force_check = false;
	int                     time_in_mins;


	if (vm_pressure_events_enabled == FALSE) {
		return;
	}

	available_memory = (uint64_t) memorystatus_get_available_page_count();

	total_pages = (unsigned int) atop_64(max_mem);
#if CONFIG_SECLUDED_MEMORY
	/* secluded pages are not generally reclaimable, so exclude them */
	total_pages -= vm_page_secluded_count;
#endif /* CONFIG_SECLUDED_MEMORY */
	memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);

	if (memorystatus_manual_testing_on) {
		/* level is driven manually; don't run the state machine */
		return;
	}

	/*
	 * force_check becomes true when we've sat at the current level for
	 * longer than vm_pressure_level_transition_threshold minutes; used
	 * below to re-send notifications for a "stuck" non-normal level.
	 */
	curr_ts = mach_absolute_time();
	abs_time_since_level_transition = curr_ts - vm_pressure_last_level_transition_abs;

	absolutetime_to_nanoseconds(abs_time_since_level_transition, &time_in_ns);
	time_in_mins = (int) ((time_in_ns / NSEC_PER_SEC) / 60);
	force_check = (time_in_mins >= vm_pressure_level_transition_threshold);

	old_level = memorystatus_vm_pressure_level;

	/* decide the next level based on the current one */
	switch (memorystatus_vm_pressure_level) {
	case kVMPressureNormal:
	{
		if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
			new_level = kVMPressureCritical;
		} else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
			new_level = kVMPressureWarning;
		}
		break;
	}

	case kVMPressureWarning:
	case kVMPressureUrgent:
	{
		if (VM_PRESSURE_WARNING_TO_NORMAL()) {
			new_level = kVMPressureNormal;
		} else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
			new_level = kVMPressureCritical;
		} else if (force_check) {
			/* stuck at warning: re-arm the warning notification */
			new_level = kVMPressureWarning;
			next_warning_notification_sent_at_ts = curr_ts;
		}
		break;
	}

	case kVMPressureCritical:
	{
		if (VM_PRESSURE_WARNING_TO_NORMAL()) {
			new_level = kVMPressureNormal;
		} else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
			new_level = kVMPressureWarning;
		} else if (force_check) {
			/* stuck at critical: re-arm the critical notification */
			new_level = kVMPressureCritical;
			next_critical_notification_sent_at_ts = curr_ts;
		}
		break;
	}

	default:
		return;
	}

	if (new_level != -1 || force_check) {
		if (new_level != -1) {
			memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;

			if (new_level != (int) old_level) {
				VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, DBG_VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
				    new_level, old_level, 0, 0);
			}
		} else {
			VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, DBG_VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
			    new_level, old_level, force_check, 0);
		}

		if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) {
			/*
			 * We don't want to schedule a wakeup while hibernation is in progress
			 * because that could collide with checks for non-monotonicity in the scheduler.
			 * We do however do all the updates to memorystatus_vm_pressure_level because
			 * we _might_ want to use that for decisions regarding which pages or how
			 * many pages we want to dump in hibernation.
			 */
			return;
		}

		if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != memorystatus_vm_pressure_level) || force_check) {
			/* kick the pressure thread unless it is already running */
			if (vm_pageout_state.vm_pressure_thread_running == FALSE) {
				thread_wakeup(&vm_pressure_thread);
			}

			/* wake anyone blocked in mach_vm_pressure_level_monitor() */
			if (old_level != memorystatus_vm_pressure_level) {
				thread_wakeup(&vm_pageout_state.vm_pressure_changed);
			}
			vm_pressure_last_level_transition_abs = curr_ts; /* renew the window of observation for a stuck pressure level */
		}
	}
}
4787 #endif /* VM_PRESSURE_EVENTS */
4788 
4789 
4790 /**
4791  * Called by a kernel thread to ask if a number of pages may be wired.
4792  */
4793 kern_return_t
4794 mach_vm_wire_level_monitor(int64_t requested_pages)
4795 {
4796 	if (requested_pages <= 0) {
4797 		return KERN_INVALID_ARGUMENT;
4798 	}
4799 
4800 	const int64_t max_wire_pages = atop_64(vm_global_user_wire_limit);
4801 	/**
4802 	 * Available pages can be negative in the case where more system memory is
4803 	 * wired than the threshold, so we must use a signed integer.
4804 	 */
4805 	const int64_t available_pages = max_wire_pages - vm_page_wire_count;
4806 
4807 	if (requested_pages > available_pages) {
4808 		return KERN_RESOURCE_SHORTAGE;
4809 	}
4810 	return KERN_SUCCESS;
4811 }
4812 
4813 /*
4814  * Function called by a kernel thread to either get the current pressure level or
4815  * wait until memory pressure changes from a given level.
4816  */
/*
 * If wait_for_pressure is FALSE: report the current pressure level in
 * *pressure_level and return immediately.
 *
 * If wait_for_pressure is TRUE: block until the pressure level changes
 * away from *pressure_level (for the normal/warning/urgent/critical
 * levels), or until the corresponding jetsam band broadcast fires (for
 * the fg/bg jetsam pseudo-levels).  On success *pressure_level holds the
 * new level.  An interrupted wait returns KERN_ABORTED.
 */
kern_return_t
mach_vm_pressure_level_monitor(boolean_t wait_for_pressure, unsigned int *pressure_level)
{
#if !VM_PRESSURE_EVENTS
	(void)wait_for_pressure;
	(void)pressure_level;
	return KERN_NOT_SUPPORTED;
#else /* VM_PRESSURE_EVENTS */

	uint32_t *waiters = NULL;
	wait_result_t wr = 0;
	vm_pressure_level_t old_level = memorystatus_vm_pressure_level;

	if (pressure_level == NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	/* the jetsam pseudo-levels are only meaningful as wait targets */
	if (!wait_for_pressure && (*pressure_level == kVMPressureBackgroundJetsam ||
	    *pressure_level == kVMPressureForegroundJetsam)) {
		return KERN_INVALID_ARGUMENT;
	}

	if (wait_for_pressure) {
		switch (*pressure_level) {
		case kVMPressureForegroundJetsam:
		case kVMPressureBackgroundJetsam:

			if (*pressure_level == kVMPressureForegroundJetsam) {
				waiters = &memorystatus_jetsam_fg_band_waiters;
			} else {
				/* kVMPressureBackgroundJetsam */
				waiters = &memorystatus_jetsam_bg_band_waiters;
			}

			/*
			 * The waiter count is bumped only after assert_wait()
			 * commits us to waiting, under the broadcast lock, so
			 * the broadcaster wakes exactly the threads counted.
			 */
			lck_mtx_lock(&memorystatus_jetsam_broadcast_lock);
			wr = assert_wait((event_t)waiters, THREAD_INTERRUPTIBLE);
			if (wr == THREAD_WAITING) {
				*waiters += 1;
				lck_mtx_unlock(&memorystatus_jetsam_broadcast_lock);
				wr = thread_block(THREAD_CONTINUE_NULL);
			} else {
				lck_mtx_unlock(&memorystatus_jetsam_broadcast_lock);
			}

			if (wr != THREAD_AWAKENED) {
				return KERN_ABORTED;
			}

			return KERN_SUCCESS;
		case kVMPressureNormal:
		case kVMPressureWarning:
		case kVMPressureUrgent:
		case kVMPressureCritical:
			/* loop until the level actually differs from the caller's */
			while (old_level == *pressure_level) {
				wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
				    THREAD_INTERRUPTIBLE);
				if (wr == THREAD_WAITING) {
					wr = thread_block(THREAD_CONTINUE_NULL);
				}
				if (wr == THREAD_INTERRUPTED) {
					return KERN_ABORTED;
				}

				if (wr == THREAD_AWAKENED) {
					old_level = memorystatus_vm_pressure_level;
				}
			}
			break;
		default:
			return KERN_INVALID_ARGUMENT;
		}
	}

	*pressure_level = old_level;
	return KERN_SUCCESS;
#endif /* VM_PRESSURE_EVENTS */
}
4893 
4894 #if VM_PRESSURE_EVENTS
/*
 * Continuation for the VM pressure thread.
 *
 * The very first invocation only performs setup (thread group
 * membership, thread name) and then blocks; every subsequent wakeup
 * (see vm_pressure_response()) runs consider_vm_pressure_events() with
 * vm_pressure_thread_running set, then blocks again on
 * &vm_pressure_thread.
 */
void
vm_pressure_thread(void)
{
	static boolean_t thread_initialized = FALSE;

	if (thread_initialized == TRUE) {
		vm_pageout_state.vm_pressure_thread_running = TRUE;
		consider_vm_pressure_events();
		vm_pageout_state.vm_pressure_thread_running = FALSE;
	}

#if CONFIG_THREAD_GROUPS
	thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */

	thread_set_thread_name(current_thread(), "VM_pressure");
	thread_initialized = TRUE;
	assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
	thread_block((thread_continue_t)vm_pressure_thread);
}
4915 #endif /* VM_PRESSURE_EVENTS */
4916 
4917 
4918 /*
4919  * called once per-second via "compute_averages"
4920  */
4921 void
4922 compute_pageout_gc_throttle(__unused void *arg)
4923 {
4924 	if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) {
4925 		vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page;
4926 
4927 		thread_wakeup(VM_PAGEOUT_GC_EVENT);
4928 	}
4929 }
4930 
4931 /*
4932  * vm_pageout_garbage_collect can also be called when the zone allocator needs
4933  * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4934  * jetsams. We need to check if the zone map size is above its jetsam limit to
4935  * decide if this was indeed the case.
4936  *
4937  * We need to do this on a different thread because of the following reasons:
4938  *
4939  * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4940  * itself causing the system to hang. We perform synchronous jetsams if we're
4941  * leaking in the VM map entries zone, so the leaking process could be doing a
4942  * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4943  * jetsam itself. We also need the vm_map lock on the process termination path,
4944  * which would now lead the dying process to deadlock against itself.
4945  *
4946  * 2. The jetsam path might need to allocate zone memory itself. We could try
4947  * using the non-blocking variant of zalloc for this path, but we can still
4948  * end up trying to do a kmem_alloc when the zone maps are almost full.
4949  */
/*
 * Body of the garbage-collection thread (created in
 * vm_pageout_create_gc_thread(), started from vm_pageout()).
 *
 * step is VM_PAGEOUT_GC_INIT on the very first invocation (setup only)
 * and VM_PAGEOUT_GC_COLLECT on every wakeup thereafter.  Never returns:
 * it re-blocks on VM_PAGEOUT_GC_EVENT with itself as the continuation.
 */
__dead2
void
vm_pageout_garbage_collect(void *step, wait_result_t wr __unused)
{
	assert(step == VM_PAGEOUT_GC_INIT || step == VM_PAGEOUT_GC_COLLECT);

	if (step == VM_PAGEOUT_GC_INIT) {
		/* first time being called is not about GC */
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */
	} else if (zone_map_nearing_exhaustion()) {
		/*
		 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
		 *
		 * Bail out after calling zone_gc (which triggers the
		 * zone-map-exhaustion jetsams). If we fall through, the subsequent
		 * operations that clear out a bunch of caches might allocate zone
		 * memory themselves (for eg. vm_map operations would need VM map
		 * entries). Since the zone map is almost full at this point, we
		 * could end up with a panic. We just need to quickly jetsam a
		 * process and exit here.
		 *
		 * It could so happen that we were woken up to relieve memory
		 * pressure and the zone map also happened to be near its limit at
		 * the time, in which case we'll skip out early. But that should be
		 * ok; if memory pressure persists, the thread will simply be woken
		 * up again.
		 */
		zone_gc(ZONE_GC_JETSAM);
	} else {
		/* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
		boolean_t buf_large_zfree = FALSE;
		boolean_t first_try = TRUE;

		stack_collect();

		consider_machine_collect();
#if CONFIG_MBUF_MCACHE
		mbuf_drain(FALSE);
#endif /* CONFIG_MBUF_MCACHE */

		/*
		 * Keep draining the buffer cache as long as it keeps freeing
		 * large zone allocations and we are still short of free pages.
		 */
		do {
			if (consider_buffer_cache_collect != NULL) {
				buf_large_zfree = (*consider_buffer_cache_collect)(0);
			}
			if (first_try == TRUE || buf_large_zfree == TRUE) {
				/*
				 * zone_gc should be last, because the other operations
				 * might return memory to zones.
				 */
				zone_gc(ZONE_GC_TRIM);
			}
			first_try = FALSE;
		} while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);

		consider_machine_adjust();
	}

	/* sleep until compute_pageout_gc_throttle() or the zone allocator wakes us */
	assert_wait(VM_PAGEOUT_GC_EVENT, THREAD_UNINT);

	thread_block_parameter(vm_pageout_garbage_collect, VM_PAGEOUT_GC_COLLECT);
	__builtin_unreachable();
}
5014 
5015 
5016 #if VM_PAGE_BUCKETS_CHECK
5017 #if VM_PAGE_FAKE_BUCKETS
5018 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
5019 #endif /* VM_PAGE_FAKE_BUCKETS */
5020 #endif /* VM_PAGE_BUCKETS_CHECK */
5021 
5022 
5023 
5024 void
5025 vm_set_restrictions(unsigned int num_cpus)
5026 {
5027 	int vm_restricted_to_single_processor = 0;
5028 
5029 	if (PE_parse_boot_argn("vm_restricted_to_single_processor", &vm_restricted_to_single_processor, sizeof(vm_restricted_to_single_processor))) {
5030 		kprintf("Overriding vm_restricted_to_single_processor to %d\n", vm_restricted_to_single_processor);
5031 		vm_pageout_state.vm_restricted_to_single_processor = (vm_restricted_to_single_processor ? TRUE : FALSE);
5032 	} else {
5033 		assert(num_cpus > 0);
5034 
5035 		if (num_cpus <= 3) {
5036 			/*
5037 			 * on systems with a limited number of CPUS, bind the
5038 			 * 4 major threads that can free memory and that tend to use
5039 			 * a fair bit of CPU under pressured conditions to a single processor.
5040 			 * This insures that these threads don't hog all of the available CPUs
5041 			 * (important for camera launch), while allowing them to run independently
5042 			 * w/r to locks... the 4 threads are
5043 			 * vm_pageout_scan,  vm_pageout_iothread_internal (compressor),
5044 			 * vm_compressor_swap_trigger_thread (minor and major compactions),
5045 			 * memorystatus_thread (jetsams).
5046 			 *
5047 			 * the first time the thread is run, it is responsible for checking the
5048 			 * state of vm_restricted_to_single_processor, and if TRUE it calls
5049 			 * thread_bind_master...  someday this should be replaced with a group
5050 			 * scheduling mechanism and KPI.
5051 			 */
5052 			vm_pageout_state.vm_restricted_to_single_processor = TRUE;
5053 		} else {
5054 			vm_pageout_state.vm_restricted_to_single_processor = FALSE;
5055 		}
5056 	}
5057 }
5058 
5059 /*
5060  * Set up vm_config based on the vm_compressor_mode.
5061  * Must run BEFORE the pageout thread starts up.
5062  */
5063 __startup_func
5064 void
5065 vm_config_init(void)
5066 {
5067 	bzero(&vm_config, sizeof(vm_config));
5068 
5069 	switch (vm_compressor_mode) {
5070 	case VM_PAGER_DEFAULT:
5071 		printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
5072 		OS_FALLTHROUGH;
5073 
5074 	case VM_PAGER_COMPRESSOR_WITH_SWAP:
5075 		vm_config.compressor_is_present = TRUE;
5076 		vm_config.swap_is_present = TRUE;
5077 		vm_config.compressor_is_active = TRUE;
5078 		vm_config.swap_is_active = TRUE;
5079 		break;
5080 
5081 	case VM_PAGER_COMPRESSOR_NO_SWAP:
5082 		vm_config.compressor_is_present = TRUE;
5083 		vm_config.swap_is_present = TRUE;
5084 		vm_config.compressor_is_active = TRUE;
5085 		break;
5086 
5087 	case VM_PAGER_FREEZER_DEFAULT:
5088 		printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
5089 		OS_FALLTHROUGH;
5090 
5091 	case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
5092 		vm_config.compressor_is_present = TRUE;
5093 		vm_config.swap_is_present = TRUE;
5094 		break;
5095 
5096 	case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
5097 		vm_config.compressor_is_present = TRUE;
5098 		vm_config.swap_is_present = TRUE;
5099 		vm_config.compressor_is_active = TRUE;
5100 		vm_config.freezer_swap_is_active = TRUE;
5101 		break;
5102 
5103 	case VM_PAGER_NOT_CONFIGURED:
5104 		break;
5105 
5106 	default:
5107 		printf("unknown compressor mode - %x\n", vm_compressor_mode);
5108 		break;
5109 	}
5110 }
5111 
/*
 * Create (but do not start) the garbage-collection thread.  The thread
 * is started later from vm_pageout(); its first invocation runs with
 * the VM_PAGEOUT_GC_INIT step.  Registered to run at early boot.
 */
__startup_func
static void
vm_pageout_create_gc_thread(void)
{
	thread_t thread;

	if (kernel_thread_create(vm_pageout_garbage_collect,
	    VM_PAGEOUT_GC_INIT, BASEPRI_DEFAULT, &thread) != KERN_SUCCESS) {
		panic("vm_pageout_garbage_collect: create failed");
	}
	thread_set_thread_name(thread, "VM_pageout_garbage_collect");
	if (thread->reserved_stack == 0) {
		/* pin the thread's current kernel stack as its reserved stack */
		assert(thread->kernel_stack);
		thread->reserved_stack = thread->kernel_stack;
	}

	/* thread is started in vm_pageout() */
	vm_pageout_gc_thread = thread;
}
5131 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, vm_pageout_create_gc_thread);
5132 
/*
 * Main entry point of the pageout daemon.
 *
 * Performs the one-time initialization of the pageout subsystem --
 * thread priority/privilege/binding, paging parameters, the external
 * and internal pageout queues, the external pageout I/O thread, the GC
 * thread, the pressure thread, the object reaper and (if configured)
 * the compressor -- and then enters vm_pageout_continue(), which never
 * returns.
 */
void
vm_pageout(void)
{
	thread_t        self = current_thread();
	thread_t        thread;
	kern_return_t   result;
	spl_t           s;

	/*
	 * Set thread privileges.
	 */
	s = splsched();

#if CONFIG_VPS_DYNAMIC_PRIO
	if (vps_dynamic_priority_enabled) {
		sched_set_kernel_thread_priority(self, MAXPRI_THROTTLE);
		thread_set_eager_preempt(self);
	} else {
		sched_set_kernel_thread_priority(self, BASEPRI_VM);
	}
#else /* CONFIG_VPS_DYNAMIC_PRIO */
	sched_set_kernel_thread_priority(self, BASEPRI_VM);
#endif /* CONFIG_VPS_DYNAMIC_PRIO */

	thread_lock(self);
	/* allow this thread to allocate from the VM-privileged reserves */
	self->options |= TH_OPT_VMPRIV;
	thread_unlock(self);

	if (!self->reserved_stack) {
		/* pin the current kernel stack as this thread's reserved stack */
		self->reserved_stack = self->kernel_stack;
	}

	if (vm_pageout_state.vm_restricted_to_single_processor == TRUE &&
	    !vps_dynamic_priority_enabled) {
		/* see vm_set_restrictions(): keep the major VM threads on one CPU */
		thread_vm_bind_group_add();
	}


#if CONFIG_THREAD_GROUPS
	thread_group_vm_add();
#endif /* CONFIG_THREAD_GROUPS */

#if __AMP__
	PE_parse_boot_argn("vmpgo_pcluster", &vm_pgo_pbound, sizeof(vm_pgo_pbound));
	if (vm_pgo_pbound) {
		/*
		 * Use the soft bound option for vm pageout to allow it to run on
		 * E-cores if P-cluster is unavailable.
		 */
		thread_bind_cluster_type(self, 'P', true);
	}
#endif /* __AMP__ */

	PE_parse_boot_argn("vmpgo_protect_realtime",
	    &vm_pageout_protect_realtime,
	    sizeof(vm_pageout_protect_realtime));
	splx(s);

	thread_set_thread_name(current_thread(), "VM_pageout_scan");

	/*
	 *	Initialize some paging parameters.
	 */

	vm_pageout_state.vm_pressure_thread_running = FALSE;
	vm_pageout_state.vm_pressure_changed = FALSE;
	vm_pageout_state.memorystatus_purge_on_warning = 2;
	vm_pageout_state.memorystatus_purge_on_urgent = 5;
	vm_pageout_state.memorystatus_purge_on_critical = 8;
	vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
	vm_pageout_state.vm_page_speculative_percentage = 5;
	vm_pageout_state.vm_page_speculative_target = 0;

	/* wait/throttle knobs start at 0 and pick up their defaults below */
	vm_pageout_state.vm_pageout_swap_wait = 0;
	vm_pageout_state.vm_pageout_idle_wait = 0;
	vm_pageout_state.vm_pageout_empty_wait = 0;
	vm_pageout_state.vm_pageout_burst_wait = 0;
	vm_pageout_state.vm_pageout_deadlock_wait = 0;
	vm_pageout_state.vm_pageout_deadlock_relief = 0;
	vm_pageout_state.vm_pageout_burst_inactive_throttle = 0;

	vm_pageout_state.vm_pageout_inactive = 0;
	vm_pageout_state.vm_pageout_inactive_used = 0;
	vm_pageout_state.vm_pageout_inactive_clean = 0;

	vm_pageout_state.vm_memory_pressure = 0;
	vm_pageout_state.vm_page_filecache_min = 0;
#if CONFIG_JETSAM
	vm_pageout_state.vm_page_filecache_min_divisor = 70;
	vm_pageout_state.vm_page_xpmapped_min_divisor = 40;
#else
	vm_pageout_state.vm_page_filecache_min_divisor = 27;
	vm_pageout_state.vm_page_xpmapped_min_divisor = 36;
#endif
	vm_pageout_state.vm_page_free_count_init = vm_page_free_count;

	vm_pageout_state.vm_pageout_considered_page_last = 0;

	if (vm_pageout_state.vm_pageout_swap_wait == 0) {
		vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
	}

	if (vm_pageout_state.vm_pageout_idle_wait == 0) {
		vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
	}

	if (vm_pageout_state.vm_pageout_burst_wait == 0) {
		vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
	}

	if (vm_pageout_state.vm_pageout_empty_wait == 0) {
		vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
	}

	if (vm_pageout_state.vm_pageout_deadlock_wait == 0) {
		vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
	}

	if (vm_pageout_state.vm_pageout_deadlock_relief == 0) {
		vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
	}

	if (vm_pageout_state.vm_pageout_burst_inactive_throttle == 0) {
		vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
	}
	/*
	 * even if we've already called vm_page_free_reserve
	 * call it again here to insure that the targets are
	 * accurately calculated (it uses vm_page_free_count_init)
	 * calling it with an arg of 0 will not change the reserve
	 * but will re-calculate free_min and free_target
	 */
	if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
		vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
	} else {
		vm_page_free_reserve(0);
	}

	bzero(&vm_pageout_queue_external, sizeof(struct vm_pageout_queue));
	bzero(&vm_pageout_queue_internal, sizeof(struct vm_pageout_queue));

	vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
	vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;

	/* internal queue's pgo_maxlaundry is set in vm_pageout_internal_start() */
	vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);

#if DEVELOPMENT || DEBUG
	bzero(&vm_pageout_queue_benchmark, sizeof(struct vm_pageout_queue));
	vm_page_queue_init(&vm_pageout_queue_benchmark.pgo_pending);
#endif /* DEVELOPMENT || DEBUG */


	/* internal pageout thread started when default pager registered first time */
	/* external pageout and garbage collection threads started here */
	struct pgo_iothread_state *ethr = &pgo_iothread_external_state;
	ethr->id = 0;
	ethr->q = &vm_pageout_queue_external;
	/* in external_state these cheads are never used, they are used only in internal_state for the compressor */
	ethr->current_early_swapout_chead = NULL;
	ethr->current_regular_swapout_chead = NULL;
	ethr->current_late_swapout_chead = NULL;
	ethr->scratch_buf = NULL;
#if DEVELOPMENT || DEBUG
	ethr->benchmark_q = NULL;
#endif /* DEVELOPMENT || DEBUG */
	sched_cond_init(&(ethr->pgo_wakeup));

	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external,
	    (void *)ethr, BASEPRI_VM,
	    &(ethr->pgo_iothread));
	if (result != KERN_SUCCESS) {
		panic("vm_pageout: Unable to create external thread (%d)\n", result);
	}
	thread_set_thread_name(ethr->pgo_iothread, "VM_pageout_external_iothread");

	/* the GC thread was created at startup; start it now */
	thread_mtx_lock(vm_pageout_gc_thread );
	thread_start(vm_pageout_gc_thread );
	thread_mtx_unlock(vm_pageout_gc_thread);

#if VM_PRESSURE_EVENTS
	result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
	    BASEPRI_DEFAULT,
	    &thread);

	if (result != KERN_SUCCESS) {
		panic("vm_pressure_thread: create failed");
	}

	thread_deallocate(thread);
#endif

	vm_object_reaper_init();


	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
		vm_compressor_init();
	}

#if VM_PRESSURE_EVENTS
	/* from this point on vm_pressure_response() is live */
	vm_pressure_events_enabled = TRUE;
#endif /* VM_PRESSURE_EVENTS */

#if CONFIG_PHANTOM_CACHE
	vm_phantom_cache_init();
#endif
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
	printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
	    (uint64_t) vm_page_fake_buckets_start,
	    (uint64_t) vm_page_fake_buckets_end);
	pmap_protect(kernel_pmap,
	    vm_page_fake_buckets_start,
	    vm_page_fake_buckets_end,
	    VM_PROT_READ);
//	*(char *) vm_page_fake_buckets_start = 'x';	/* panic! */
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

#if VM_OBJECT_TRACKING
	vm_object_tracking_init();
#endif /* VM_OBJECT_TRACKING */

#if __arm64__
//	vm_tests();
#endif /* __arm64__ */

	vm_pageout_continue();

	/*
	 * Unreached code!
	 *
	 * The vm_pageout_continue() call above never returns, so the code below is never
	 * executed.  We take advantage of this to declare several DTrace VM related probe
	 * points that our kernel doesn't have an analog for.  These are probe points that
	 * exist in Solaris and are in the DTrace documentation, so people may have written
	 * scripts that use them.  Declaring the probe points here means their scripts will
	 * compile and execute which we want for portability of the scripts, but since this
	 * section of code is never reached, the probe points will simply never fire.  Yes,
	 * this is basically a hack.  The problem is the DTrace probe points were chosen with
	 * Solaris specific VM events in mind, not portability to different VM implementations.
	 */

	DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
	DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
	DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
	DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
	DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
	DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
	DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
	/*NOTREACHED*/
}
5384 
5385 
5386 
/*
 * vm_pageout_internal_start:
 *
 * Determine how many compressor threads to run, size the internal
 * pageout queue's laundry limit accordingly, allocate one scratch
 * buffer per thread, and start the compressor I/O threads.
 * Returns KERN_SUCCESS, or panics if a thread cannot be created.
 */
kern_return_t
vm_pageout_internal_start(void)
{
	kern_return_t   result = KERN_SUCCESS;
	host_basic_info_data_t hinfo;
	vm_offset_t     buf, bufsize;

	/* the compressor must be configured before its threads are started */
	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
#define BSD_HOST 1
	host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);

	assert(hinfo.max_cpus > 0);

	/* default thread count: 1, or 2 on macOS machines with more than 4 CPUs */
#if !XNU_TARGET_OS_OSX
	vm_pageout_state.vm_compressor_thread_count = 1;
#else /* !XNU_TARGET_OS_OSX */
	if (hinfo.max_cpus > 4) {
		vm_pageout_state.vm_compressor_thread_count = 2;
	} else {
		vm_pageout_state.vm_compressor_thread_count = 1;
	}
#endif /* !XNU_TARGET_OS_OSX */
#if     __AMP__
	if (vm_compressor_ebound) {
		/* NOTE(review): ebound presumably means bound to efficiency cores — confirm */
		vm_pageout_state.vm_compressor_thread_count = 2;
	}
#endif
	/* allow an override from the boot-args */
	PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
	    sizeof(vm_pageout_state.vm_compressor_thread_count));

	/* did we get from the bootargs an unreasonable number? */
	if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus) {
		vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
	}
	/* clamp the final count to [1, MAX_COMPRESSOR_THREAD_COUNT] */
	if (vm_pageout_state.vm_compressor_thread_count <= 0) {
		vm_pageout_state.vm_compressor_thread_count = 1;
	} else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT) {
		vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
	}

	/* scale the internal queue's laundry limit with the thread count */
	vm_pageout_queue_internal.pgo_maxlaundry =
	    (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;

	PE_parse_boot_argn("vmpgoi_maxlaundry",
	    &vm_pageout_queue_internal.pgo_maxlaundry,
	    sizeof(vm_pageout_queue_internal.pgo_maxlaundry));

#if DEVELOPMENT || DEBUG
	// Note: this will be modified at enqueue-time such that the benchmark queue is never throttled
	vm_pageout_queue_benchmark.pgo_maxlaundry = vm_pageout_queue_internal.pgo_maxlaundry;
#endif /* DEVELOPMENT || DEBUG */

	bufsize = COMPRESSOR_SCRATCH_BUF_SIZE;

	/* one KMA_NOFAIL allocation, carved into one scratch buffer per thread */
	kmem_alloc(kernel_map, &buf,
	    bufsize * vm_pageout_state.vm_compressor_thread_count,
	    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT,
	    VM_KERN_MEMORY_COMPRESSOR);

	for (int i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
		/* initialize the per-thread iothread state, then start the thread */
		struct pgo_iothread_state *iq = &pgo_iothread_internal_state[i];
		iq->id = i;
		iq->q = &vm_pageout_queue_internal;
		iq->current_early_swapout_chead = NULL;
		iq->current_regular_swapout_chead = NULL;
		iq->current_late_swapout_chead = NULL;
		iq->scratch_buf = (char *)(buf + i * bufsize);
#if DEVELOPMENT || DEBUG
		iq->benchmark_q = &vm_pageout_queue_benchmark;
#endif /* DEVELOPMENT || DEBUG */
		sched_cond_init(&(iq->pgo_wakeup));
		result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal,
		    (void *)iq, BASEPRI_VM,
		    &(iq->pgo_iothread));

		if (result != KERN_SUCCESS) {
			panic("vm_pageout: Unable to create compressor thread no. %d (%d)\n", i, result);
		}
	}
	return result;
}
5470 
5471 #if CONFIG_IOSCHED
5472 /*
5473  * To support I/O Expedite for compressed files we mark the upls with special flags.
5474  * The way decmpfs works is that we create a big upl which marks all the pages needed to
5475  * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
5476  * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
5477  * being held in the big original UPL. We mark each of these smaller UPLs with the flag
5478  * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
5479  * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
5480  * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link
5481  * unless the real I/O upl is being destroyed).
5482  */
5483 
5484 
5485 static void
5486 upl_set_decmp_info(upl_t upl, upl_t src_upl)
5487 {
5488 	assert((src_upl->flags & UPL_DECMP_REQ) != 0);
5489 
5490 	upl_lock(src_upl);
5491 	if (src_upl->decmp_io_upl) {
5492 		/*
5493 		 * If there is already an alive real I/O UPL, ignore this new UPL.
5494 		 * This case should rarely happen and even if it does, it just means
5495 		 * that we might issue a spurious expedite which the driver is expected
5496 		 * to handle.
5497 		 */
5498 		upl_unlock(src_upl);
5499 		return;
5500 	}
5501 	src_upl->decmp_io_upl = (void *)upl;
5502 	src_upl->ref_count++;
5503 
5504 	upl->flags |= UPL_DECMP_REAL_IO;
5505 	upl->decmp_io_upl = (void *)src_upl;
5506 	upl_unlock(src_upl);
5507 }
5508 #endif /* CONFIG_IOSCHED */
5509 
/*
 * When UPL_DEBUG is configured, every UPL is tracked on its object's
 * UPL queue (UPL_TRACKED_BY_OBJECT) and create/commit backtraces are
 * recorded.
 */
#if UPL_DEBUG
int     upl_debug_enabled = 1;
#else
int     upl_debug_enabled = 0;
#endif
5515 
/*
 * upl_create:
 *
 * Allocate and minimally initialize a UPL covering `size` bytes.
 * `type` selects internal vs. external page lists, lite bitmaps and
 * I/O tracking; `flags` seeds upl->flags.  For internal UPLs the
 * page-info array is allocated inline with the upl structure.
 */
static upl_t
upl_create(int type, int flags, upl_size_t size)
{
	uint32_t pages = (uint32_t)atop(round_page_32(size));
	upl_t    upl;

	assert(page_aligned(size));

	/*
	 * FIXME: this code assumes the allocation always succeeds,
	 *        however `pages` can be up to MAX_UPL_SIZE.
	 *
	 *        The allocation size is above 32k (resp. 128k)
	 *        on 16k pages (resp. 4k), which kalloc might fail
	 *        to allocate.
	 */
	upl = kalloc_type(struct upl, struct upl_page_info,
	    (type & UPL_CREATE_INTERNAL) ? pages : 0, Z_WAITOK | Z_ZERO);
	if (type & UPL_CREATE_INTERNAL) {
		flags |= UPL_INTERNAL;
	}

	if (type & UPL_CREATE_LITE) {
		/* lite UPLs track covered pages with a bitmap */
		flags |= UPL_LITE;
		if (pages) {
			upl->lite_list = bitmap_alloc(pages);
		}
	}

	upl->flags = flags;
	upl->ref_count = 1;
	upl_lock_init(upl);
#if CONFIG_IOSCHED
	if (type & UPL_CREATE_IO_TRACKING) {
		/* capture the creating thread's effective I/O policy */
		upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
	}

	if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
		/* Only support expedite on internal UPLs */
		thread_t        curthread = current_thread();
		upl->upl_reprio_info = kalloc_data(sizeof(uint64_t) * pages,
		    Z_WAITOK | Z_ZERO);
		upl->flags |= UPL_EXPEDITE_SUPPORTED;
		if (curthread->decmp_upl != NULL) {
			/* link to the decmpfs request UPL this thread is servicing */
			upl_set_decmp_info(upl, curthread->decmp_upl);
		}
	}
#endif
#if CONFIG_IOSCHED || UPL_DEBUG
	if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
		upl->upl_creator = current_thread();
		upl->flags |= UPL_TRACKED_BY_OBJECT;
	}
#endif

#if UPL_DEBUG
	/* record the creation backtrace for debugging */
	upl->upl_create_btref = btref_get(__builtin_frame_address(0), 0);
#endif /* UPL_DEBUG */

	return upl;
}
5577 
/*
 * upl_destroy:
 *
 * Final teardown of a UPL once its last reference is gone: detach it
 * from any decmpfs request UPL, remove it from its object's UPL queue,
 * drop the shadow object reference and free all associated memory.
 */
static void
upl_destroy(upl_t upl)
{
	uint32_t pages;

//	DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object);

	if (upl->ext_ref_count) {
		/* external references must all be gone before teardown */
		panic("upl(%p) ext_ref_count", upl);
	}

#if CONFIG_IOSCHED
	if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
		/*
		 * this is a real-I/O UPL spawned for a decmpfs request:
		 * sever the link from the request UPL and drop the
		 * reference that upl_set_decmp_info() took on it
		 */
		upl_t src_upl;
		src_upl = upl->decmp_io_upl;
		assert((src_upl->flags & UPL_DECMP_REQ) != 0);
		upl_lock(src_upl);
		src_upl->decmp_io_upl = NULL;
		upl_unlock(src_upl);
		upl_deallocate(src_upl);
	}
#endif /* CONFIG_IOSCHED */

#if CONFIG_IOSCHED || UPL_DEBUG
	if (((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) &&
	    !(upl->flags & UPL_VECTOR)) {
		vm_object_t     object;

		if (upl->flags & UPL_SHADOWED) {
			object = upl->map_object->shadow;
		} else {
			object = upl->map_object;
		}

		/* unhook the UPL from the object it was tracked on */
		vm_object_lock(object);
		queue_remove(&object->uplq, upl, upl_t, uplq);
		vm_object_activity_end(object);
		vm_object_collapse(object, 0, TRUE);
		vm_object_unlock(object);
	}
#endif
	/*
	 * drop a reference on the map_object whether or
	 * not a pageout object is inserted
	 */
	if (upl->flags & UPL_SHADOWED) {
		vm_object_deallocate(upl->map_object);
	}

	if (upl->flags & UPL_DEVICE_MEMORY) {
		/* device-memory UPLs account for a single "page" */
		pages = 1;
	} else {
		pages = (uint32_t)atop(upl_adjusted_size(upl, PAGE_MASK));
	}

	upl_lock_destroy(upl);

#if CONFIG_IOSCHED
	if (upl->flags & UPL_EXPEDITE_SUPPORTED) {
		kfree_data(upl->upl_reprio_info, sizeof(uint64_t) * pages);
	}
#endif

#if UPL_DEBUG
	/* release the backtrace refs captured at create/commit time */
	for (int i = 0; i < upl->upl_commit_index; i++) {
		btref_put(upl->upl_commit_records[i].c_btref);
	}
	btref_put(upl->upl_create_btref);
#endif /* UPL_DEBUG */

	if ((upl->flags & UPL_LITE) && pages) {
		bitmap_free(upl->lite_list, pages);
	}
	kfree_type(struct upl, struct upl_page_info,
	    (upl->flags & UPL_INTERNAL) ? pages : 0, upl);
}
5654 
/*
 * upl_deallocate:
 *
 * Drop a reference on the UPL; on the last reference, run the iodone
 * callout (if any) and destroy the UPL.
 */
void
upl_deallocate(upl_t upl)
{
	upl_lock(upl);

	if (--upl->ref_count == 0) {
		if (vector_upl_is_valid(upl)) {
			vector_upl_deallocate(upl);
		}
		/*
		 * NOTE(review): the UPL is still used after the unlock below;
		 * presumably safe because we just dropped the last reference
		 */
		upl_unlock(upl);

		if (upl->upl_iodone) {
			upl_callout_iodone(upl);
		}

		upl_destroy(upl);
	} else {
		upl_unlock(upl);
	}
}
5675 
5676 #if CONFIG_IOSCHED
5677 void
5678 upl_mark_decmp(upl_t upl)
5679 {
5680 	if (upl->flags & UPL_TRACKED_BY_OBJECT) {
5681 		upl->flags |= UPL_DECMP_REQ;
5682 		upl->upl_creator->decmp_upl = (void *)upl;
5683 	}
5684 }
5685 
5686 void
5687 upl_unmark_decmp(upl_t upl)
5688 {
5689 	if (upl && (upl->flags & UPL_DECMP_REQ)) {
5690 		upl->upl_creator->decmp_upl = NULL;
5691 	}
5692 }
5693 
5694 #endif /* CONFIG_IOSCHED */
5695 
5696 #define VM_PAGE_Q_BACKING_UP(q)         \
5697 	((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
5698 
5699 boolean_t must_throttle_writes(void);
5700 
5701 boolean_t
5702 must_throttle_writes()
5703 {
5704 	if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
5705 	    vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10) {
5706 		return TRUE;
5707 	}
5708 
5709 	return FALSE;
5710 }
5711 
/* count of delayed-work ctx allocation misses (Z_NOWAIT zalloc failed) */
int vm_page_delayed_work_ctx_needed = 0;
/* typed zone backing struct vm_page_delayed_work_ctx allocations */
KALLOC_TYPE_DEFINE(dw_ctx_zone, struct vm_page_delayed_work_ctx, KT_PRIV_ACCT);
5714 
5715 __startup_func
5716 static void
5717 vm_page_delayed_work_init_ctx(void)
5718 {
5719 	uint16_t min_delayed_work_ctx_allocated = 16;
5720 
5721 	/*
5722 	 * try really hard to always keep NCPU elements around in the zone
5723 	 * in order for the UPL code to almost always get an element.
5724 	 */
5725 	if (min_delayed_work_ctx_allocated < zpercpu_count()) {
5726 		min_delayed_work_ctx_allocated = (uint16_t)zpercpu_count();
5727 	}
5728 
5729 	zone_raise_reserve(dw_ctx_zone, min_delayed_work_ctx_allocated);
5730 }
5731 STARTUP(ZALLOC, STARTUP_RANK_LAST, vm_page_delayed_work_init_ctx);
5732 
5733 struct vm_page_delayed_work*
5734 vm_page_delayed_work_get_ctx(void)
5735 {
5736 	struct vm_page_delayed_work_ctx * dw_ctx = NULL;
5737 
5738 	dw_ctx = zalloc_flags(dw_ctx_zone, Z_ZERO | Z_NOWAIT);
5739 
5740 	if (__probable(dw_ctx)) {
5741 		dw_ctx->delayed_owner = current_thread();
5742 	} else {
5743 		vm_page_delayed_work_ctx_needed++;
5744 	}
5745 	return dw_ctx ? dw_ctx->dwp : NULL;
5746 }
5747 
5748 void
5749 vm_page_delayed_work_finish_ctx(struct vm_page_delayed_work* dwp)
5750 {
5751 	struct  vm_page_delayed_work_ctx *ldw_ctx;
5752 
5753 	ldw_ctx = (struct vm_page_delayed_work_ctx *)dwp;
5754 	ldw_ctx->delayed_owner = NULL;
5755 
5756 	zfree(dw_ctx_zone, ldw_ctx);
5757 }
5758 
5759 /*
5760  *	Routine:	vm_object_upl_request
5761  *	Purpose:
5762  *		Cause the population of a portion of a vm_object.
5763  *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
5765  *		A page list structure, listing the physical pages
5766  *		will be returned upon request.
5767  *		This function is called by the file system or any other
5768  *		supplier of backing store to a pager.
5769  *		IMPORTANT NOTE: The caller must still respect the relationship
5770  *		between the vm_object and its backing memory object.  The
5771  *		caller MUST NOT substitute changes in the backing file
5772  *		without first doing a memory_object_lock_request on the
 *		target range unless it is known that the pages are not
5774  *		shared with another entity at the pager level.
5775  *		Copy_in_to:
5776  *			if a page list structure is present
5777  *			return the mapped physical pages, where a
5778  *			page is not present, return a non-initialized
5779  *			one.  If the no_sync bit is turned on, don't
5780  *			call the pager unlock to synchronize with other
5781  *			possible copies of the page. Leave pages busy
5782  *			in the original object, if a page list structure
5783  *			was specified.  When a commit of the page list
5784  *			pages is done, the dirty bit will be set for each one.
5785  *		Copy_out_from:
5786  *			If a page list structure is present, return
5787  *			all mapped pages.  Where a page does not exist
5788  *			map a zero filled one. Leave pages busy in
5789  *			the original object.  If a page list structure
5790  *			is not specified, this call is a no-op.
5791  *
5792  *		Note:  access of default pager objects has a rather interesting
5793  *		twist.  The caller of this routine, presumably the file system
5794  *		page cache handling code, will never actually make a request
5795  *		against a default pager backed object.  Only the default
5796  *		pager will make requests on backing store related vm_objects
5797  *		In this way the default pager can maintain the relationship
5798  *		between backing store files (abstract memory objects) and
5799  *		the vm_objects (cache objects), they support.
5800  *
5801  */
5802 
5803 __private_extern__ kern_return_t
5804 vm_object_upl_request(
5805 	vm_object_t             object,
5806 	vm_object_offset_t      offset,
5807 	upl_size_t              size,
5808 	upl_t                   *upl_ptr,
5809 	upl_page_info_array_t   user_page_list,
5810 	unsigned int            *page_list_count,
5811 	upl_control_flags_t     cntrl_flags,
5812 	vm_tag_t                tag)
5813 {
5814 	vm_page_t               dst_page = VM_PAGE_NULL;
5815 	vm_object_offset_t      dst_offset;
5816 	upl_size_t              xfer_size;
5817 	unsigned int            size_in_pages;
5818 	boolean_t               dirty;
5819 	boolean_t               hw_dirty;
5820 	upl_t                   upl = NULL;
5821 	unsigned int            entry;
5822 	vm_page_t               alias_page = NULL;
5823 	int                     refmod_state = 0;
5824 	vm_object_t             last_copy_object;
5825 	uint32_t                last_copy_version;
5826 	struct  vm_page_delayed_work    dw_array;
5827 	struct  vm_page_delayed_work    *dwp, *dwp_start;
5828 	bool                    dwp_finish_ctx = TRUE;
5829 	int                     dw_count;
5830 	int                     dw_limit;
5831 	int                     io_tracking_flag = 0;
5832 	int                     grab_options;
5833 	int                     page_grab_count = 0;
5834 	ppnum_t                 phys_page;
5835 	pmap_flush_context      pmap_flush_context_storage;
5836 	boolean_t               pmap_flushes_delayed = FALSE;
5837 #if DEVELOPMENT || DEBUG
5838 	task_t                  task = current_task();
5839 #endif /* DEVELOPMENT || DEBUG */
5840 
5841 	dwp_start = dwp = NULL;
5842 
5843 	if (cntrl_flags & ~UPL_VALID_FLAGS) {
5844 		/*
5845 		 * For forward compatibility's sake,
5846 		 * reject any unknown flag.
5847 		 */
5848 		return KERN_INVALID_VALUE;
5849 	}
5850 	if ((!object->internal) && (object->paging_offset != 0)) {
5851 		panic("vm_object_upl_request: external object with non-zero paging offset");
5852 	}
5853 	if (object->phys_contiguous) {
5854 		panic("vm_object_upl_request: contiguous object specified");
5855 	}
5856 
5857 	assertf(page_aligned(offset) && page_aligned(size),
5858 	    "offset 0x%llx size 0x%x",
5859 	    offset, size);
5860 
5861 	VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, DBG_VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0);
5862 
5863 	dw_count = 0;
5864 	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5865 	dwp_start = vm_page_delayed_work_get_ctx();
5866 	if (dwp_start == NULL) {
5867 		dwp_start = &dw_array;
5868 		dw_limit = 1;
5869 		dwp_finish_ctx = FALSE;
5870 	}
5871 
5872 	dwp = dwp_start;
5873 
5874 	if (size > MAX_UPL_SIZE_BYTES) {
5875 		size = MAX_UPL_SIZE_BYTES;
5876 	}
5877 
5878 	if ((cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) {
5879 		*page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5880 	}
5881 
5882 #if CONFIG_IOSCHED || UPL_DEBUG
5883 	if (object->io_tracking || upl_debug_enabled) {
5884 		io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5885 	}
5886 #endif
5887 #if CONFIG_IOSCHED
5888 	if (object->io_tracking) {
5889 		io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5890 	}
5891 #endif
5892 
5893 	if (cntrl_flags & UPL_SET_INTERNAL) {
5894 		if (cntrl_flags & UPL_SET_LITE) {
5895 			upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5896 		} else {
5897 			upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
5898 		}
5899 		user_page_list = size ? upl->page_list : NULL;
5900 	} else {
5901 		if (cntrl_flags & UPL_SET_LITE) {
5902 			upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5903 		} else {
5904 			upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5905 		}
5906 	}
5907 	*upl_ptr = upl;
5908 
5909 	if (user_page_list) {
5910 		user_page_list[0].device = FALSE;
5911 	}
5912 
5913 	if (cntrl_flags & UPL_SET_LITE) {
5914 		upl->map_object = object;
5915 	} else {
5916 		upl->map_object = vm_object_allocate(size);
5917 		vm_object_lock(upl->map_object);
5918 		/*
5919 		 * No neeed to lock the new object: nobody else knows
5920 		 * about it yet, so it's all ours so far.
5921 		 */
5922 		upl->map_object->shadow = object;
5923 		VM_OBJECT_SET_PAGEOUT(upl->map_object, TRUE);
5924 		VM_OBJECT_SET_CAN_PERSIST(upl->map_object, FALSE);
5925 		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5926 		upl->map_object->vo_shadow_offset = offset;
5927 		upl->map_object->wimg_bits = object->wimg_bits;
5928 		assertf(page_aligned(upl->map_object->vo_shadow_offset),
5929 		    "object %p shadow_offset 0x%llx",
5930 		    upl->map_object, upl->map_object->vo_shadow_offset);
5931 		vm_object_unlock(upl->map_object);
5932 
5933 		alias_page = vm_page_grab_fictitious(TRUE);
5934 
5935 		upl->flags |= UPL_SHADOWED;
5936 	}
5937 	if (cntrl_flags & UPL_FOR_PAGEOUT) {
5938 		upl->flags |= UPL_PAGEOUT;
5939 	}
5940 
5941 	vm_object_lock(object);
5942 	vm_object_activity_begin(object);
5943 
5944 	grab_options = 0;
5945 #if CONFIG_SECLUDED_MEMORY
5946 	if (object->can_grab_secluded) {
5947 		grab_options |= VM_PAGE_GRAB_SECLUDED;
5948 	}
5949 #endif /* CONFIG_SECLUDED_MEMORY */
5950 
5951 	/*
5952 	 * we can lock in the paging_offset once paging_in_progress is set
5953 	 */
5954 	upl->u_size = size;
5955 	upl->u_offset = offset + object->paging_offset;
5956 
5957 #if CONFIG_IOSCHED || UPL_DEBUG
5958 	if (object->io_tracking || upl_debug_enabled) {
5959 		vm_object_activity_begin(object);
5960 		queue_enter(&object->uplq, upl, upl_t, uplq);
5961 	}
5962 #endif
5963 	if ((cntrl_flags & UPL_WILL_MODIFY) && object->vo_copy != VM_OBJECT_NULL) {
5964 		/*
5965 		 * Honor copy-on-write obligations
5966 		 *
5967 		 * The caller is gathering these pages and
5968 		 * might modify their contents.  We need to
5969 		 * make sure that the copy object has its own
5970 		 * private copies of these pages before we let
5971 		 * the caller modify them.
5972 		 */
5973 		vm_object_update(object,
5974 		    offset,
5975 		    size,
5976 		    NULL,
5977 		    NULL,
5978 		    FALSE,              /* should_return */
5979 		    MEMORY_OBJECT_COPY_SYNC,
5980 		    VM_PROT_NO_CHANGE);
5981 
5982 		VM_PAGEOUT_DEBUG(upl_cow, 1);
5983 		VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT));
5984 	}
5985 	/*
5986 	 * remember which copy object we synchronized with
5987 	 */
5988 	last_copy_object = object->vo_copy;
5989 	last_copy_version = object->vo_copy_version;
5990 	entry = 0;
5991 
5992 	xfer_size = size;
5993 	dst_offset = offset;
5994 	size_in_pages = size / PAGE_SIZE;
5995 
5996 	if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5997 	    object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) {
5998 		object->scan_collisions = 0;
5999 	}
6000 
6001 	if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
6002 		boolean_t       isSSD = FALSE;
6003 
6004 #if !XNU_TARGET_OS_OSX
6005 		isSSD = TRUE;
6006 #else /* !XNU_TARGET_OS_OSX */
6007 		vnode_pager_get_isSSD(object->pager, &isSSD);
6008 #endif /* !XNU_TARGET_OS_OSX */
6009 		vm_object_unlock(object);
6010 
6011 		OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
6012 
6013 		if (isSSD == TRUE) {
6014 			delay(1000 * size_in_pages);
6015 		} else {
6016 			delay(5000 * size_in_pages);
6017 		}
6018 		OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
6019 
6020 		vm_object_lock(object);
6021 	}
6022 
6023 	while (xfer_size) {
6024 		dwp->dw_mask = 0;
6025 
6026 		if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
6027 			vm_object_unlock(object);
6028 			alias_page = vm_page_grab_fictitious(TRUE);
6029 			vm_object_lock(object);
6030 		}
6031 		if (cntrl_flags & UPL_COPYOUT_FROM) {
6032 			upl->flags |= UPL_PAGE_SYNC_DONE;
6033 
6034 			if (((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
6035 			    dst_page->vmp_fictitious ||
6036 			    dst_page->vmp_absent ||
6037 			    VMP_ERROR_GET(dst_page) ||
6038 			    dst_page->vmp_cleaning ||
6039 			    (VM_PAGE_WIRED(dst_page))) {
6040 				if (user_page_list) {
6041 					user_page_list[entry].phys_addr = 0;
6042 				}
6043 
6044 				goto try_next_page;
6045 			}
6046 			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
6047 
6048 			/*
6049 			 * grab this up front...
6050 			 * a high percentange of the time we're going to
6051 			 * need the hardware modification state a bit later
6052 			 * anyway... so we can eliminate an extra call into
6053 			 * the pmap layer by grabbing it here and recording it
6054 			 */
6055 			if (dst_page->vmp_pmapped) {
6056 				refmod_state = pmap_get_refmod(phys_page);
6057 			} else {
6058 				refmod_state = 0;
6059 			}
6060 
6061 			if ((refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
6062 				/*
6063 				 * page is on inactive list and referenced...
6064 				 * reactivate it now... this gets it out of the
6065 				 * way of vm_pageout_scan which would have to
6066 				 * reactivate it upon tripping over it
6067 				 */
6068 				dwp->dw_mask |= DW_vm_page_activate;
6069 			}
6070 			if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
6071 				/*
6072 				 * we're only asking for DIRTY pages to be returned
6073 				 */
6074 				if (dst_page->vmp_laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
6075 					/*
6076 					 * if we were the page stolen by vm_pageout_scan to be
6077 					 * cleaned (as opposed to a buddy being clustered in
6078 					 * or this request is not being driven by a PAGEOUT cluster
6079 					 * then we only need to check for the page being dirty or
6080 					 * precious to decide whether to return it
6081 					 */
6082 					if (dst_page->vmp_dirty || dst_page->vmp_precious || (refmod_state & VM_MEM_MODIFIED)) {
6083 						goto check_busy;
6084 					}
6085 					goto dont_return;
6086 				}
6087 				/*
6088 				 * this is a request for a PAGEOUT cluster and this page
6089 				 * is merely along for the ride as a 'buddy'... not only
6090 				 * does it have to be dirty to be returned, but it also
6091 				 * can't have been referenced recently...
6092 				 */
6093 				if ((hibernate_cleaning_in_progress == TRUE ||
6094 				    (!((refmod_state & VM_MEM_REFERENCED) || dst_page->vmp_reference) ||
6095 				    (dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
6096 				    ((refmod_state & VM_MEM_MODIFIED) || dst_page->vmp_dirty || dst_page->vmp_precious)) {
6097 					goto check_busy;
6098 				}
6099 dont_return:
6100 				/*
6101 				 * if we reach here, we're not to return
6102 				 * the page... go on to the next one
6103 				 */
6104 				if (dst_page->vmp_laundry == TRUE) {
6105 					/*
6106 					 * if we get here, the page is not 'cleaning' (filtered out above).
6107 					 * since it has been referenced, remove it from the laundry
6108 					 * so we don't pay the cost of an I/O to clean a page
6109 					 * we're just going to take back
6110 					 */
6111 					vm_page_lockspin_queues();
6112 
6113 					vm_pageout_steal_laundry(dst_page, TRUE);
6114 					vm_page_activate(dst_page);
6115 
6116 					vm_page_unlock_queues();
6117 				}
6118 				if (user_page_list) {
6119 					user_page_list[entry].phys_addr = 0;
6120 				}
6121 
6122 				goto try_next_page;
6123 			}
6124 check_busy:
6125 			if (dst_page->vmp_busy) {
6126 				if (cntrl_flags & UPL_NOBLOCK) {
6127 					if (user_page_list) {
6128 						user_page_list[entry].phys_addr = 0;
6129 					}
6130 					dwp->dw_mask = 0;
6131 
6132 					goto try_next_page;
6133 				}
6134 				/*
6135 				 * someone else is playing with the
6136 				 * page.  We will have to wait.
6137 				 */
6138 				vm_page_sleep(object, dst_page, THREAD_UNINT, LCK_SLEEP_EXCLUSIVE);
6139 
6140 				continue;
6141 			}
6142 			if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
6143 				vm_page_lockspin_queues();
6144 
6145 				if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
6146 					/*
6147 					 * we've buddied up a page for a clustered pageout
6148 					 * that has already been moved to the pageout
6149 					 * queue by pageout_scan... we need to remove
6150 					 * it from the queue and drop the laundry count
6151 					 * on that queue
6152 					 */
6153 					vm_pageout_throttle_up(dst_page);
6154 				}
6155 				vm_page_unlock_queues();
6156 			}
6157 			hw_dirty = refmod_state & VM_MEM_MODIFIED;
6158 			dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
6159 
6160 			if (phys_page > upl->highest_page) {
6161 				upl->highest_page = phys_page;
6162 			}
6163 
6164 			assert(!pmap_is_noencrypt(phys_page));
6165 
6166 			if (cntrl_flags & UPL_SET_LITE) {
6167 				unsigned int    pg_num;
6168 
6169 				pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE);
6170 				assert(pg_num == (dst_offset - offset) / PAGE_SIZE);
6171 				bitmap_set(upl->lite_list, pg_num);
6172 
6173 				if (hw_dirty) {
6174 					if (pmap_flushes_delayed == FALSE) {
6175 						pmap_flush_context_init(&pmap_flush_context_storage);
6176 						pmap_flushes_delayed = TRUE;
6177 					}
6178 					pmap_clear_refmod_options(phys_page,
6179 					    VM_MEM_MODIFIED,
6180 					    PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_CLEAR_WRITE,
6181 					    &pmap_flush_context_storage);
6182 				}
6183 
6184 				/*
6185 				 * Mark original page as cleaning
6186 				 * in place.
6187 				 */
6188 				dst_page->vmp_cleaning = TRUE;
6189 				dst_page->vmp_precious = FALSE;
6190 			} else {
6191 				/*
6192 				 * use pageclean setup, it is more
6193 				 * convenient even for the pageout
6194 				 * cases here
6195 				 */
6196 				vm_object_lock(upl->map_object);
6197 				vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
6198 				vm_object_unlock(upl->map_object);
6199 
6200 				alias_page->vmp_absent = FALSE;
6201 				alias_page = NULL;
6202 			}
6203 			if (dirty) {
6204 				SET_PAGE_DIRTY(dst_page, FALSE);
6205 			} else {
6206 				dst_page->vmp_dirty = FALSE;
6207 			}
6208 
6209 			if (!dirty) {
6210 				dst_page->vmp_precious = TRUE;
6211 			}
6212 
6213 			if (!(cntrl_flags & UPL_CLEAN_IN_PLACE)) {
6214 				if (!VM_PAGE_WIRED(dst_page)) {
6215 					dst_page->vmp_free_when_done = TRUE;
6216 				}
6217 			}
6218 		} else {
6219 			if ((cntrl_flags & UPL_WILL_MODIFY) &&
6220 			    (object->vo_copy != last_copy_object ||
6221 			    object->vo_copy_version != last_copy_version)) {
6222 				/*
6223 				 * Honor copy-on-write obligations
6224 				 *
6225 				 * The copy object has changed since we
6226 				 * last synchronized for copy-on-write.
6227 				 * Another copy object might have been
6228 				 * inserted while we released the object's
6229 				 * lock.  Since someone could have seen the
6230 				 * original contents of the remaining pages
6231 				 * through that new object, we have to
6232 				 * synchronize with it again for the remaining
6233 				 * pages only.  The previous pages are "busy"
6234 				 * so they can not be seen through the new
6235 				 * mapping.  The new mapping will see our
6236 				 * upcoming changes for those previous pages,
6237 				 * but that's OK since they couldn't see what
6238 				 * was there before.  It's just a race anyway
6239 				 * and there's no guarantee of consistency or
6240 				 * atomicity.  We just don't want new mappings
6241 				 * to see both the *before* and *after* pages.
6242 				 */
6243 				if (object->vo_copy != VM_OBJECT_NULL) {
6244 					vm_object_update(
6245 						object,
6246 						dst_offset,/* current offset */
6247 						xfer_size, /* remaining size */
6248 						NULL,
6249 						NULL,
6250 						FALSE,     /* should_return */
6251 						MEMORY_OBJECT_COPY_SYNC,
6252 						VM_PROT_NO_CHANGE);
6253 
6254 					VM_PAGEOUT_DEBUG(upl_cow_again, 1);
6255 					VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT));
6256 				}
6257 				/*
6258 				 * remember the copy object we synced with
6259 				 */
6260 				last_copy_object = object->vo_copy;
6261 				last_copy_version = object->vo_copy_version;
6262 			}
6263 			dst_page = vm_page_lookup(object, dst_offset);
6264 
6265 			if (dst_page != VM_PAGE_NULL) {
6266 				if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
6267 					/*
6268 					 * skip over pages already present in the cache
6269 					 */
6270 					if (user_page_list) {
6271 						user_page_list[entry].phys_addr = 0;
6272 					}
6273 
6274 					goto try_next_page;
6275 				}
6276 				if (dst_page->vmp_fictitious) {
6277 					panic("need corner case for fictitious page");
6278 				}
6279 
6280 				if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
6281 					/*
6282 					 * someone else is playing with the
6283 					 * page.  We will have to wait.
6284 					 */
6285 					vm_page_sleep(object, dst_page, THREAD_UNINT, LCK_SLEEP_EXCLUSIVE);
6286 
6287 					continue;
6288 				}
6289 				if (dst_page->vmp_laundry) {
6290 					vm_pageout_steal_laundry(dst_page, FALSE);
6291 				}
6292 			} else {
6293 				if (object->private) {
6294 					/*
6295 					 * This is a nasty wrinkle for users
6296 					 * of upl who encounter device or
6297 					 * private memory however, it is
6298 					 * unavoidable, only a fault can
6299 					 * resolve the actual backing
6300 					 * physical page by asking the
6301 					 * backing device.
6302 					 */
6303 					if (user_page_list) {
6304 						user_page_list[entry].phys_addr = 0;
6305 					}
6306 
6307 					goto try_next_page;
6308 				}
6309 				if (object->scan_collisions) {
6310 					/*
6311 					 * the pageout_scan thread is trying to steal
6312 					 * pages from this object, but has run into our
6313 					 * lock... grab 2 pages from the head of the object...
6314 					 * the first is freed on behalf of pageout_scan, the
6315 					 * 2nd is for our own use... we use vm_object_page_grab
6316 					 * in both cases to avoid taking pages from the free
6317 					 * list since we are under memory pressure and our
6318 					 * lock on this object is getting in the way of
6319 					 * relieving it
6320 					 */
6321 					dst_page = vm_object_page_grab(object);
6322 
6323 					if (dst_page != VM_PAGE_NULL) {
6324 						vm_page_release(dst_page,
6325 						    FALSE);
6326 					}
6327 
6328 					dst_page = vm_object_page_grab(object);
6329 				}
6330 				if (dst_page == VM_PAGE_NULL) {
6331 					/*
6332 					 * need to allocate a page
6333 					 */
6334 					dst_page = vm_page_grab_options(grab_options);
6335 					if (dst_page != VM_PAGE_NULL) {
6336 						page_grab_count++;
6337 					}
6338 				}
6339 				if (dst_page == VM_PAGE_NULL) {
6340 					if ((cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
6341 						/*
6342 						 * we don't want to stall waiting for pages to come onto the free list
6343 						 * while we're already holding absent pages in this UPL
6344 						 * the caller will deal with the empty slots
6345 						 */
6346 						if (user_page_list) {
6347 							user_page_list[entry].phys_addr = 0;
6348 						}
6349 
6350 						goto try_next_page;
6351 					}
6352 					/*
6353 					 * no pages available... wait
6354 					 * then try again for the same
6355 					 * offset...
6356 					 */
6357 					vm_object_unlock(object);
6358 
6359 					OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
6360 
6361 					VM_DEBUG_EVENT(vm_upl_page_wait, DBG_VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
6362 
6363 					VM_PAGE_WAIT();
6364 					OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
6365 
6366 					VM_DEBUG_EVENT(vm_upl_page_wait, DBG_VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
6367 
6368 					vm_object_lock(object);
6369 
6370 					continue;
6371 				}
6372 				vm_page_insert(dst_page, object, dst_offset);
6373 
6374 				dst_page->vmp_absent = TRUE;
6375 				dst_page->vmp_busy = FALSE;
6376 
6377 				if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
6378 					/*
6379 					 * if UPL_RET_ONLY_ABSENT was specified,
6380 					 * than we're definitely setting up a
6381 					 * upl for a clustered read/pagein
6382 					 * operation... mark the pages as clustered
6383 					 * so upl_commit_range can put them on the
6384 					 * speculative list
6385 					 */
6386 					dst_page->vmp_clustered = TRUE;
6387 
6388 					if (!(cntrl_flags & UPL_FILE_IO)) {
6389 						counter_inc(&vm_statistics_pageins);
6390 					}
6391 				}
6392 			}
6393 			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
6394 
6395 			dst_page->vmp_overwriting = TRUE;
6396 
6397 			if (dst_page->vmp_pmapped) {
6398 				if (!(cntrl_flags & UPL_FILE_IO)) {
6399 					/*
6400 					 * eliminate all mappings from the
6401 					 * original object and its prodigy
6402 					 */
6403 					refmod_state = pmap_disconnect(phys_page);
6404 				} else {
6405 					refmod_state = pmap_get_refmod(phys_page);
6406 				}
6407 			} else {
6408 				refmod_state = 0;
6409 			}
6410 
6411 			hw_dirty = refmod_state & VM_MEM_MODIFIED;
6412 			dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
6413 
6414 			if (cntrl_flags & UPL_SET_LITE) {
6415 				unsigned int    pg_num;
6416 
6417 				pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE);
6418 				assert(pg_num == (dst_offset - offset) / PAGE_SIZE);
6419 				bitmap_set(upl->lite_list, pg_num);
6420 
6421 				if (hw_dirty) {
6422 					pmap_clear_modify(phys_page);
6423 				}
6424 
6425 				/*
6426 				 * Mark original page as cleaning
6427 				 * in place.
6428 				 */
6429 				dst_page->vmp_cleaning = TRUE;
6430 				dst_page->vmp_precious = FALSE;
6431 			} else {
6432 				/*
6433 				 * use pageclean setup, it is more
6434 				 * convenient even for the pageout
6435 				 * cases here
6436 				 */
6437 				vm_object_lock(upl->map_object);
6438 				vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
6439 				vm_object_unlock(upl->map_object);
6440 
6441 				alias_page->vmp_absent = FALSE;
6442 				alias_page = NULL;
6443 			}
6444 
6445 			if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
6446 				upl->flags &= ~UPL_CLEAR_DIRTY;
6447 				upl->flags |= UPL_SET_DIRTY;
6448 				dirty = TRUE;
6449 				/*
6450 				 * Page belonging to a code-signed object is about to
6451 				 * be written. Mark it tainted and disconnect it from
6452 				 * all pmaps so processes have to fault it back in and
6453 				 * deal with the tainted bit.
6454 				 */
6455 				if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
6456 					dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE;
6457 					vm_page_upl_tainted++;
6458 					if (dst_page->vmp_pmapped) {
6459 						refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
6460 						if (refmod_state & VM_MEM_REFERENCED) {
6461 							dst_page->vmp_reference = TRUE;
6462 						}
6463 					}
6464 				}
6465 			} else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
6466 				/*
6467 				 * clean in place for read implies
6468 				 * that a write will be done on all
6469 				 * the pages that are dirty before
6470 				 * a upl commit is done.  The caller
6471 				 * is obligated to preserve the
6472 				 * contents of all pages marked dirty
6473 				 */
6474 				upl->flags |= UPL_CLEAR_DIRTY;
6475 			}
6476 			dst_page->vmp_dirty = dirty;
6477 
6478 			if (!dirty) {
6479 				dst_page->vmp_precious = TRUE;
6480 			}
6481 
6482 			if (!VM_PAGE_WIRED(dst_page)) {
6483 				/*
6484 				 * deny access to the target page while
6485 				 * it is being worked on
6486 				 */
6487 				dst_page->vmp_busy = TRUE;
6488 			} else {
6489 				dwp->dw_mask |= DW_vm_page_wire;
6490 			}
6491 
6492 			/*
6493 			 * We might be about to satisfy a fault which has been
6494 			 * requested. So no need for the "restart" bit.
6495 			 */
6496 			dst_page->vmp_restart = FALSE;
6497 			if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
6498 				/*
6499 				 * expect the page to be used
6500 				 */
6501 				dwp->dw_mask |= DW_set_reference;
6502 			}
6503 			if (cntrl_flags & UPL_PRECIOUS) {
6504 				if (object->internal) {
6505 					SET_PAGE_DIRTY(dst_page, FALSE);
6506 					dst_page->vmp_precious = FALSE;
6507 				} else {
6508 					dst_page->vmp_precious = TRUE;
6509 				}
6510 			} else {
6511 				dst_page->vmp_precious = FALSE;
6512 			}
6513 		}
6514 		if (dst_page->vmp_busy) {
6515 			upl->flags |= UPL_HAS_BUSY;
6516 		}
6517 
6518 		if (phys_page > upl->highest_page) {
6519 			upl->highest_page = phys_page;
6520 		}
6521 		assert(!pmap_is_noencrypt(phys_page));
6522 		if (user_page_list) {
6523 			user_page_list[entry].phys_addr = phys_page;
6524 			user_page_list[entry].free_when_done    = dst_page->vmp_free_when_done;
6525 			user_page_list[entry].absent    = dst_page->vmp_absent;
6526 			user_page_list[entry].dirty     = dst_page->vmp_dirty;
6527 			user_page_list[entry].precious  = dst_page->vmp_precious;
6528 			user_page_list[entry].device    = FALSE;
6529 			user_page_list[entry].needed    = FALSE;
6530 			if (dst_page->vmp_clustered == TRUE) {
6531 				user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
6532 			} else {
6533 				user_page_list[entry].speculative = FALSE;
6534 			}
6535 			user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
6536 			user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
6537 			user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
6538 			user_page_list[entry].mark      = FALSE;
6539 		}
6540 		/*
6541 		 * if UPL_RET_ONLY_ABSENT is set, then
6542 		 * we are working with a fresh page and we've
6543 		 * just set the clustered flag on it to
6544 		 * indicate that it was drug in as part of a
6545 		 * speculative cluster... so leave it alone
6546 		 */
6547 		if (!(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
6548 			/*
6549 			 * someone is explicitly grabbing this page...
6550 			 * update clustered and speculative state
6551 			 *
6552 			 */
6553 			if (dst_page->vmp_clustered) {
6554 				VM_PAGE_CONSUME_CLUSTERED(dst_page);
6555 			}
6556 		}
6557 try_next_page:
6558 		if (dwp->dw_mask) {
6559 			if (dwp->dw_mask & DW_vm_page_activate) {
6560 				counter_inc(&vm_statistics_reactivations);
6561 			}
6562 
6563 			VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
6564 
6565 			if (dw_count >= dw_limit) {
6566 				vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
6567 
6568 				dwp = dwp_start;
6569 				dw_count = 0;
6570 			}
6571 		}
6572 		entry++;
6573 		dst_offset += PAGE_SIZE_64;
6574 		xfer_size -= PAGE_SIZE;
6575 	}
6576 	if (dw_count) {
6577 		vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
6578 		dwp = dwp_start;
6579 		dw_count = 0;
6580 	}
6581 
6582 	if (alias_page != NULL) {
6583 		VM_PAGE_FREE(alias_page);
6584 	}
6585 	if (pmap_flushes_delayed == TRUE) {
6586 		pmap_flush(&pmap_flush_context_storage);
6587 	}
6588 
6589 	if (page_list_count != NULL) {
6590 		if (upl->flags & UPL_INTERNAL) {
6591 			*page_list_count = 0;
6592 		} else if (*page_list_count > entry) {
6593 			*page_list_count = entry;
6594 		}
6595 	}
6596 #if UPL_DEBUG
6597 	upl->upl_state = 1;
6598 #endif
6599 	vm_object_unlock(object);
6600 
6601 	VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, DBG_VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
6602 #if DEVELOPMENT || DEBUG
6603 	if (task != NULL) {
6604 		ledger_credit(task->ledger, task_ledgers.pages_grabbed_upl, page_grab_count);
6605 	}
6606 #endif /* DEVELOPMENT || DEBUG */
6607 
6608 	if (dwp_start && dwp_finish_ctx) {
6609 		vm_page_delayed_work_finish_ctx(dwp_start);
6610 		dwp_start = dwp = NULL;
6611 	}
6612 
6613 	return KERN_SUCCESS;
6614 }
6615 
6616 /*
6617  *	Routine:	vm_object_super_upl_request
6618  *	Purpose:
6619  *		Cause the population of a portion of a vm_object
6620  *		in much the same way as memory_object_upl_request.
6621  *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
6623  *		However, the region may be expanded up to the super
6624  *		cluster size provided.
6625  */
6626 
6627 __private_extern__ kern_return_t
6628 vm_object_super_upl_request(
6629 	vm_object_t object,
6630 	vm_object_offset_t      offset,
6631 	upl_size_t              size,
6632 	upl_size_t              super_cluster,
6633 	upl_t                   *upl,
6634 	upl_page_info_t         *user_page_list,
6635 	unsigned int            *page_list_count,
6636 	upl_control_flags_t     cntrl_flags,
6637 	vm_tag_t                tag)
6638 {
6639 	if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR) == UPL_VECTOR)) {
6640 		return KERN_FAILURE;
6641 	}
6642 
6643 	assert(object->paging_in_progress);
6644 	offset = offset - object->paging_offset;
6645 
6646 	if (super_cluster > size) {
6647 		vm_object_offset_t      base_offset;
6648 		upl_size_t              super_size;
6649 		vm_object_size_t        super_size_64;
6650 
6651 		base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
6652 		super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster << 1 : super_cluster;
6653 		super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
6654 		super_size = (upl_size_t) super_size_64;
6655 		assert(super_size == super_size_64);
6656 
6657 		if (offset > (base_offset + super_size)) {
6658 			panic("vm_object_super_upl_request: Missed target pageout"
6659 			    " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
6660 			    offset, base_offset, super_size, super_cluster,
6661 			    size, object->paging_offset);
6662 		}
6663 		/*
6664 		 * apparently there is a case where the vm requests a
6665 		 * page to be written out who's offset is beyond the
6666 		 * object size
6667 		 */
6668 		if ((offset + size) > (base_offset + super_size)) {
6669 			super_size_64 = (offset + size) - base_offset;
6670 			super_size = (upl_size_t) super_size_64;
6671 			assert(super_size == super_size_64);
6672 		}
6673 
6674 		offset = base_offset;
6675 		size = super_size;
6676 	}
6677 	return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
6678 }
6679 
/*
 * Diagnostic counter: incremented by vm_map_create_upl() each time a UPL
 * is created over an executable mapping (which can later lead to
 * code-signing violations — see the warning logged there).
 */
int cs_executable_create_upl = 0;
/* BSD-layer helpers used below for diagnostic logging of pid/process name */
extern int proc_selfpid(void);
extern char *proc_name_address(void *p);
6683 
/*
 * vm_map_create_upl:
 *
 * Create a UPL (universal page list) describing the physical pages that
 * back the VM map range [offset, offset + *upl_size).
 *
 * The routine walks the map entry covering "offset", resolving submaps
 * (restarting at "start_with_map") and copy-on-write state (restarting at
 * "REDISCOVER_ENTRY") until it reaches a VM object in a stable state, then
 * hands off to vm_object_iopl_request() on that object.
 *
 * Parameters:
 *	map		map to create the UPL against (read-locked internally)
 *	offset		starting address within "map"
 *	upl_size	in: requested size; out: actual size covered (may be
 *			clipped to the map entry or MAX_UPL_SIZE_BYTES)
 *	upl		out: the new UPL; must be non-NULL
 *	page_list	out: per-page info array (passed through)
 *	count		in/out: number of entries in page_list
 *	flags		in: UPL_* control flags; out: object type info
 *			(UPL_DEV_MEMORY / UPL_PHYS_CONTIG)
 *	tag		VM tag for accounting
 *
 * Returns: KERN_SUCCESS or a specific failure code (see "goto done" sites).
 */
kern_return_t
vm_map_create_upl(
	vm_map_t                map,
	vm_map_address_t        offset,
	upl_size_t              *upl_size,
	upl_t                   *upl,
	upl_page_info_array_t   page_list,
	unsigned int            *count,
	upl_control_flags_t     *flags,
	vm_tag_t                tag)
{
	vm_map_entry_t          entry;
	upl_control_flags_t     caller_flags;
	int                     force_data_sync;
	int                     sync_cow_data;
	vm_object_t             local_object;
	vm_map_offset_t         local_offset;
	vm_map_offset_t         local_start;
	kern_return_t           ret;
	vm_map_address_t        original_offset;
	vm_map_size_t           original_size, adjusted_size;
	vm_map_offset_t         local_entry_start;
	vm_object_offset_t      local_entry_offset;
	vm_object_offset_t      offset_in_mapped_page;
	boolean_t               release_map = FALSE;


	/*
	 * Re-entry point when the lookup descends into a submap; "map",
	 * "offset" and "release_map" are updated before jumping back here.
	 */
start_with_map:

	original_offset = offset;
	original_size = *upl_size;
	adjusted_size = original_size;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		ret = KERN_INVALID_VALUE;
		goto done;
	}
	force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
	sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);

	if (upl == NULL) {
		ret = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/*
	 * Re-entry point whenever we had to drop the map lock (or upgrade
	 * it) and must re-validate the map entry from scratch.
	 */
REDISCOVER_ENTRY:
	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, offset, &entry)) {
		vm_map_unlock_read(map);
		ret = KERN_FAILURE;
		goto done;
	}

	local_entry_start = entry->vme_start;
	local_entry_offset = VME_OFFSET(entry);

	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
		DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%x flags 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)offset, *upl_size, *flags);
	}

	/* clip the request so it does not extend past this map entry */
	if (entry->vme_end - original_offset < adjusted_size) {
		adjusted_size = entry->vme_end - original_offset;
		assert(adjusted_size > 0);
		*upl_size = (upl_size_t) adjusted_size;
		assert(*upl_size == adjusted_size);
	}

	/*
	 * Query-only mode: report whether the backing object is device/
	 * private or physically contiguous, without creating a UPL.
	 */
	if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
		*flags = 0;

		if (!entry->is_sub_map &&
		    VME_OBJECT(entry) != VM_OBJECT_NULL) {
			if (VME_OBJECT(entry)->private) {
				*flags = UPL_DEV_MEMORY;
			}

			if (VME_OBJECT(entry)->phys_contiguous) {
				*flags |= UPL_PHYS_CONTIG;
			}
		}
		vm_map_unlock_read(map);
		ret = KERN_SUCCESS;
		goto done;
	}

	/*
	 * Sub-page maps (map page size < kernel page size): round the
	 * request out to map-page boundaries and remember the intra-page
	 * offset so it can be re-applied before the iopl request.
	 */
	offset_in_mapped_page = 0;
	if (VM_MAP_PAGE_SIZE(map) < PAGE_SIZE) {
		offset = vm_map_trunc_page(original_offset, VM_MAP_PAGE_MASK(map));
		*upl_size = (upl_size_t)
		    (vm_map_round_page(original_offset + adjusted_size,
		    VM_MAP_PAGE_MASK(map))
		    - offset);

		offset_in_mapped_page = original_offset - offset;
		assert(offset_in_mapped_page < VM_MAP_PAGE_SIZE(map));

		DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%llx flags 0x%llx -> offset 0x%llx adjusted_size 0x%llx *upl_size 0x%x offset_in_mapped_page 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)original_offset, (uint64_t)original_size, *flags, (uint64_t)offset, (uint64_t)adjusted_size, *upl_size, offset_in_mapped_page);
	}

	if (!entry->is_sub_map) {
		/* cap the UPL size unless the object is physically contiguous */
		if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
		    !VME_OBJECT(entry)->phys_contiguous) {
			if (*upl_size > MAX_UPL_SIZE_BYTES) {
				*upl_size = MAX_UPL_SIZE_BYTES;
			}
		}

		/*
		 *      Create an object if necessary.
		 */
		if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
			/* need the exclusive map lock; re-lookup if upgrade fails */
			if (vm_map_lock_read_to_write(map)) {
				goto REDISCOVER_ENTRY;
			}

			VME_OBJECT_SET(entry,
			    vm_object_allocate((vm_size_t)
			    vm_object_round_page((entry->vme_end - entry->vme_start))),
			    false, 0);
			VME_OFFSET_SET(entry, 0);
			assert(entry->use_pmap);

			vm_map_lock_write_to_read(map);
		}

		/* a UPL the caller will write through requires a writable mapping */
		if (!(caller_flags & UPL_COPYOUT_FROM) &&
		    !(entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock_read(map);
			ret = KERN_PROTECTION_FAILURE;
			goto done;
		}
	}

#if !XNU_TARGET_OS_OSX
	if (map->pmap != kernel_pmap &&
	    (caller_flags & UPL_COPYOUT_FROM) &&
	    (entry->protection & VM_PROT_EXECUTE) &&
	    !(entry->protection & VM_PROT_WRITE)) {
		vm_offset_t     kaddr;
		vm_size_t       ksize;

		/*
		 * We're about to create a read-only UPL backed by
		 * memory from an executable mapping.
		 * Wiring the pages would result in the pages being copied
		 * (due to the "MAP_PRIVATE" mapping) and no longer
		 * code-signed, so no longer eligible for execution.
		 * Instead, let's copy the data into a kernel buffer and
		 * create the UPL from this kernel buffer.
		 * The kernel buffer is then freed, leaving the UPL holding
		 * the last reference on the VM object, so the memory will
		 * be released when the UPL is committed.
		 */

		vm_map_unlock_read(map);
		entry = VM_MAP_ENTRY_NULL;
		/* allocate kernel buffer */
		ksize = round_page(*upl_size);
		kaddr = 0;
		ret = kmem_alloc(kernel_map, &kaddr, ksize,
		    KMA_PAGEABLE | KMA_DATA, tag);
		if (ret == KERN_SUCCESS) {
			/* copyin the user data */
			ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
		}
		if (ret == KERN_SUCCESS) {
			if (ksize > *upl_size) {
				/* zero out the extra space in kernel buffer */
				memset((void *)(kaddr + *upl_size),
				    0,
				    ksize - *upl_size);
			}
			/* create the UPL from the kernel buffer */
			vm_object_offset_t      offset_in_object;
			vm_object_offset_t      offset_in_object_page;

			offset_in_object = offset - local_entry_start + local_entry_offset;
			offset_in_object_page = offset_in_object - vm_object_trunc_page(offset_in_object);
			assert(offset_in_object_page < PAGE_SIZE);
			assert(offset_in_object_page + offset_in_mapped_page < PAGE_SIZE);
			*upl_size -= offset_in_object_page + offset_in_mapped_page;
			/* recurse on the kernel map for the copied buffer */
			ret = vm_map_create_upl(kernel_map,
			    (vm_map_address_t)(kaddr + offset_in_object_page + offset_in_mapped_page),
			    upl_size, upl, page_list, count, flags, tag);
		}
		if (kaddr != 0) {
			/* free the kernel buffer */
			kmem_free(kernel_map, kaddr, ksize);
			kaddr = 0;
			ksize = 0;
		}
#if DEVELOPMENT || DEBUG
		DTRACE_VM4(create_upl_from_executable,
		    vm_map_t, map,
		    vm_map_address_t, offset,
		    upl_size_t, *upl_size,
		    kern_return_t, ret);
#endif /* DEVELOPMENT || DEBUG */
		goto done;
	}
#endif /* !XNU_TARGET_OS_OSX */

	if (!entry->is_sub_map) {
		local_object = VME_OBJECT(entry);
		assert(local_object != VM_OBJECT_NULL);
	}

	/*
	 * Decide whether this range needs a targeted copy-on-write setup
	 * (instead of converting the whole object to true_share/copy_delay).
	 * See the "Case 1"/"Case 2" comment below for the two triggers.
	 */
	if (!entry->is_sub_map &&
	    !entry->needs_copy &&
	    *upl_size != 0 &&
	    local_object->vo_size > *upl_size && /* partial UPL */
	    entry->wired_count == 0 && /* No COW for entries that are wired */
	    (map->pmap != kernel_pmap) && /* alias checks */
	    (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
	    ||
	    ( /* case 2 */
		    local_object->internal &&
		    (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
		    os_ref_get_count_raw(&local_object->ref_count) > 1))) {
		vm_prot_t       prot;

		/*
		 * Case 1:
		 * Set up the targeted range for copy-on-write to avoid
		 * applying true_share/copy_delay to the entire object.
		 *
		 * Case 2:
		 * This map entry covers only part of an internal
		 * object.  There could be other map entries covering
		 * other areas of this object and some of these map
		 * entries could be marked as "needs_copy", which
		 * assumes that the object is COPY_SYMMETRIC.
		 * To avoid marking this object as COPY_DELAY and
		 * "true_share", let's shadow it and mark the new
		 * (smaller) object as "true_share" and COPY_DELAY.
		 */

		if (vm_map_lock_read_to_write(map)) {
			goto REDISCOVER_ENTRY;
		}
		vm_map_lock_assert_exclusive(map);
		assert(VME_OBJECT(entry) == local_object);

		/* narrow the entry to just the range being targeted */
		vm_map_clip_start(map,
		    entry,
		    vm_map_trunc_page(offset,
		    VM_MAP_PAGE_MASK(map)));
		vm_map_clip_end(map,
		    entry,
		    vm_map_round_page(offset + *upl_size,
		    VM_MAP_PAGE_MASK(map)));
		if ((entry->vme_end - offset) < *upl_size) {
			*upl_size = (upl_size_t) (entry->vme_end - offset);
			assert(*upl_size == entry->vme_end - offset);
		}

		/* write-protect the range so the next write faults and copies */
		prot = entry->protection & ~VM_PROT_WRITE;
		if (override_nx(map, VME_ALIAS(entry)) && prot) {
			prot |= VM_PROT_EXECUTE;
		}
		vm_object_pmap_protect(local_object,
		    VME_OFFSET(entry),
		    entry->vme_end - entry->vme_start,
		    ((entry->is_shared ||
		    map->mapped_in_other_pmaps)
		    ? PMAP_NULL
		    : map->pmap),
		    VM_MAP_PAGE_SIZE(map),
		    entry->vme_start,
		    prot);

		assert(entry->wired_count == 0);

		/*
		 * Lock the VM object and re-check its status: if it's mapped
		 * in another address space, we could still be racing with
		 * another thread holding that other VM map exclusively.
		 */
		vm_object_lock(local_object);
		if (local_object->true_share) {
			/* object is already in proper state: no COW needed */
			assert(local_object->copy_strategy !=
			    MEMORY_OBJECT_COPY_SYMMETRIC);
		} else {
			/* not true_share: ask for copy-on-write below */
			assert(local_object->copy_strategy ==
			    MEMORY_OBJECT_COPY_SYMMETRIC);
			entry->needs_copy = TRUE;
		}
		vm_object_unlock(local_object);

		vm_map_lock_write_to_read(map);
	}

	if (entry->needs_copy) {
		/*
		 * Honor copy-on-write for COPY_SYMMETRIC
		 * strategy.
		 */
		vm_map_t                local_map;
		vm_object_t             object;
		vm_object_offset_t      new_offset;
		vm_prot_t               prot;
		boolean_t               wired;
		vm_map_version_t        version;
		vm_map_t                real_map;
		vm_prot_t               fault_type;

		local_map = map;

		if (caller_flags & UPL_COPYOUT_FROM) {
			fault_type = VM_PROT_READ | VM_PROT_COPY;
			vm_counters.create_upl_extra_cow++;
			vm_counters.create_upl_extra_cow_pages +=
			    (entry->vme_end - entry->vme_start) / PAGE_SIZE;
		} else {
			fault_type = VM_PROT_WRITE;
		}
		/*
		 * Fault the range in with a COW-resolving fault type; the
		 * lookup's side effect resolves needs_copy, then we retry.
		 */
		if (vm_map_lookup_and_lock_object(&local_map,
		    offset, fault_type,
		    OBJECT_LOCK_EXCLUSIVE,
		    &version, &object,
		    &new_offset, &prot, &wired,
		    NULL,
		    &real_map, NULL) != KERN_SUCCESS) {
			if (fault_type == VM_PROT_WRITE) {
				vm_counters.create_upl_lookup_failure_write++;
			} else {
				vm_counters.create_upl_lookup_failure_copy++;
			}
			vm_map_unlock_read(local_map);
			ret = KERN_FAILURE;
			goto done;
		}
		if (real_map != local_map) {
			vm_map_unlock(real_map);
		}
		vm_map_unlock_read(local_map);

		vm_object_unlock(object);

		goto REDISCOVER_ENTRY;
	}

	if (entry->is_sub_map) {
		vm_map_t        submap;

		/*
		 * Descend into the submap: take a reference, translate the
		 * offset into the submap's address space and restart.
		 */
		submap = VME_SUBMAP(entry);
		local_start = entry->vme_start;
		local_offset = (vm_map_offset_t)VME_OFFSET(entry);

		vm_map_reference(submap);
		vm_map_unlock_read(map);

		DEBUG4K_UPL("map %p offset 0x%llx (0x%llx) size 0x%x (adjusted 0x%llx original 0x%llx) offset_in_mapped_page 0x%llx submap %p\n", map, (uint64_t)offset, (uint64_t)original_offset, *upl_size, (uint64_t)adjusted_size, (uint64_t)original_size, offset_in_mapped_page, submap);
		offset += offset_in_mapped_page;
		*upl_size -= offset_in_mapped_page;

		if (release_map) {
			vm_map_deallocate(map);
		}
		map = submap;
		/* the submap reference taken above is dropped at "done" */
		release_map = TRUE;
		offset = local_offset + (offset - local_start);
		goto start_with_map;
	}

	/*
	 * For a "copy in" UPL (UPL_COPYOUT_FROM clear), push any data held
	 * by the shadow chain / copy object into place first — one time only.
	 */
	if (sync_cow_data &&
	    (VME_OBJECT(entry)->shadow ||
	    VME_OBJECT(entry)->vo_copy)) {
		local_object = VME_OBJECT(entry);
		local_start = entry->vme_start;
		local_offset = (vm_map_offset_t)VME_OFFSET(entry);

		vm_object_reference(local_object);
		vm_map_unlock_read(map);

		if (local_object->shadow && local_object->vo_copy) {
			vm_object_lock_request(local_object->shadow,
			    ((vm_object_offset_t)
			    ((offset - local_start) +
			    local_offset) +
			    local_object->vo_shadow_offset),
			    *upl_size, FALSE,
			    MEMORY_OBJECT_DATA_SYNC,
			    VM_PROT_NO_CHANGE);
		}
		sync_cow_data = FALSE;
		vm_object_deallocate(local_object);

		goto REDISCOVER_ENTRY;
	}
	/* caller asked for an explicit data sync (UPL_FORCE_DATA_SYNC) */
	if (force_data_sync) {
		local_object = VME_OBJECT(entry);
		local_start = entry->vme_start;
		local_offset = (vm_map_offset_t)VME_OFFSET(entry);

		vm_object_reference(local_object);
		vm_map_unlock_read(map);

		vm_object_lock_request(local_object,
		    ((vm_object_offset_t)
		    ((offset - local_start) +
		    local_offset)),
		    (vm_object_size_t)*upl_size,
		    FALSE,
		    MEMORY_OBJECT_DATA_SYNC,
		    VM_PROT_NO_CHANGE);

		force_data_sync = FALSE;
		vm_object_deallocate(local_object);

		goto REDISCOVER_ENTRY;
	}
	/* report the backing object's type back to the caller */
	if (VME_OBJECT(entry)->private) {
		*flags = UPL_DEV_MEMORY;
	} else {
		*flags = 0;
	}

	if (VME_OBJECT(entry)->phys_contiguous) {
		*flags |= UPL_PHYS_CONTIG;
	}

	local_object = VME_OBJECT(entry);
	local_offset = (vm_map_offset_t)VME_OFFSET(entry);
	local_start = entry->vme_start;

	/*
	 * Wiring will copy the pages to the shadow object.
	 * The shadow object will not be code-signed so
	 * attempting to execute code from these copied pages
	 * would trigger a code-signing violation.
	 */
	if (entry->protection & VM_PROT_EXECUTE) {
#if MACH_ASSERT
		printf("pid %d[%s] create_upl out of executable range from "
		    "0x%llx to 0x%llx: side effects may include "
		    "code-signing violations later on\n",
		    proc_selfpid(),
		    (get_bsdtask_info(current_task())
		    ? proc_name_address(get_bsdtask_info(current_task()))
		    : "?"),
		    (uint64_t) entry->vme_start,
		    (uint64_t) entry->vme_end);
#endif /* MACH_ASSERT */
		DTRACE_VM2(cs_executable_create_upl,
		    uint64_t, (uint64_t)entry->vme_start,
		    uint64_t, (uint64_t)entry->vme_end);
		cs_executable_create_upl++;
	}

	vm_object_lock(local_object);

	/*
	 * Ensure that this object is "true_share" and "copy_delay" now,
	 * while we're still holding the VM map lock.  After we unlock the map,
	 * anything could happen to that mapping, including some copy-on-write
	 * activity.  We need to make sure that the IOPL will point at the
	 * same memory as the mapping.
	 */
	if (local_object->true_share) {
		assert(local_object->copy_strategy !=
		    MEMORY_OBJECT_COPY_SYMMETRIC);
	} else if (!is_kernel_object(local_object) &&
	    local_object != compressor_object &&
	    !local_object->phys_contiguous) {
#if VM_OBJECT_TRACKING_OP_TRUESHARE
		if (!local_object->true_share &&
		    vm_object_tracking_btlog) {
			btlog_record(vm_object_tracking_btlog, local_object,
			    VM_OBJECT_TRACKING_OP_TRUESHARE,
			    btref_get(__builtin_frame_address(0), 0));
		}
#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
		VM_OBJECT_SET_TRUE_SHARE(local_object, TRUE);
		if (local_object->copy_strategy ==
		    MEMORY_OBJECT_COPY_SYMMETRIC) {
			local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		}
	}

	/* hold the object across the iopl request after dropping the map lock */
	vm_object_reference_locked(local_object);
	vm_object_unlock(local_object);

	vm_map_unlock_read(map);

	/* re-apply the sub-page offset saved earlier for small-page maps */
	offset += offset_in_mapped_page;
	assert(*upl_size > offset_in_mapped_page);
	*upl_size -= offset_in_mapped_page;

	ret = vm_object_iopl_request(local_object,
	    ((vm_object_offset_t)
	    ((offset - local_start) + local_offset)),
	    *upl_size,
	    upl,
	    page_list,
	    count,
	    caller_flags,
	    tag);
	vm_object_deallocate(local_object);

done:
	/* drop the submap reference taken during submap traversal, if any */
	if (release_map) {
		vm_map_deallocate(map);
	}

	return ret;
}
7200 
7201 /*
7202  * Internal routine to enter a UPL into a VM map.
7203  *
7204  * JMM - This should just be doable through the standard
7205  * vm_map_enter() API.
7206  */
7207 kern_return_t
7208 vm_map_enter_upl_range(
7209 	vm_map_t                map,
7210 	upl_t                   upl,
7211 	vm_object_offset_t      offset_to_map,
7212 	vm_size_t               size_to_map,
7213 	vm_prot_t               prot_to_map,
7214 	vm_map_offset_t         *dst_addr)
7215 {
7216 	vm_map_size_t           size;
7217 	vm_object_offset_t      offset;
7218 	vm_map_offset_t         addr;
7219 	vm_page_t               m;
7220 	kern_return_t           kr;
7221 	int                     isVectorUPL = 0, curr_upl = 0;
7222 	upl_t                   vector_upl = NULL;
7223 	mach_vm_offset_t        vector_upl_dst_addr = 0;
7224 	vm_map_t                vector_upl_submap = NULL;
7225 	upl_offset_t            subupl_offset = 0;
7226 	upl_size_t              subupl_size = 0;
7227 
7228 	if (upl == UPL_NULL) {
7229 		return KERN_INVALID_ARGUMENT;
7230 	}
7231 
7232 	DEBUG4K_UPL("map %p upl %p flags 0x%x object %p offset 0x%llx (uploff: 0x%llx) size 0x%lx (uplsz: 0x%x) \n", map, upl, upl->flags, upl->map_object, offset_to_map, upl->u_offset, size_to_map, upl->u_size);
7233 	assert(map == kernel_map);
7234 
7235 	if ((isVectorUPL = vector_upl_is_valid(upl))) {
7236 		int mapped = 0, valid_upls = 0;
7237 		vector_upl = upl;
7238 
7239 		upl_lock(vector_upl);
7240 		for (curr_upl = 0; curr_upl < vector_upl_max_upls(vector_upl); curr_upl++) {
7241 			upl =  vector_upl_subupl_byindex(vector_upl, curr_upl );
7242 			if (upl == NULL) {
7243 				continue;
7244 			}
7245 			valid_upls++;
7246 			if (UPL_PAGE_LIST_MAPPED & upl->flags) {
7247 				mapped++;
7248 			}
7249 		}
7250 
7251 		if (mapped) {
7252 			if (mapped != valid_upls) {
7253 				panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped", mapped, valid_upls);
7254 			} else {
7255 				upl_unlock(vector_upl);
7256 				return KERN_FAILURE;
7257 			}
7258 		}
7259 
7260 		if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
7261 			panic("TODO4K: vector UPL not implemented");
7262 		}
7263 
7264 		vector_upl_submap = kmem_suballoc(map, &vector_upl_dst_addr,
7265 		    vector_upl->u_size, VM_MAP_CREATE_DEFAULT,
7266 		    VM_FLAGS_ANYWHERE, KMS_NOFAIL | KMS_DATA,
7267 		    VM_KERN_MEMORY_NONE).kmr_submap;
7268 		map = vector_upl_submap;
7269 		vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
7270 		curr_upl = 0;
7271 	} else {
7272 		upl_lock(upl);
7273 	}
7274 
7275 process_upl_to_enter:
7276 	if (isVectorUPL) {
7277 		if (curr_upl == vector_upl_max_upls(vector_upl)) {
7278 			*dst_addr = vector_upl_dst_addr;
7279 			upl_unlock(vector_upl);
7280 			return KERN_SUCCESS;
7281 		}
7282 		upl =  vector_upl_subupl_byindex(vector_upl, curr_upl++ );
7283 		if (upl == NULL) {
7284 			goto process_upl_to_enter;
7285 		}
7286 
7287 		vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
7288 		*dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
7289 	} else {
7290 		/*
7291 		 * check to see if already mapped
7292 		 */
7293 		if (UPL_PAGE_LIST_MAPPED & upl->flags) {
7294 			upl_unlock(upl);
7295 			return KERN_FAILURE;
7296 		}
7297 	}
7298 
7299 	if ((!(upl->flags & UPL_SHADOWED)) &&
7300 	    ((upl->flags & UPL_HAS_BUSY) ||
7301 	    !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
7302 		vm_object_t             object;
7303 		vm_page_t               alias_page;
7304 		vm_object_offset_t      new_offset;
7305 		unsigned int            pg_num;
7306 
7307 		size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7308 		object = upl->map_object;
7309 		upl->map_object = vm_object_allocate(vm_object_round_page(size));
7310 
7311 		vm_object_lock(upl->map_object);
7312 
7313 		upl->map_object->shadow = object;
7314 		VM_OBJECT_SET_PAGEOUT(upl->map_object, TRUE);
7315 		VM_OBJECT_SET_CAN_PERSIST(upl->map_object, FALSE);
7316 		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
7317 		upl->map_object->vo_shadow_offset = upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset;
7318 		assertf(page_aligned(upl->map_object->vo_shadow_offset),
7319 		    "object %p shadow_offset 0x%llx",
7320 		    upl->map_object,
7321 		    (uint64_t)upl->map_object->vo_shadow_offset);
7322 		upl->map_object->wimg_bits = object->wimg_bits;
7323 		offset = upl->map_object->vo_shadow_offset;
7324 		new_offset = 0;
7325 
7326 		upl->flags |= UPL_SHADOWED;
7327 
7328 		while (size) {
7329 			pg_num = (unsigned int) (new_offset / PAGE_SIZE);
7330 			assert(pg_num == new_offset / PAGE_SIZE);
7331 
7332 			if (bitmap_test(upl->lite_list, pg_num)) {
7333 				alias_page = vm_page_grab_fictitious(TRUE);
7334 
7335 				vm_object_lock(object);
7336 
7337 				m = vm_page_lookup(object, offset);
7338 				if (m == VM_PAGE_NULL) {
7339 					panic("vm_upl_map: page missing");
7340 				}
7341 
7342 				/*
7343 				 * Convert the fictitious page to a private
7344 				 * shadow of the real page.
7345 				 */
7346 				assert(alias_page->vmp_fictitious);
7347 				alias_page->vmp_fictitious = FALSE;
7348 				alias_page->vmp_private = TRUE;
7349 				alias_page->vmp_free_when_done = TRUE;
7350 				/*
7351 				 * since m is a page in the upl it must
7352 				 * already be wired or BUSY, so it's
7353 				 * safe to assign the underlying physical
7354 				 * page to the alias
7355 				 */
7356 				VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
7357 
7358 				vm_object_unlock(object);
7359 
7360 				vm_page_lockspin_queues();
7361 				vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
7362 				vm_page_unlock_queues();
7363 
7364 				vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
7365 
7366 				assert(!alias_page->vmp_wanted);
7367 				alias_page->vmp_busy = FALSE;
7368 				alias_page->vmp_absent = FALSE;
7369 			}
7370 			size -= PAGE_SIZE;
7371 			offset += PAGE_SIZE_64;
7372 			new_offset += PAGE_SIZE_64;
7373 		}
7374 		vm_object_unlock(upl->map_object);
7375 	}
7376 	if (upl->flags & UPL_SHADOWED) {
7377 		if (isVectorUPL) {
7378 			offset = 0;
7379 		} else {
7380 			offset = offset_to_map;
7381 		}
7382 	} else {
7383 		offset = upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map)) - upl->map_object->paging_offset;
7384 		if (!isVectorUPL) {
7385 			offset += offset_to_map;
7386 		}
7387 	}
7388 
7389 	if (isVectorUPL) {
7390 		size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7391 	} else {
7392 		size = MIN(upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map)), size_to_map);
7393 	}
7394 
7395 	vm_object_reference(upl->map_object);
7396 
7397 	if (!isVectorUPL) {
7398 		*dst_addr = 0;
7399 		/*
7400 		 * NEED A UPL_MAP ALIAS
7401 		 */
7402 		kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
7403 		    VM_MAP_KERNEL_FLAGS_DATA_ANYWHERE(.vm_tag = VM_KERN_MEMORY_OSFMK),
7404 		    upl->map_object, offset, FALSE,
7405 		    prot_to_map, VM_PROT_ALL, VM_INHERIT_DEFAULT);
7406 
7407 		if (kr != KERN_SUCCESS) {
7408 			vm_object_deallocate(upl->map_object);
7409 			upl_unlock(upl);
7410 			return kr;
7411 		}
7412 	} else {
7413 		kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
7414 		    VM_MAP_KERNEL_FLAGS_FIXED(.vm_tag = VM_KERN_MEMORY_OSFMK),
7415 		    upl->map_object, offset, FALSE,
7416 		    prot_to_map, VM_PROT_ALL, VM_INHERIT_DEFAULT);
7417 		if (kr) {
7418 			panic("vm_map_enter failed for a Vector UPL");
7419 		}
7420 	}
7421 	upl->u_mapped_size = (upl_size_t) size; /* When we allow multiple submappings of the UPL */
7422 	                                        /* this will have to be an increment rather than */
7423 	                                        /* an assignment. */
7424 	vm_object_lock(upl->map_object);
7425 
7426 	for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
7427 		m = vm_page_lookup(upl->map_object, offset);
7428 
7429 		if (m) {
7430 			m->vmp_pmapped = TRUE;
7431 
7432 			/*
7433 			 * CODE SIGNING ENFORCEMENT: page has been wpmapped,
7434 			 * but only in kernel space. If this was on a user map,
7435 			 * we'd have to set the wpmapped bit.
7436 			 */
7437 			/* m->vmp_wpmapped = TRUE; */
7438 			assert(map->pmap == kernel_pmap);
7439 
7440 			kr = pmap_enter_check(map->pmap, addr, m, prot_to_map, VM_PROT_NONE, 0, TRUE);
7441 
7442 			assert(kr == KERN_SUCCESS);
7443 #if KASAN
7444 			kasan_notify_address(addr, PAGE_SIZE_64);
7445 #endif
7446 		}
7447 		offset += PAGE_SIZE_64;
7448 	}
7449 	vm_object_unlock(upl->map_object);
7450 
7451 	/*
7452 	 * hold a reference for the mapping
7453 	 */
7454 	upl->ref_count++;
7455 	upl->flags |= UPL_PAGE_LIST_MAPPED;
7456 	upl->kaddr = (vm_offset_t) *dst_addr;
7457 	assert(upl->kaddr == *dst_addr);
7458 
7459 	if (isVectorUPL) {
7460 		goto process_upl_to_enter;
7461 	}
7462 
7463 	if (!isVectorUPL) {
7464 		vm_map_offset_t addr_adjustment;
7465 
7466 		addr_adjustment = (vm_map_offset_t)(upl->u_offset - upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map)));
7467 		if (addr_adjustment) {
7468 			assert(VM_MAP_PAGE_MASK(map) != PAGE_MASK);
7469 			DEBUG4K_UPL("dst_addr 0x%llx (+ 0x%llx) -> 0x%llx\n", (uint64_t)*dst_addr, (uint64_t)addr_adjustment, (uint64_t)(*dst_addr + addr_adjustment));
7470 			*dst_addr += addr_adjustment;
7471 		}
7472 	}
7473 
7474 	upl_unlock(upl);
7475 
7476 	return KERN_SUCCESS;
7477 }
7478 
7479 kern_return_t
7480 vm_map_enter_upl(
7481 	vm_map_t                map,
7482 	upl_t                   upl,
7483 	vm_map_offset_t         *dst_addr)
7484 {
7485 	upl_size_t upl_size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7486 	return vm_map_enter_upl_range(map, upl, 0, upl_size, VM_PROT_DEFAULT, dst_addr);
7487 }
7488 
7489 /*
7490  * Internal routine to remove a UPL mapping from a VM map.
7491  *
7492  * XXX - This should just be doable through a standard
7493  * vm_map_remove() operation.  Otherwise, implicit clean-up
7494  * of the target map won't be able to correctly remove
7495  * these (and release the reference on the UPL).  Having
7496  * to do this means we can't map these into user-space
7497  * maps yet.
7498  */
/*
 * Remove a UPL mapping previously established by vm_map_enter_upl_range().
 *
 * For a vector UPL: every valid sub-UPL must currently be mapped (a mix
 * of mapped and unmapped sub-UPLs is a fatal inconsistency).  After the
 * per-sub-UPL mapping state has been cleared, the dedicated submap that
 * was created at map time is freed in one shot, which removes all of
 * the element mappings at once.
 *
 * For a regular UPL: the range recorded in upl->kaddr / upl->u_mapped_size
 * is removed from "map" and the mapping's reference on the UPL is dropped.
 *
 * offset_to_unmap / size_to_unmap are currently unused: the entire
 * mapped range is always removed.
 *
 * Returns KERN_SUCCESS on removal, KERN_FAILURE if the UPL (or all of
 * the sub-UPLs) was not mapped, KERN_INVALID_ARGUMENT for a NULL UPL.
 */
kern_return_t
vm_map_remove_upl_range(
	vm_map_t        map,
	upl_t           upl,
	__unused vm_object_offset_t    offset_to_unmap,
	__unused vm_size_t             size_to_unmap)
{
	vm_address_t    addr;
	upl_size_t      size;
	int             isVectorUPL = 0, curr_upl = 0;
	upl_t           vector_upl = NULL;

	if (upl == UPL_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if ((isVectorUPL = vector_upl_is_valid(upl))) {
		int     unmapped = 0, valid_upls = 0;
		vector_upl = upl;
		upl_lock(vector_upl);
		/*
		 * Count the valid sub-UPLs and how many of them are not
		 * currently mapped.
		 */
		for (curr_upl = 0; curr_upl < vector_upl_max_upls(vector_upl); curr_upl++) {
			upl =  vector_upl_subupl_byindex(vector_upl, curr_upl );
			if (upl == NULL) {
				continue;
			}
			valid_upls++;
			if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) {
				unmapped++;
			}
		}

		if (unmapped) {
			/*
			 * Either every sub-UPL is unmapped (nothing to do,
			 * report failure) or only some are, which means the
			 * vector UPL is in an inconsistent state: panic.
			 */
			if (unmapped != valid_upls) {
				panic("%d of the %d sub-upls within the Vector UPL is/are not mapped", unmapped, valid_upls);
			} else {
				upl_unlock(vector_upl);
				return KERN_FAILURE;
			}
		}
		curr_upl = 0;
	} else {
		upl_lock(upl);
	}

process_upl_to_remove:
	if (isVectorUPL) {
		if (curr_upl == vector_upl_max_upls(vector_upl)) {
			/*
			 * All sub-UPLs have been processed: tear down the
			 * submap created by vm_map_enter_upl_range(), which
			 * removes every element mapping in one operation.
			 */
			vm_map_t v_upl_submap;
			vm_offset_t v_upl_submap_dst_addr;
			vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);

			kmem_free_guard(map, v_upl_submap_dst_addr,
			    vector_upl->u_size, KMF_NONE, KMEM_GUARD_SUBMAP);
			vm_map_deallocate(v_upl_submap);
			upl_unlock(vector_upl);
			return KERN_SUCCESS;
		}

		upl =  vector_upl_subupl_byindex(vector_upl, curr_upl++ );
		if (upl == NULL) {
			goto process_upl_to_remove;
		}
	}

	if (upl->flags & UPL_PAGE_LIST_MAPPED) {
		/* snapshot the mapped range before clearing the state */
		addr = upl->kaddr;
		size = upl->u_mapped_size;

		assert(upl->ref_count > 1);
		upl->ref_count--;               /* removing mapping ref */

		upl->flags &= ~UPL_PAGE_LIST_MAPPED;
		upl->kaddr = (vm_offset_t) 0;
		upl->u_mapped_size = 0;

		if (isVectorUPL) {
			/*
			 * If it's a Vectored UPL, we'll be removing the entire
			 * submap anyways, so no need to remove individual UPL
			 * element mappings from within the submap
			 */
			goto process_upl_to_remove;
		}

		/* drop the UPL lock before calling into the VM map layer */
		upl_unlock(upl);

		vm_map_remove(map,
		    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
		    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)));
		return KERN_SUCCESS;
	}
	upl_unlock(upl);

	return KERN_FAILURE;
}
7594 
7595 kern_return_t
7596 vm_map_remove_upl(
7597 	vm_map_t        map,
7598 	upl_t           upl)
7599 {
7600 	upl_size_t upl_size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7601 	return vm_map_remove_upl_range(map, upl, 0, upl_size);
7602 }
7603 
/*
 * Mark the absent pages of an IOPL as now holding valid data, and wire
 * them.
 *
 * Pages that were left busy+absent at IOPL creation (i.e. to be filled
 * by the device rather than by the VM) are transitioned to valid:
 * cleared of "absent", marked dirty, wired (each must have wire_count
 * 0 and be off the page queues), and woken up.  The object's wired-page
 * accounting and the global vm_page_wire_count are updated under the
 * appropriate locks, attributing the wired pages to "tag".
 *
 * Only plain UPL_IO_WIRE UPLs on regular, non-purgeable-volatile/empty
 * objects are supported; any other flavor (device memory, shadowed,
 * access-blocked, internal, vector, kernel/compressor object) panics.
 */
void
iopl_valid_data(
	upl_t    upl,
	vm_tag_t tag)
{
	vm_object_t     object;
	vm_offset_t     offset;
	vm_page_t       m, nxt_page = VM_PAGE_NULL;
	upl_size_t      size;
	int             wired_count = 0;

	if (upl == NULL) {
		panic("iopl_valid_data: NULL upl");
	}
	if (vector_upl_is_valid(upl)) {
		panic("iopl_valid_data: vector upl");
	}
	if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_SHADOWED | UPL_ACCESS_BLOCKED | UPL_IO_WIRE | UPL_INTERNAL)) != UPL_IO_WIRE) {
		panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
	}

	object = upl->map_object;

	if (is_kernel_object(object) || object == compressor_object) {
		panic("iopl_valid_data: object == kernel or compressor");
	}

	if (object->purgable == VM_PURGABLE_VOLATILE ||
	    object->purgable == VM_PURGABLE_EMPTY) {
		panic("iopl_valid_data: object %p purgable %d",
		    object, object->purgable);
	}

	size = upl_adjusted_size(upl, PAGE_MASK);

	vm_object_lock(object);
	VM_OBJECT_WIRED_PAGE_UPDATE_START(object);

	bool whole_object;

	/*
	 * If the UPL spans the entire object and every page is resident,
	 * walk the object's resident page list directly; otherwise do a
	 * per-offset page lookup for the UPL's range.
	 */
	if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) {
		nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
		whole_object = true;
	} else {
		offset = (vm_offset_t)(upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset);
		whole_object = false;
	}

	while (size) {
		if (whole_object) {
			if (nxt_page != VM_PAGE_NULL) {
				m = nxt_page;
				nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
			}
		} else {
			m = vm_page_lookup(object, offset);
			offset += PAGE_SIZE;

			if (m == VM_PAGE_NULL) {
				panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
			}
		}
		/* only busy (i.e. still-absent) pages need the transition */
		if (m->vmp_busy) {
			if (!m->vmp_absent) {
				panic("iopl_valid_data: busy page w/o absent");
			}

			if (m->vmp_pageq.next || m->vmp_pageq.prev) {
				panic("iopl_valid_data: busy+absent page on page queue");
			}
			if (m->vmp_reusable) {
				panic("iopl_valid_data: %p is reusable", m);
			}

			/* the device filled this page: it now has valid, dirty data */
			m->vmp_absent = FALSE;
			m->vmp_dirty = TRUE;
			assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
			assert(m->vmp_wire_count == 0);
			m->vmp_wire_count++;
			assert(m->vmp_wire_count);
			if (m->vmp_wire_count == 1) {
				m->vmp_q_state = VM_PAGE_IS_WIRED;
				wired_count++;
			} else {
				panic("iopl_valid_data: %p already wired", m);
			}

			vm_page_wakeup_done(object, m);
		}
		size -= PAGE_SIZE;
	}
	if (wired_count) {
		/* batch the wired-page accounting for all pages transitioned above */
		VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
		assert(object->resident_page_count >= object->wired_page_count);

		/* no need to adjust purgeable accounting for this object: */
		assert(object->purgable != VM_PURGABLE_VOLATILE);
		assert(object->purgable != VM_PURGABLE_EMPTY);

		vm_page_lockspin_queues();
		vm_page_wire_count += wired_count;
		vm_page_unlock_queues();
	}
	VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
	vm_object_unlock(object);
}
7710 
7711 
7712 void
7713 vm_object_set_pmap_cache_attr(
7714 	vm_object_t             object,
7715 	upl_page_info_array_t   user_page_list,
7716 	unsigned int            num_pages,
7717 	boolean_t               batch_pmap_op)
7718 {
7719 	unsigned int    cache_attr = 0;
7720 
7721 	cache_attr = object->wimg_bits & VM_WIMG_MASK;
7722 	assert(user_page_list);
7723 	if (cache_attr != VM_WIMG_USE_DEFAULT) {
7724 		PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7725 	}
7726 }
7727 
7728 
/*
 * Fast path for vm_object_iopl_request(): wire every resident page of
 * "object" for an IOPL that covers the whole object.
 *
 * The caller guarantees (asserted below) that the object is exclusively
 * locked, non-purgeable-volatile/empty, pager-less, copy-less and
 * unshadowed.  Each resident page is wired, recorded in the UPL's lite
 * list, and (optionally) described in "user_page_list".
 *
 * Returns TRUE on success.  Returns FALSE as soon as any page is found
 * in a transient state (busy, fictitious, absent, error, cleaning,
 * restart, laundry) or fails the UPL_REQUEST_FORCE_COHERENCY check;
 * pages already wired are NOT undone here -- the caller falls back to
 * the slow path, which takes over.
 */
static bool
vm_object_iopl_wire_full(
	vm_object_t             object,
	upl_t                   upl,
	upl_page_info_array_t   user_page_list,
	upl_control_flags_t     cntrl_flags,
	vm_tag_t                tag)
{
	vm_page_t       dst_page;
	unsigned int    entry;
	int             page_count;
	int             delayed_unlock = 0;
	boolean_t       retval = TRUE;
	ppnum_t         phys_page;

	vm_object_lock_assert_exclusive(object);
	assert(object->purgable != VM_PURGABLE_VOLATILE);
	assert(object->purgable != VM_PURGABLE_EMPTY);
	assert(object->pager == NULL);
	assert(object->vo_copy == NULL);
	assert(object->shadow == NULL);

	page_count = object->resident_page_count;
	dst_page = (vm_page_t)vm_page_queue_first(&object->memq);

	vm_page_lock_queues();

	while (page_count--) {
		/* bail to the slow path on any page in a transient state */
		if (dst_page->vmp_busy ||
		    dst_page->vmp_fictitious ||
		    dst_page->vmp_absent ||
		    VMP_ERROR_GET(dst_page) ||
		    dst_page->vmp_cleaning ||
		    dst_page->vmp_restart ||
		    dst_page->vmp_laundry) {
			retval = FALSE;
			goto done;
		}
		if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
			retval = FALSE;
			goto done;
		}
		dst_page->vmp_reference = TRUE;

		vm_page_wire(dst_page, tag, FALSE);

		if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
			SET_PAGE_DIRTY(dst_page, FALSE);
		}
		/* UPL covers the whole object, so page offset maps directly to entry */
		entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
		assert(entry >= 0 && entry < object->resident_page_count);
		bitmap_set(upl->lite_list, entry);

		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);

		if (phys_page > upl->highest_page) {
			upl->highest_page = phys_page;
		}

		if (user_page_list) {
			user_page_list[entry].phys_addr = phys_page;
			user_page_list[entry].absent    = dst_page->vmp_absent;
			user_page_list[entry].dirty     = dst_page->vmp_dirty;
			user_page_list[entry].free_when_done   = dst_page->vmp_free_when_done;
			user_page_list[entry].precious  = dst_page->vmp_precious;
			user_page_list[entry].device    = FALSE;
			user_page_list[entry].speculative = FALSE;
			user_page_list[entry].cs_validated = FALSE;
			user_page_list[entry].cs_tainted = FALSE;
			user_page_list[entry].cs_nx     = FALSE;
			user_page_list[entry].needed    = FALSE;
			user_page_list[entry].mark      = FALSE;
		}
		/* periodically yield the page-queues lock to bound hold time */
		if (delayed_unlock++ > 256) {
			delayed_unlock = 0;
			lck_mtx_yield(&vm_page_queue_lock);

			VM_CHECK_MEMORYSTATUS;
		}
		dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq);
	}
done:
	vm_page_unlock_queues();

	VM_CHECK_MEMORYSTATUS;

	return retval;
}
7817 
7818 
/*
 * Fast path for vm_object_iopl_request(): populate an empty object with
 * "page_count" freshly grabbed pages starting at *dst_offset and wire
 * them for an IOPL.
 *
 * Each grabbed page is zero-filled unless UPL_NOZEROFILL/NOZEROFILLIO
 * was requested, in which case it is inserted "absent" and is not wired
 * here.  Pages are recorded in the UPL's lite list and (optionally)
 * described in "user_page_list".  Wire-count and internal/external
 * page-count updates, as well as the owning task's ledger updates, are
 * batched and applied once at the end.
 *
 * On return *dst_offset has been advanced past the inserted range and
 * *page_grab_count holds the number of pages actually inserted.
 *
 * Returns KERN_SUCCESS, or MACH_SEND_INTERRUPTED if the grab loop was
 * interrupted while waiting for free pages (UPL_SET_INTERRUPTIBLE).
 */
static kern_return_t
vm_object_iopl_wire_empty(
	vm_object_t             object,
	upl_t                   upl,
	upl_page_info_array_t   user_page_list,
	upl_control_flags_t     cntrl_flags,
	vm_tag_t                tag,
	vm_object_offset_t     *dst_offset,
	int                     page_count,
	int                    *page_grab_count)
{
	vm_page_t       dst_page;
	boolean_t       no_zero_fill = FALSE;
	int             interruptible;
	int             pages_wired = 0;
	int             pages_inserted = 0;
	int             entry = 0;
	uint64_t        delayed_ledger_update = 0;
	kern_return_t   ret = KERN_SUCCESS;
	int             grab_options;
	ppnum_t         phys_page;

	vm_object_lock_assert_exclusive(object);
	assert(object->purgable != VM_PURGABLE_VOLATILE);
	assert(object->purgable != VM_PURGABLE_EMPTY);
	assert(object->pager == NULL);
	assert(object->vo_copy == NULL);
	assert(object->shadow == NULL);

	if (cntrl_flags & UPL_SET_INTERRUPTIBLE) {
		interruptible = THREAD_ABORTSAFE;
	} else {
		interruptible = THREAD_UNINT;
	}

	if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) {
		no_zero_fill = TRUE;
	}

	grab_options = 0;
#if CONFIG_SECLUDED_MEMORY
	if (object->can_grab_secluded) {
		grab_options |= VM_PAGE_GRAB_SECLUDED;
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	while (page_count--) {
		/*
		 * Grab a free page, blocking (interruptibly if requested)
		 * until one is available.
		 */
		while ((dst_page = vm_page_grab_options(grab_options))
		    == VM_PAGE_NULL) {
			OSAddAtomic(page_count, &vm_upl_wait_for_pages);

			VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);

			if (vm_page_wait(interruptible) == FALSE) {
				/*
				 * interrupted case
				 */
				OSAddAtomic(-page_count, &vm_upl_wait_for_pages);

				VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);

				ret = MACH_SEND_INTERRUPTED;
				goto done;
			}
			OSAddAtomic(-page_count, &vm_upl_wait_for_pages);

			VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
		}
		if (no_zero_fill == FALSE) {
			vm_page_zero_fill(dst_page);
		} else {
			/* caller will fill the page; leave it marked absent */
			dst_page->vmp_absent = TRUE;
		}

		dst_page->vmp_reference = TRUE;

		if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
			SET_PAGE_DIRTY(dst_page, FALSE);
		}
		/* only non-absent (zero-filled) pages get wired here */
		if (dst_page->vmp_absent == FALSE) {
			assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q);
			assert(dst_page->vmp_wire_count == 0);
			dst_page->vmp_wire_count++;
			dst_page->vmp_q_state = VM_PAGE_IS_WIRED;
			assert(dst_page->vmp_wire_count);
			pages_wired++;
			vm_page_wakeup_done(object, dst_page);
		}
		pages_inserted++;

		/* defer the owner's ledger updates; applied in bulk below */
		vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);

		bitmap_set(upl->lite_list, entry);

		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);

		if (phys_page > upl->highest_page) {
			upl->highest_page = phys_page;
		}

		if (user_page_list) {
			user_page_list[entry].phys_addr = phys_page;
			user_page_list[entry].absent    = dst_page->vmp_absent;
			user_page_list[entry].dirty     = dst_page->vmp_dirty;
			user_page_list[entry].free_when_done    = FALSE;
			user_page_list[entry].precious  = FALSE;
			user_page_list[entry].device    = FALSE;
			user_page_list[entry].speculative = FALSE;
			user_page_list[entry].cs_validated = FALSE;
			user_page_list[entry].cs_tainted = FALSE;
			user_page_list[entry].cs_nx     = FALSE;
			user_page_list[entry].needed    = FALSE;
			user_page_list[entry].mark      = FALSE;
		}
		entry++;
		*dst_offset += PAGE_SIZE_64;
	}
done:
	/* batch-update the global wire count for all pages wired above */
	if (pages_wired) {
		vm_page_lockspin_queues();
		vm_page_wire_count += pages_wired;
		vm_page_unlock_queues();
	}
	if (pages_inserted) {
		if (object->internal) {
			OSAddAtomic(pages_inserted, &vm_page_internal_count);
		} else {
			OSAddAtomic(pages_inserted, &vm_page_external_count);
		}
	}
	/* apply the deferred ledger credits to the owning task */
	if (delayed_ledger_update) {
		task_t          owner;
		int             ledger_idx_volatile;
		int             ledger_idx_nonvolatile;
		int             ledger_idx_volatile_compressed;
		int             ledger_idx_nonvolatile_compressed;
		int             ledger_idx_composite;
		int             ledger_idx_external_wired;
		boolean_t       do_footprint;

		owner = VM_OBJECT_OWNER(object);
		assert(owner);

		vm_object_ledger_tag_ledgers(object,
		    &ledger_idx_volatile,
		    &ledger_idx_nonvolatile,
		    &ledger_idx_volatile_compressed,
		    &ledger_idx_nonvolatile_compressed,
		    &ledger_idx_composite,
		    &ledger_idx_external_wired,
		    &do_footprint);

		if (object->internal) {
			/* more non-volatile bytes */
			ledger_credit(owner->ledger,
			    ledger_idx_nonvolatile,
			    delayed_ledger_update);
			if (do_footprint) {
				/* more footprint */
				ledger_credit(owner->ledger,
				    task_ledgers.phys_footprint,
				    delayed_ledger_update);
			} else if (ledger_idx_composite != -1) {
				ledger_credit(owner->ledger,
				    ledger_idx_composite,
				    delayed_ledger_update);
			}
		} else {
			/* more external wired bytes */
			ledger_credit(owner->ledger,
			    ledger_idx_external_wired,
			    delayed_ledger_update);
			if (do_footprint) {
				/* more footprint */
				ledger_credit(owner->ledger,
				    task_ledgers.phys_footprint,
				    delayed_ledger_update);
			} else if (ledger_idx_composite != -1) {
				ledger_credit(owner->ledger,
				    ledger_idx_composite,
				    delayed_ledger_update);
			}
		}
	}

	assert(page_grab_count);
	*page_grab_count = pages_inserted;

	return ret;
}
8009 
8010 
8011 
8012 kern_return_t
8013 vm_object_iopl_request(
8014 	vm_object_t             object,
8015 	vm_object_offset_t      offset,
8016 	upl_size_t              size,
8017 	upl_t                   *upl_ptr,
8018 	upl_page_info_array_t   user_page_list,
8019 	unsigned int            *page_list_count,
8020 	upl_control_flags_t     cntrl_flags,
8021 	vm_tag_t                tag)
8022 {
8023 	vm_page_t               dst_page;
8024 	vm_object_offset_t      dst_offset;
8025 	upl_size_t              xfer_size;
8026 	upl_t                   upl = NULL;
8027 	unsigned int            entry;
8028 	int                     no_zero_fill = FALSE;
8029 	unsigned int            size_in_pages;
8030 	int                     page_grab_count = 0;
8031 	u_int32_t               psize;
8032 	kern_return_t           ret;
8033 	vm_prot_t               prot;
8034 	struct vm_object_fault_info fault_info = {};
8035 	struct  vm_page_delayed_work    dw_array;
8036 	struct  vm_page_delayed_work    *dwp, *dwp_start;
8037 	bool                    dwp_finish_ctx = TRUE;
8038 	int                     dw_count;
8039 	int                     dw_limit;
8040 	int                     dw_index;
8041 	boolean_t               caller_lookup;
8042 	int                     io_tracking_flag = 0;
8043 	int                     interruptible;
8044 	ppnum_t                 phys_page;
8045 
8046 	boolean_t               set_cache_attr_needed = FALSE;
8047 	boolean_t               free_wired_pages = FALSE;
8048 	boolean_t               fast_path_empty_req = FALSE;
8049 	boolean_t               fast_path_full_req = FALSE;
8050 
8051 #if DEVELOPMENT || DEBUG
8052 	task_t                  task = current_task();
8053 #endif /* DEVELOPMENT || DEBUG */
8054 
8055 	dwp_start = dwp = NULL;
8056 
8057 	vm_object_offset_t original_offset = offset;
8058 	upl_size_t original_size = size;
8059 
8060 //	DEBUG4K_UPL("object %p offset 0x%llx size 0x%llx cntrl_flags 0x%llx\n", object, (uint64_t)offset, (uint64_t)size, cntrl_flags);
8061 
8062 	size = (upl_size_t)(vm_object_round_page(offset + size) - vm_object_trunc_page(offset));
8063 	offset = vm_object_trunc_page(offset);
8064 	if (size != original_size || offset != original_offset) {
8065 		DEBUG4K_IOKIT("flags 0x%llx object %p offset 0x%llx size 0x%x -> offset 0x%llx size 0x%x\n", cntrl_flags, object, original_offset, original_size, offset, size);
8066 	}
8067 
8068 	if (cntrl_flags & ~UPL_VALID_FLAGS) {
8069 		/*
8070 		 * For forward compatibility's sake,
8071 		 * reject any unknown flag.
8072 		 */
8073 		return KERN_INVALID_VALUE;
8074 	}
8075 	if (vm_lopage_needed == FALSE) {
8076 		cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8077 	}
8078 
8079 	if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8080 		if ((cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) {
8081 			return KERN_INVALID_VALUE;
8082 		}
8083 
8084 		if (object->phys_contiguous) {
8085 			if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) {
8086 				return KERN_INVALID_ADDRESS;
8087 			}
8088 
8089 			if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) {
8090 				return KERN_INVALID_ADDRESS;
8091 			}
8092 		}
8093 	}
8094 	if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) {
8095 		no_zero_fill = TRUE;
8096 	}
8097 
8098 	if (cntrl_flags & UPL_COPYOUT_FROM) {
8099 		prot = VM_PROT_READ;
8100 	} else {
8101 		prot = VM_PROT_READ | VM_PROT_WRITE;
8102 	}
8103 
8104 	if ((!object->internal) && (object->paging_offset != 0)) {
8105 		panic("vm_object_iopl_request: external object with non-zero paging offset");
8106 	}
8107 
8108 
8109 	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0);
8110 
8111 #if CONFIG_IOSCHED || UPL_DEBUG
8112 	if ((object->io_tracking && !is_kernel_object(object)) || upl_debug_enabled) {
8113 		io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8114 	}
8115 #endif
8116 
8117 #if CONFIG_IOSCHED
8118 	if (object->io_tracking) {
8119 		/* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8120 		if (!is_kernel_object(object)) {
8121 			io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8122 		}
8123 	}
8124 #endif
8125 
8126 	if (object->phys_contiguous) {
8127 		psize = PAGE_SIZE;
8128 	} else {
8129 		psize = size;
8130 
8131 		dw_count = 0;
8132 		dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8133 		dwp_start = vm_page_delayed_work_get_ctx();
8134 		if (dwp_start == NULL) {
8135 			dwp_start = &dw_array;
8136 			dw_limit = 1;
8137 			dwp_finish_ctx = FALSE;
8138 		}
8139 
8140 		dwp = dwp_start;
8141 	}
8142 
8143 	if (cntrl_flags & UPL_SET_INTERNAL) {
8144 		upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8145 		user_page_list = size ? upl->page_list : NULL;
8146 	} else {
8147 		upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8148 	}
8149 	if (user_page_list) {
8150 		user_page_list[0].device = FALSE;
8151 	}
8152 	*upl_ptr = upl;
8153 
8154 	if (cntrl_flags & UPL_NOZEROFILLIO) {
8155 		DTRACE_VM4(upl_nozerofillio,
8156 		    vm_object_t, object,
8157 		    vm_object_offset_t, offset,
8158 		    upl_size_t, size,
8159 		    upl_t, upl);
8160 	}
8161 
8162 	upl->map_object = object;
8163 	upl->u_offset = original_offset;
8164 	upl->u_size = original_size;
8165 
8166 	size_in_pages = size / PAGE_SIZE;
8167 
8168 	if (is_kernel_object(object) &&
8169 	    !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8170 		upl->flags |= UPL_KERNEL_OBJECT;
8171 #if UPL_DEBUG
8172 		vm_object_lock(object);
8173 #else
8174 		vm_object_lock_shared(object);
8175 #endif
8176 	} else {
8177 		vm_object_lock(object);
8178 		vm_object_activity_begin(object);
8179 	}
8180 	/*
8181 	 * paging in progress also protects the paging_offset
8182 	 */
8183 	upl->u_offset = original_offset + object->paging_offset;
8184 
8185 	if (cntrl_flags & UPL_BLOCK_ACCESS) {
8186 		/*
8187 		 * The user requested that access to the pages in this UPL
8188 		 * be blocked until the UPL is commited or aborted.
8189 		 */
8190 		upl->flags |= UPL_ACCESS_BLOCKED;
8191 	}
8192 
8193 #if CONFIG_IOSCHED || UPL_DEBUG
8194 	if ((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8195 		vm_object_activity_begin(object);
8196 		queue_enter(&object->uplq, upl, upl_t, uplq);
8197 	}
8198 #endif
8199 
8200 	if (object->phys_contiguous) {
8201 		if (upl->flags & UPL_ACCESS_BLOCKED) {
8202 			assert(!object->blocked_access);
8203 			object->blocked_access = TRUE;
8204 		}
8205 
8206 		vm_object_unlock(object);
8207 
8208 		/*
8209 		 * don't need any shadow mappings for this one
8210 		 * since it is already I/O memory
8211 		 */
8212 		upl->flags |= UPL_DEVICE_MEMORY;
8213 
8214 		upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1) >> PAGE_SHIFT);
8215 
8216 		if (user_page_list) {
8217 			user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset) >> PAGE_SHIFT);
8218 			user_page_list[0].device = TRUE;
8219 		}
8220 		if (page_list_count != NULL) {
8221 			if (upl->flags & UPL_INTERNAL) {
8222 				*page_list_count = 0;
8223 			} else {
8224 				*page_list_count = 1;
8225 			}
8226 		}
8227 
8228 		VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8229 #if DEVELOPMENT || DEBUG
8230 		if (task != NULL) {
8231 			ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
8232 		}
8233 #endif /* DEVELOPMENT || DEBUG */
8234 		return KERN_SUCCESS;
8235 	}
8236 	if (!is_kernel_object(object) && object != compressor_object) {
8237 		/*
8238 		 * Protect user space from future COW operations
8239 		 */
8240 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8241 		if (!object->true_share &&
8242 		    vm_object_tracking_btlog) {
8243 			btlog_record(vm_object_tracking_btlog, object,
8244 			    VM_OBJECT_TRACKING_OP_TRUESHARE,
8245 			    btref_get(__builtin_frame_address(0), 0));
8246 		}
8247 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8248 
8249 		vm_object_lock_assert_exclusive(object);
8250 		VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
8251 
8252 		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8253 			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8254 		}
8255 	}
8256 
8257 	if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8258 	    object->vo_copy != VM_OBJECT_NULL) {
8259 		/*
8260 		 * Honor copy-on-write obligations
8261 		 *
8262 		 * The caller is gathering these pages and
8263 		 * might modify their contents.  We need to
8264 		 * make sure that the copy object has its own
8265 		 * private copies of these pages before we let
8266 		 * the caller modify them.
8267 		 *
8268 		 * NOTE: someone else could map the original object
8269 		 * after we've done this copy-on-write here, and they
8270 		 * could then see an inconsistent picture of the memory
8271 		 * while it's being modified via the UPL.  To prevent this,
8272 		 * we would have to block access to these pages until the
8273 		 * UPL is released.  We could use the UPL_BLOCK_ACCESS
8274 		 * code path for that...
8275 		 */
8276 		vm_object_update(object,
8277 		    offset,
8278 		    size,
8279 		    NULL,
8280 		    NULL,
8281 		    FALSE,              /* should_return */
8282 		    MEMORY_OBJECT_COPY_SYNC,
8283 		    VM_PROT_NO_CHANGE);
8284 		VM_PAGEOUT_DEBUG(iopl_cow, 1);
8285 		VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT));
8286 	}
8287 	if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8288 	    object->purgable != VM_PURGABLE_VOLATILE &&
8289 	    object->purgable != VM_PURGABLE_EMPTY &&
8290 	    object->vo_copy == NULL &&
8291 	    size == object->vo_size &&
8292 	    offset == 0 &&
8293 	    object->shadow == NULL &&
8294 	    object->pager == NULL) {
8295 		if (object->resident_page_count == size_in_pages) {
8296 			assert(object != compressor_object);
8297 			assert(!is_kernel_object(object));
8298 			fast_path_full_req = TRUE;
8299 		} else if (object->resident_page_count == 0) {
8300 			assert(object != compressor_object);
8301 			assert(!is_kernel_object(object));
8302 			fast_path_empty_req = TRUE;
8303 			set_cache_attr_needed = TRUE;
8304 		}
8305 	}
8306 
8307 	if (cntrl_flags & UPL_SET_INTERRUPTIBLE) {
8308 		interruptible = THREAD_ABORTSAFE;
8309 	} else {
8310 		interruptible = THREAD_UNINT;
8311 	}
8312 
8313 	entry = 0;
8314 
8315 	xfer_size = size;
8316 	dst_offset = offset;
8317 
8318 	if (fast_path_full_req) {
8319 		if (vm_object_iopl_wire_full(object, upl, user_page_list, cntrl_flags, tag) == TRUE) {
8320 			goto finish;
8321 		}
8322 		/*
8323 		 * we couldn't complete the processing of this request on the fast path
8324 		 * so fall through to the slow path and finish up
8325 		 */
8326 	} else if (fast_path_empty_req) {
8327 		if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8328 			ret = KERN_MEMORY_ERROR;
8329 			goto return_err;
8330 		}
8331 		ret = vm_object_iopl_wire_empty(object, upl, user_page_list,
8332 		    cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
8333 
8334 		if (ret) {
8335 			free_wired_pages = TRUE;
8336 			goto return_err;
8337 		}
8338 		goto finish;
8339 	}
8340 
8341 	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8342 	fault_info.lo_offset = offset;
8343 	fault_info.hi_offset = offset + xfer_size;
8344 	fault_info.mark_zf_absent = TRUE;
8345 	fault_info.interruptible = interruptible;
8346 	fault_info.batch_pmap_op = TRUE;
8347 
8348 	while (xfer_size) {
8349 		vm_fault_return_t       result;
8350 
8351 		dwp->dw_mask = 0;
8352 
8353 		if (fast_path_full_req) {
8354 			/*
8355 			 * if we get here, it means that we ran into a page
8356 			 * state we couldn't handle in the fast path and
8357 			 * bailed out to the slow path... since the order
8358 			 * we look at pages is different between the 2 paths,
8359 			 * the following check is needed to determine whether
8360 			 * this page was already processed in the fast path
8361 			 */
8362 			if (bitmap_test(upl->lite_list, entry)) {
8363 				goto skip_page;
8364 			}
8365 		}
8366 		dst_page = vm_page_lookup(object, dst_offset);
8367 
8368 		if (dst_page == VM_PAGE_NULL ||
8369 		    dst_page->vmp_busy ||
8370 		    VMP_ERROR_GET(dst_page) ||
8371 		    dst_page->vmp_restart ||
8372 		    dst_page->vmp_absent ||
8373 		    dst_page->vmp_fictitious) {
8374 			if (is_kernel_object(object)) {
8375 				panic("vm_object_iopl_request: missing/bad page in kernel object");
8376 			}
8377 			if (object == compressor_object) {
8378 				panic("vm_object_iopl_request: missing/bad page in compressor object");
8379 			}
8380 
8381 			if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8382 				ret = KERN_MEMORY_ERROR;
8383 				goto return_err;
8384 			}
8385 
8386 			if (dst_page != VM_PAGE_NULL &&
8387 			    dst_page->vmp_busy) {
8388 				wait_result_t wait_result;
8389 				vm_object_lock_assert_exclusive(object);
8390 				wait_result = vm_page_sleep(object, dst_page,
8391 				    interruptible, LCK_SLEEP_DEFAULT);
8392 				if (wait_result == THREAD_AWAKENED ||
8393 				    wait_result == THREAD_RESTART) {
8394 					continue;
8395 				}
8396 				ret = MACH_SEND_INTERRUPTED;
8397 				goto return_err;
8398 			}
8399 
8400 			set_cache_attr_needed = TRUE;
8401 
8402 			/*
8403 			 * We just looked up the page and the result remains valid
8404 			 * until the object lock is release, so send it to
8405 			 * vm_fault_page() (as "dst_page"), to avoid having to
8406 			 * look it up again there.
8407 			 */
8408 			caller_lookup = TRUE;
8409 
8410 			do {
8411 				vm_page_t       top_page;
8412 				kern_return_t   error_code;
8413 
8414 				fault_info.cluster_size = xfer_size;
8415 
8416 				vm_object_paging_begin(object);
8417 
8418 				result = vm_fault_page(object, dst_offset,
8419 				    prot | VM_PROT_WRITE, FALSE,
8420 				    caller_lookup,
8421 				    &prot, &dst_page, &top_page,
8422 				    (int *)0,
8423 				    &error_code, no_zero_fill,
8424 				    &fault_info);
8425 
8426 				/* our lookup is no longer valid at this point */
8427 				caller_lookup = FALSE;
8428 
8429 				switch (result) {
8430 				case VM_FAULT_SUCCESS:
8431 					page_grab_count++;
8432 
8433 					if (!dst_page->vmp_absent) {
8434 						vm_page_wakeup_done(object, dst_page);
8435 					} else {
8436 						/*
8437 						 * we only get back an absent page if we
8438 						 * requested that it not be zero-filled
8439 						 * because we are about to fill it via I/O
8440 						 *
8441 						 * absent pages should be left BUSY
8442 						 * to prevent them from being faulted
8443 						 * into an address space before we've
8444 						 * had a chance to complete the I/O on
8445 						 * them since they may contain info that
8446 						 * shouldn't be seen by the faulting task
8447 						 */
8448 					}
8449 					/*
8450 					 *	Release paging references and
8451 					 *	top-level placeholder page, if any.
8452 					 */
8453 					if (top_page != VM_PAGE_NULL) {
8454 						vm_object_t local_object;
8455 
8456 						local_object = VM_PAGE_OBJECT(top_page);
8457 
8458 						/*
8459 						 * comparing 2 packed pointers
8460 						 */
8461 						if (top_page->vmp_object != dst_page->vmp_object) {
8462 							vm_object_lock(local_object);
8463 							VM_PAGE_FREE(top_page);
8464 							vm_object_paging_end(local_object);
8465 							vm_object_unlock(local_object);
8466 						} else {
8467 							VM_PAGE_FREE(top_page);
8468 							vm_object_paging_end(local_object);
8469 						}
8470 					}
8471 					vm_object_paging_end(object);
8472 					break;
8473 
8474 				case VM_FAULT_RETRY:
8475 					vm_object_lock(object);
8476 					break;
8477 
8478 				case VM_FAULT_MEMORY_SHORTAGE:
8479 					OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8480 
8481 					VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8482 
8483 					if (vm_page_wait(interruptible)) {
8484 						OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8485 
8486 						VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8487 						vm_object_lock(object);
8488 
8489 						break;
8490 					}
8491 					OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8492 
8493 					VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8494 					ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_FAULT_OBJIOPLREQ_MEMORY_SHORTAGE), 0 /* arg */);
8495 					OS_FALLTHROUGH;
8496 
8497 				case VM_FAULT_INTERRUPTED:
8498 					error_code = MACH_SEND_INTERRUPTED;
8499 					OS_FALLTHROUGH;
8500 				case VM_FAULT_MEMORY_ERROR:
8501 memory_error:
8502 					ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8503 
8504 					vm_object_lock(object);
8505 					goto return_err;
8506 
8507 				case VM_FAULT_SUCCESS_NO_VM_PAGE:
8508 					/* success but no page: fail */
8509 					vm_object_paging_end(object);
8510 					vm_object_unlock(object);
8511 					goto memory_error;
8512 
8513 				default:
8514 					panic("vm_object_iopl_request: unexpected error"
8515 					    " 0x%x from vm_fault_page()\n", result);
8516 				}
8517 			} while (result != VM_FAULT_SUCCESS);
8518 		}
8519 		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8520 
8521 		if (upl->flags & UPL_KERNEL_OBJECT) {
8522 			goto record_phys_addr;
8523 		}
8524 
8525 		if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8526 			dst_page->vmp_busy = TRUE;
8527 			goto record_phys_addr;
8528 		}
8529 
8530 		if (dst_page->vmp_cleaning) {
8531 			/*
8532 			 * Someone else is cleaning this page in place.
8533 			 * In theory, we should be able to  proceed and use this
8534 			 * page but they'll probably end up clearing the "busy"
8535 			 * bit on it in upl_commit_range() but they didn't set
8536 			 * it, so they would clear our "busy" bit and open
8537 			 * us to race conditions.
8538 			 * We'd better wait for the cleaning to complete and
8539 			 * then try again.
8540 			 */
8541 			VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, 1);
8542 			vm_page_sleep(object, dst_page, THREAD_UNINT, LCK_SLEEP_EXCLUSIVE);
8543 			continue;
8544 		}
8545 		if (dst_page->vmp_laundry) {
8546 			vm_pageout_steal_laundry(dst_page, FALSE);
8547 		}
8548 
8549 		if ((cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8550 		    phys_page >= (max_valid_dma_address >> PAGE_SHIFT)) {
8551 			vm_page_t       low_page;
8552 			int             refmod;
8553 
8554 			/*
8555 			 * support devices that can't DMA above 32 bits
8556 			 * by substituting pages from a pool of low address
8557 			 * memory for any pages we find above the 4G mark
8558 			 * can't substitute if the page is already wired because
8559 			 * we don't know whether that physical address has been
8560 			 * handed out to some other 64 bit capable DMA device to use
8561 			 */
8562 			if (VM_PAGE_WIRED(dst_page)) {
8563 				ret = KERN_PROTECTION_FAILURE;
8564 				goto return_err;
8565 			}
8566 			low_page = vm_page_grablo();
8567 
8568 			if (low_page == VM_PAGE_NULL) {
8569 				ret = KERN_RESOURCE_SHORTAGE;
8570 				goto return_err;
8571 			}
8572 			/*
8573 			 * from here until the vm_page_replace completes
8574 			 * we musn't drop the object lock... we don't
8575 			 * want anyone refaulting this page in and using
8576 			 * it after we disconnect it... we want the fault
8577 			 * to find the new page being substituted.
8578 			 */
8579 			if (dst_page->vmp_pmapped) {
8580 				refmod = pmap_disconnect(phys_page);
8581 			} else {
8582 				refmod = 0;
8583 			}
8584 
8585 			if (!dst_page->vmp_absent) {
8586 				vm_page_copy(dst_page, low_page);
8587 			}
8588 
8589 			low_page->vmp_reference = dst_page->vmp_reference;
8590 			low_page->vmp_dirty     = dst_page->vmp_dirty;
8591 			low_page->vmp_absent    = dst_page->vmp_absent;
8592 
8593 			if (refmod & VM_MEM_REFERENCED) {
8594 				low_page->vmp_reference = TRUE;
8595 			}
8596 			if (refmod & VM_MEM_MODIFIED) {
8597 				SET_PAGE_DIRTY(low_page, FALSE);
8598 			}
8599 
8600 			vm_page_replace(low_page, object, dst_offset);
8601 
8602 			dst_page = low_page;
8603 			/*
8604 			 * vm_page_grablo returned the page marked
8605 			 * BUSY... we don't need a PAGE_WAKEUP_DONE
8606 			 * here, because we've never dropped the object lock
8607 			 */
8608 			if (!dst_page->vmp_absent) {
8609 				dst_page->vmp_busy = FALSE;
8610 			}
8611 
8612 			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8613 		}
8614 		if (!dst_page->vmp_busy) {
8615 			dwp->dw_mask |= DW_vm_page_wire;
8616 		}
8617 
8618 		if (cntrl_flags & UPL_BLOCK_ACCESS) {
8619 			/*
8620 			 * Mark the page "busy" to block any future page fault
8621 			 * on this page in addition to wiring it.
8622 			 * We'll also remove the mapping
8623 			 * of all these pages before leaving this routine.
8624 			 */
8625 			assert(!dst_page->vmp_fictitious);
8626 			dst_page->vmp_busy = TRUE;
8627 		}
8628 		/*
8629 		 * expect the page to be used
8630 		 * page queues lock must be held to set 'reference'
8631 		 */
8632 		dwp->dw_mask |= DW_set_reference;
8633 
8634 		if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8635 			SET_PAGE_DIRTY(dst_page, TRUE);
8636 			/*
8637 			 * Page belonging to a code-signed object is about to
8638 			 * be written. Mark it tainted and disconnect it from
8639 			 * all pmaps so processes have to fault it back in and
8640 			 * deal with the tainted bit.
8641 			 */
8642 			if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
8643 				dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE;
8644 				vm_page_iopl_tainted++;
8645 				if (dst_page->vmp_pmapped) {
8646 					int refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
8647 					if (refmod & VM_MEM_REFERENCED) {
8648 						dst_page->vmp_reference = TRUE;
8649 					}
8650 				}
8651 			}
8652 		}
8653 		if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
8654 			pmap_sync_page_attributes_phys(phys_page);
8655 			dst_page->vmp_written_by_kernel = FALSE;
8656 		}
8657 
8658 record_phys_addr:
8659 		if (dst_page->vmp_busy) {
8660 			upl->flags |= UPL_HAS_BUSY;
8661 		}
8662 
8663 		bitmap_set(upl->lite_list, entry);
8664 
8665 		if (phys_page > upl->highest_page) {
8666 			upl->highest_page = phys_page;
8667 		}
8668 
8669 		if (user_page_list) {
8670 			user_page_list[entry].phys_addr = phys_page;
8671 			user_page_list[entry].free_when_done    = dst_page->vmp_free_when_done;
8672 			user_page_list[entry].absent    = dst_page->vmp_absent;
8673 			user_page_list[entry].dirty     = dst_page->vmp_dirty;
8674 			user_page_list[entry].precious  = dst_page->vmp_precious;
8675 			user_page_list[entry].device    = FALSE;
8676 			user_page_list[entry].needed    = FALSE;
8677 			if (dst_page->vmp_clustered == TRUE) {
8678 				user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8679 			} else {
8680 				user_page_list[entry].speculative = FALSE;
8681 			}
8682 			user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
8683 			user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
8684 			user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
8685 			user_page_list[entry].mark      = FALSE;
8686 		}
8687 		if (!is_kernel_object(object) && object != compressor_object) {
8688 			/*
8689 			 * someone is explicitly grabbing this page...
8690 			 * update clustered and speculative state
8691 			 *
8692 			 */
8693 			if (dst_page->vmp_clustered) {
8694 				VM_PAGE_CONSUME_CLUSTERED(dst_page);
8695 			}
8696 		}
8697 skip_page:
8698 		entry++;
8699 		dst_offset += PAGE_SIZE_64;
8700 		xfer_size -= PAGE_SIZE;
8701 
8702 		if (dwp->dw_mask) {
8703 			VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8704 
8705 			if (dw_count >= dw_limit) {
8706 				vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
8707 
8708 				dwp = dwp_start;
8709 				dw_count = 0;
8710 			}
8711 		}
8712 	}
8713 	assert(entry == size_in_pages);
8714 
8715 	if (dw_count) {
8716 		vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
8717 		dwp = dwp_start;
8718 		dw_count = 0;
8719 	}
8720 finish:
8721 	if (user_page_list && set_cache_attr_needed == TRUE) {
8722 		vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8723 	}
8724 
8725 	if (page_list_count != NULL) {
8726 		if (upl->flags & UPL_INTERNAL) {
8727 			*page_list_count = 0;
8728 		} else if (*page_list_count > size_in_pages) {
8729 			*page_list_count = size_in_pages;
8730 		}
8731 	}
8732 	vm_object_unlock(object);
8733 
8734 	if (cntrl_flags & UPL_BLOCK_ACCESS) {
8735 		/*
8736 		 * We've marked all the pages "busy" so that future
8737 		 * page faults will block.
8738 		 * Now remove the mapping for these pages, so that they
8739 		 * can't be accessed without causing a page fault.
8740 		 */
8741 		vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8742 		    PMAP_NULL,
8743 		    PAGE_SIZE,
8744 		    0, VM_PROT_NONE);
8745 		assert(!object->blocked_access);
8746 		object->blocked_access = TRUE;
8747 	}
8748 
8749 	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8750 #if DEVELOPMENT || DEBUG
8751 	if (task != NULL) {
8752 		ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
8753 	}
8754 #endif /* DEVELOPMENT || DEBUG */
8755 
8756 	if (dwp_start && dwp_finish_ctx) {
8757 		vm_page_delayed_work_finish_ctx(dwp_start);
8758 		dwp_start = dwp = NULL;
8759 	}
8760 
8761 	return KERN_SUCCESS;
8762 
8763 return_err:
8764 	dw_index = 0;
8765 
8766 	for (; offset < dst_offset; offset += PAGE_SIZE) {
8767 		boolean_t need_unwire;
8768 		bool need_wakeup;
8769 
8770 		dst_page = vm_page_lookup(object, offset);
8771 
8772 		if (dst_page == VM_PAGE_NULL) {
8773 			panic("vm_object_iopl_request: Wired page missing.");
8774 		}
8775 
8776 		/*
8777 		 * if we've already processed this page in an earlier
8778 		 * dw_do_work, we need to undo the wiring... we will
8779 		 * leave the dirty and reference bits on if they
8780 		 * were set, since we don't have a good way of knowing
8781 		 * what the previous state was and we won't get here
8782 		 * under any normal circumstances...  we will always
8783 		 * clear BUSY and wakeup any waiters via vm_page_free
8784 		 * or PAGE_WAKEUP_DONE
8785 		 */
8786 		need_unwire = TRUE;
8787 
8788 		need_wakeup = false;
8789 		if (dw_count) {
8790 			if ((dwp_start)[dw_index].dw_m == dst_page) {
8791 				/*
8792 				 * still in the deferred work list
8793 				 * which means we haven't yet called
8794 				 * vm_page_wire on this page
8795 				 */
8796 				need_unwire = FALSE;
8797 
8798 				if (dst_page->vmp_busy &&
8799 				    ((dwp_start)[dw_index].dw_mask & DW_clear_busy)) {
8800 					/*
8801 					 * It's our own "busy" bit, so we need to clear it
8802 					 * now and wake up waiters below.
8803 					 */
8804 					dst_page->vmp_busy = false;
8805 					need_wakeup = true;
8806 				}
8807 
8808 				dw_index++;
8809 				dw_count--;
8810 			}
8811 		}
8812 		vm_page_lock_queues();
8813 
8814 		if (dst_page->vmp_absent || free_wired_pages == TRUE) {
8815 			vm_page_free(dst_page);
8816 
8817 			need_unwire = FALSE;
8818 		} else {
8819 			if (need_unwire == TRUE) {
8820 				vm_page_unwire(dst_page, TRUE);
8821 			}
8822 			if (dst_page->vmp_busy) {
8823 				/* not our "busy" or we would have cleared it above */
8824 				assert(!need_wakeup);
8825 			}
8826 			if (need_wakeup) {
8827 				assert(!dst_page->vmp_busy);
8828 				vm_page_wakeup(object, dst_page);
8829 			}
8830 		}
8831 		vm_page_unlock_queues();
8832 
8833 		if (need_unwire == TRUE) {
8834 			counter_inc(&vm_statistics_reactivations);
8835 		}
8836 	}
8837 #if UPL_DEBUG
8838 	upl->upl_state = 2;
8839 #endif
8840 	if (!(upl->flags & UPL_KERNEL_OBJECT)) {
8841 		vm_object_activity_end(object);
8842 		vm_object_collapse(object, 0, TRUE);
8843 	}
8844 	vm_object_unlock(object);
8845 	upl_destroy(upl);
8846 
8847 	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, 0, 0);
8848 #if DEVELOPMENT || DEBUG
8849 	if (task != NULL) {
8850 		ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
8851 	}
8852 #endif /* DEVELOPMENT || DEBUG */
8853 
8854 	if (dwp_start && dwp_finish_ctx) {
8855 		vm_page_delayed_work_finish_ctx(dwp_start);
8856 		dwp_start = dwp = NULL;
8857 	}
8858 	return ret;
8859 }
8860 
8861 kern_return_t
8862 upl_transpose(
8863 	upl_t           upl1,
8864 	upl_t           upl2)
8865 {
8866 	kern_return_t           retval;
8867 	boolean_t               upls_locked;
8868 	vm_object_t             object1, object2;
8869 
8870 	/* LD: Should mapped UPLs be eligible for a transpose? */
8871 	if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR) == UPL_VECTOR) || ((upl2->flags & UPL_VECTOR) == UPL_VECTOR)) {
8872 		return KERN_INVALID_ARGUMENT;
8873 	}
8874 
8875 	upls_locked = FALSE;
8876 
8877 	/*
8878 	 * Since we need to lock both UPLs at the same time,
8879 	 * avoid deadlocks by always taking locks in the same order.
8880 	 */
8881 	if (upl1 < upl2) {
8882 		upl_lock(upl1);
8883 		upl_lock(upl2);
8884 	} else {
8885 		upl_lock(upl2);
8886 		upl_lock(upl1);
8887 	}
8888 	upls_locked = TRUE;     /* the UPLs will need to be unlocked */
8889 
8890 	object1 = upl1->map_object;
8891 	object2 = upl2->map_object;
8892 
8893 	if (upl1->u_offset != 0 || upl2->u_offset != 0 ||
8894 	    upl1->u_size != upl2->u_size) {
8895 		/*
8896 		 * We deal only with full objects, not subsets.
8897 		 * That's because we exchange the entire backing store info
8898 		 * for the objects: pager, resident pages, etc...  We can't do
8899 		 * only part of it.
8900 		 */
8901 		retval = KERN_INVALID_VALUE;
8902 		goto done;
8903 	}
8904 
8905 	/*
8906 	 * Tranpose the VM objects' backing store.
8907 	 */
8908 	retval = vm_object_transpose(object1, object2,
8909 	    upl_adjusted_size(upl1, PAGE_MASK));
8910 
8911 	if (retval == KERN_SUCCESS) {
8912 		/*
8913 		 * Make each UPL point to the correct VM object, i.e. the
8914 		 * object holding the pages that the UPL refers to...
8915 		 */
8916 #if CONFIG_IOSCHED || UPL_DEBUG
8917 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8918 			vm_object_lock(object1);
8919 			vm_object_lock(object2);
8920 		}
8921 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8922 			queue_remove(&object1->uplq, upl1, upl_t, uplq);
8923 		}
8924 		if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8925 			queue_remove(&object2->uplq, upl2, upl_t, uplq);
8926 		}
8927 #endif
8928 		upl1->map_object = object2;
8929 		upl2->map_object = object1;
8930 
8931 #if CONFIG_IOSCHED || UPL_DEBUG
8932 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8933 			queue_enter(&object2->uplq, upl1, upl_t, uplq);
8934 		}
8935 		if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8936 			queue_enter(&object1->uplq, upl2, upl_t, uplq);
8937 		}
8938 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8939 			vm_object_unlock(object2);
8940 			vm_object_unlock(object1);
8941 		}
8942 #endif
8943 	}
8944 
8945 done:
8946 	/*
8947 	 * Cleanup.
8948 	 */
8949 	if (upls_locked) {
8950 		upl_unlock(upl1);
8951 		upl_unlock(upl2);
8952 		upls_locked = FALSE;
8953 	}
8954 
8955 	return retval;
8956 }
8957 
8958 void
8959 upl_range_needed(
8960 	upl_t           upl,
8961 	int             index,
8962 	int             count)
8963 {
8964 	int             size_in_pages;
8965 
8966 	if (!(upl->flags & UPL_INTERNAL) || count <= 0) {
8967 		return;
8968 	}
8969 
8970 	size_in_pages = upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE;
8971 
8972 	while (count-- && index < size_in_pages) {
8973 		upl->page_list[index++].needed = TRUE;
8974 	}
8975 }
8976 
8977 
8978 /*
8979  * Reserve of virtual addresses in the kernel address space.
8980  * We need to map the physical pages in the kernel, so that we
8981  * can call the code-signing or slide routines with a kernel
8982  * virtual address.  We keep this pool of pre-allocated kernel
8983  * virtual addresses so that we don't have to scan the kernel's
8984  * virtaul address space each time we need to work with
8985  * a physical page.
8986  */
/* Protects the in-use bitmap and waiter counts below. */
SIMPLE_LOCK_DECLARE(vm_paging_lock, 0);
/* Number of kernel virtual pages in the pre-allocated pool. */
#define VM_PAGING_NUM_PAGES     64
/* Base KVA of the pool; set once at startup by vm_paging_map_init(). */
SECURITY_READ_ONLY_LATE(vm_offset_t) vm_paging_base_address = 0;
/* Per-slot in-use flags for the pool (indexed 0..VM_PAGING_NUM_PAGES-1). */
bool            vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
/* Highest pool slot index ever handed out. */
int             vm_paging_max_index = 0;
/* Current number of threads waiting for a free pool slot. */
int             vm_paging_page_waiter = 0;
/* Cumulative count of waits for a free pool slot. */
int             vm_paging_page_waiter_total = 0;

/* Statistics: pool exhaustion and fast/slow-path mapping counters. */
unsigned long   vm_paging_no_kernel_page = 0;
unsigned long   vm_paging_objects_mapped = 0;
unsigned long   vm_paging_pages_mapped = 0;
unsigned long   vm_paging_objects_mapped_slow = 0;
unsigned long   vm_paging_pages_mapped_slow = 0;
9000 
/*
 * Startup-time reservation of the vm_paging kernel-VA pool: a pageable,
 * permanent, kernel-object range of VM_PAGING_NUM_PAGES pages anchored
 * at vm_paging_base_address.  KMA_NOFAIL means allocation failure panics
 * rather than returning an error.
 */
__startup_func
static void
vm_paging_map_init(void)
{
	kmem_alloc(kernel_map, &vm_paging_base_address,
	    ptoa(VM_PAGING_NUM_PAGES),
	    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_PAGEABLE,
	    VM_KERN_MEMORY_NONE);
}
STARTUP(ZALLOC, STARTUP_RANK_LAST, vm_paging_map_init);
9011 
9012 /*
9013  * vm_paging_map_object:
9014  *	Maps part of a VM object's pages in the kernel
9015  *      virtual address space, using the pre-allocated
9016  *	kernel virtual addresses, if possible.
9017  * Context:
9018  *      The VM object is locked.  This lock will get
9019  *      dropped and re-acquired though, so the caller
9020  *      must make sure the VM object is kept alive
9021  *	(by holding a VM map that has a reference
9022  *      on it, for example, or taking an extra reference).
9023  *      The page should also be kept busy to prevent
9024  *	it from being reclaimed.
9025  */
kern_return_t
vm_paging_map_object(
	vm_page_t               page,           /* page to map, or VM_PAGE_NULL to map a range */
	vm_object_t             object,         /* locked on entry; may be dropped/retaken */
	vm_object_offset_t      offset,         /* start offset within object */
	vm_prot_t               protection,     /* protection for the kernel mapping */
	boolean_t               can_unlock_object, /* may we drop the object lock for the slow path? */
	vm_map_size_t           *size,          /* IN/OUT */
	vm_map_offset_t         *address,       /* OUT */
	boolean_t               *need_unmap)    /* OUT */
{
	kern_return_t           kr;
	vm_map_offset_t         page_map_offset;
	vm_map_size_t           map_size;
	vm_object_offset_t      object_offset;
	int                     i;

	if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
		/* use permanent 1-to-1 kernel mapping of physical memory ? */
		*address = (vm_map_offset_t)
		    phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
		*need_unmap = FALSE;
		return KERN_SUCCESS;

		/*
		 * NOTE(review): the unconditional return above makes the
		 * rest of this "if" block — the pre-allocated KVA-pool
		 * path down to the simple_unlock() before the closing
		 * brace — unreachable.  Presumably it is retained for
		 * configurations without a 1-to-1 physical aperture;
		 * confirm before removing.
		 */
		assert(page->vmp_busy);
		/*
		 * Use one of the pre-allocated kernel virtual addresses
		 * and just enter the VM page in the kernel address space
		 * at that virtual address.
		 */
		simple_lock(&vm_paging_lock, &vm_pageout_lck_grp);

		/*
		 * Try and find an available kernel virtual address
		 * from our pre-allocated pool.
		 */
		page_map_offset = 0;
		for (;;) {
			for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
				if (vm_paging_page_inuse[i] == FALSE) {
					page_map_offset =
					    vm_paging_base_address +
					    (i * PAGE_SIZE);
					break;
				}
			}
			if (page_map_offset != 0) {
				/* found a space to map our page ! */
				break;
			}

			if (can_unlock_object) {
				/*
				 * If we can afford to unlock the VM object,
				 * let's take the slow path now...
				 */
				break;
			}
			/*
			 * We can't afford to unlock the VM object, so
			 * let's wait for a space to become available...
			 */
			vm_paging_page_waiter_total++;
			vm_paging_page_waiter++;
			kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
			if (kr == THREAD_WAITING) {
				/* drop the spinlock while blocked; retake before rescanning */
				simple_unlock(&vm_paging_lock);
				kr = thread_block(THREAD_CONTINUE_NULL);
				simple_lock(&vm_paging_lock, &vm_pageout_lck_grp);
			}
			vm_paging_page_waiter--;
			/* ... and try again */
		}

		if (page_map_offset != 0) {
			/*
			 * We found a kernel virtual address;
			 * map the physical page to that virtual address.
			 */
			if (i > vm_paging_max_index) {
				vm_paging_max_index = i;
			}
			vm_paging_page_inuse[i] = TRUE;
			simple_unlock(&vm_paging_lock);

			page->vmp_pmapped = TRUE;

			/*
			 * Keep the VM object locked over the PMAP_ENTER
			 * and the actual use of the page by the kernel,
			 * or this pmap mapping might get undone by a
			 * vm_object_pmap_protect() call...
			 */
			kr = pmap_enter_check(kernel_pmap,
			    page_map_offset,
			    page,
			    protection,
			    VM_PROT_NONE,
			    0,
			    TRUE);
			assert(kr == KERN_SUCCESS);
			vm_paging_objects_mapped++;
			vm_paging_pages_mapped++;
			*address = page_map_offset;
			*need_unmap = TRUE;

#if KASAN
			kasan_notify_address(page_map_offset, PAGE_SIZE);
#endif

			/* all done and mapped, ready to use ! */
			return KERN_SUCCESS;
		}

		/*
		 * We ran out of pre-allocated kernel virtual
		 * addresses.  Just map the page in the kernel
		 * the slow and regular way.
		 */
		vm_paging_no_kernel_page++;
		simple_unlock(&vm_paging_lock);
	}

	if (!can_unlock_object) {
		/* slow path requires dropping the object lock; caller forbade it */
		*address = 0;
		*size = 0;
		*need_unmap = FALSE;
		return KERN_NOT_SUPPORTED;
	}

	object_offset = vm_object_trunc_page(offset);
	map_size = vm_map_round_page(*size,
	    VM_MAP_PAGE_MASK(kernel_map));

	/*
	 * Try and map the required range of the object
	 * in the kernel_map. Given that allocation is
	 * for pageable memory, it shouldn't contain
	 * pointers and is mapped into the data range.
	 */

	vm_object_reference_locked(object);     /* for the map entry */
	vm_object_unlock(object);

	kr = vm_map_enter(kernel_map,
	    address,
	    map_size,
	    0,
	    VM_MAP_KERNEL_FLAGS_DATA_ANYWHERE(),
	    object,
	    object_offset,
	    FALSE,
	    protection,
	    VM_PROT_ALL,
	    VM_INHERIT_NONE);
	if (kr != KERN_SUCCESS) {
		/* report failure with all OUT parameters zeroed and the object re-locked */
		*address = 0;
		*size = 0;
		*need_unmap = FALSE;
		vm_object_deallocate(object);   /* for the map entry */
		vm_object_lock(object);
		return kr;
	}

	*size = map_size;

	/*
	 * Enter the mapped pages in the page table now.
	 */
	vm_object_lock(object);
	/*
	 * VM object must be kept locked from before PMAP_ENTER()
	 * until after the kernel is done accessing the page(s).
	 * Otherwise, the pmap mappings in the kernel could be
	 * undone by a call to vm_object_pmap_protect().
	 */

	for (page_map_offset = 0;
	    map_size != 0;
	    map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
		page = vm_page_lookup(object, offset + page_map_offset);
		if (page == VM_PAGE_NULL) {
			/* a page in the range is not resident: undo the mapping and fail */
			printf("vm_paging_map_object: no page !?");
			vm_object_unlock(object);
			vm_map_remove(kernel_map, *address, *size);
			*address = 0;
			*size = 0;
			*need_unmap = FALSE;
			vm_object_lock(object);
			return KERN_MEMORY_ERROR;
		}
		page->vmp_pmapped = TRUE;

		kr = pmap_enter_check(kernel_pmap,
		    *address + page_map_offset,
		    page,
		    protection,
		    VM_PROT_NONE,
		    0,
		    TRUE);
		assert(kr == KERN_SUCCESS);
#if KASAN
		kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
#endif
	}

	vm_paging_objects_mapped_slow++;
	vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);

	*need_unmap = TRUE;

	/* caller must undo this mapping via vm_paging_unmap_object() */
	return KERN_SUCCESS;
}
9239 
9240 /*
9241  * vm_paging_unmap_object:
9242  *	Unmaps part of a VM object's pages from the kernel
9243  *      virtual address space.
9244  * Context:
9245  *      The VM object is locked.  This lock will get
9246  *      dropped and re-acquired though.
9247  */
void
vm_paging_unmap_object(
	vm_object_t     object,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	int             i;

	/*
	 * Decide whether [start, end) came from the pre-allocated
	 * vm_paging window or from a regular kernel_map allocation,
	 * and release it accordingly.
	 */
	if ((vm_paging_base_address == 0) ||
	    (start < vm_paging_base_address) ||
	    (end > (vm_paging_base_address
	    + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
		/*
		 * We didn't use our pre-allocated pool of
		 * kernel virtual address.  Deallocate the
		 * virtual memory.
		 */
		if (object != VM_OBJECT_NULL) {
			/*
			 * Drop the object lock across vm_map_remove() to
			 * respect the map-before-object lock ordering.
			 */
			vm_object_unlock(object);
		}
		vm_map_remove(kernel_map, start, end);
		if (object != VM_OBJECT_NULL) {
			vm_object_lock(object);
		}
	} else {
		/*
		 * We used a kernel virtual address from our
		 * pre-allocated pool.  Put it back in the pool
		 * for next time.
		 */
		assert(end - start == PAGE_SIZE);
		i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
		assert(i >= 0 && i < VM_PAGING_NUM_PAGES);

		/* undo the pmap mapping */
		pmap_remove(kernel_pmap, start, end);

		simple_lock(&vm_paging_lock, &vm_pageout_lck_grp);
		vm_paging_page_inuse[i] = FALSE;
		/* wake anyone stalled in vm_paging_map_object() waiting for a slot */
		if (vm_paging_page_waiter) {
			thread_wakeup(&vm_paging_page_waiter);
		}
		simple_unlock(&vm_paging_lock);
	}
}
9293 
9294 
9295 /*
9296  * page->vmp_object must be locked
9297  */
/*
 * Reclaim a page that is currently being laundered (paged out) so the
 * caller can reuse it.  Clears the page's pending-pageout state.
 * The page's VM object must be locked by the caller; the page queues
 * lock is taken here unless 'queues_locked' says the caller holds it.
 */
void
vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
{
	if (!queues_locked) {
		vm_page_lockspin_queues();
	}

	/* this page will no longer be freed once its I/O completes */
	page->vmp_free_when_done = FALSE;
	/*
	 * need to drop the laundry count...
	 * we may also need to remove it
	 * from the I/O paging queue...
	 * vm_pageout_throttle_up handles both cases
	 *
	 * the laundry and pageout_queue flags are cleared...
	 */
	vm_pageout_throttle_up(page);

	if (!queues_locked) {
		vm_page_unlock_queues();
	}
}
9320 
9321 #define VECTOR_UPL_ELEMENTS_UPPER_LIMIT 64
9322 
9323 upl_t
9324 vector_upl_create(vm_offset_t upl_offset, uint32_t max_upls)
9325 {
9326 	int i = 0;
9327 	upl_t   upl;
9328 
9329 	assert(max_upls > 0);
9330 	if (max_upls == 0) {
9331 		return NULL;
9332 	}
9333 
9334 	if (max_upls > VECTOR_UPL_ELEMENTS_UPPER_LIMIT) {
9335 		max_upls = VECTOR_UPL_ELEMENTS_UPPER_LIMIT;
9336 	}
9337 	vector_upl_t vector_upl = kalloc_type(struct _vector_upl, typeof(vector_upl->upls[0]), max_upls, Z_WAITOK | Z_NOFAIL);
9338 
9339 	upl = upl_create(0, UPL_VECTOR, 0);
9340 	upl->vector_upl = vector_upl;
9341 	upl->u_offset = upl_offset;
9342 	vector_upl->size = 0;
9343 	vector_upl->offset = upl_offset;
9344 	vector_upl->invalid_upls = 0;
9345 	vector_upl->num_upls = 0;
9346 	vector_upl->pagelist = NULL;
9347 	vector_upl->max_upls = max_upls;
9348 
9349 	for (i = 0; i < max_upls; i++) {
9350 		vector_upl->upls[i].iostate.size = 0;
9351 		vector_upl->upls[i].iostate.offset = 0;
9352 	}
9353 	return upl;
9354 }
9355 
9356 upl_size_t
9357 vector_upl_get_size(const upl_t upl)
9358 {
9359 	if (!vector_upl_is_valid(upl)) {
9360 		return upl_get_size(upl);
9361 	} else {
9362 		return round_page_32(upl->vector_upl->size);
9363 	}
9364 }
9365 
9366 uint32_t
9367 vector_upl_max_upls(const upl_t upl)
9368 {
9369 	if (!vector_upl_is_valid(upl)) {
9370 		return 0;
9371 	}
9372 	return ((vector_upl_t)(upl->vector_upl))->max_upls;
9373 }
9374 
/*
 * Free a vectored UPL's descriptor and its merged pagelist.
 * Every sub-UPL slot must already have been invalidated (via
 * vector_upl_set_subupl with io_size == 0), otherwise we panic.
 * The container UPL itself is not freed here; only its vector state.
 */
void
vector_upl_deallocate(upl_t upl)
{
	vector_upl_t vector_upl = upl->vector_upl;

	assert(vector_upl_is_valid(upl));

	if (vector_upl->invalid_upls != vector_upl->num_upls) {
		panic("Deallocating non-empty Vectored UPL");
	}
	uint32_t max_upls = vector_upl->max_upls;
	/* pagelist is NULL if vector_upl_set_pagelist() was never called */
	kfree_type(struct upl_page_info, atop(vector_upl->size), vector_upl->pagelist);
	kfree_type(struct _vector_upl, typeof(vector_upl->upls[0]), max_upls, vector_upl);
	upl->vector_upl = NULL;
}
9390 
9391 boolean_t
9392 vector_upl_is_valid(upl_t upl)
9393 {
9394 	return upl && (upl->flags & UPL_VECTOR) && upl->vector_upl;
9395 }
9396 
9397 boolean_t
9398 vector_upl_set_subupl(upl_t upl, upl_t subupl, uint32_t io_size)
9399 {
9400 	if (vector_upl_is_valid(upl)) {
9401 		vector_upl_t vector_upl = upl->vector_upl;
9402 
9403 		if (vector_upl) {
9404 			if (subupl) {
9405 				if (io_size) {
9406 					if (io_size < PAGE_SIZE) {
9407 						io_size = PAGE_SIZE;
9408 					}
9409 					subupl->vector_upl = (void*)vector_upl;
9410 					vector_upl->upls[vector_upl->num_upls++].elem = subupl;
9411 					vector_upl->size += io_size;
9412 					upl->u_size += io_size;
9413 				} else {
9414 					uint32_t i = 0, invalid_upls = 0;
9415 					for (i = 0; i < vector_upl->num_upls; i++) {
9416 						if (vector_upl->upls[i].elem == subupl) {
9417 							break;
9418 						}
9419 					}
9420 					if (i == vector_upl->num_upls) {
9421 						panic("Trying to remove sub-upl when none exists");
9422 					}
9423 
9424 					vector_upl->upls[i].elem = NULL;
9425 					invalid_upls = os_atomic_inc(&(vector_upl)->invalid_upls,
9426 					    relaxed);
9427 					if (invalid_upls == vector_upl->num_upls) {
9428 						return TRUE;
9429 					} else {
9430 						return FALSE;
9431 					}
9432 				}
9433 			} else {
9434 				panic("vector_upl_set_subupl was passed a NULL upl element");
9435 			}
9436 		} else {
9437 			panic("vector_upl_set_subupl was passed a non-vectored upl");
9438 		}
9439 	} else {
9440 		panic("vector_upl_set_subupl was passed a NULL upl");
9441 	}
9442 
9443 	return FALSE;
9444 }
9445 
/*
 * Build one merged pagelist for a vectored UPL by concatenating the
 * page lists of all of its sub-UPLs, and propagate the highest
 * physical page number seen to the container UPL.
 */
void
vector_upl_set_pagelist(upl_t upl)
{
	if (vector_upl_is_valid(upl)) {
		uint32_t i = 0;
		vector_upl_t vector_upl = upl->vector_upl;

		if (vector_upl) {
			vm_offset_t pagelist_size = 0, cur_upl_pagelist_size = 0;

			/*
			 * NOTE(review): Z_WAITOK without Z_NOFAIL may return NULL;
			 * the bcopy below assumes the allocation succeeded — confirm
			 * this size class cannot fail.
			 */
			vector_upl->pagelist = kalloc_type(struct upl_page_info,
			    atop(vector_upl->size), Z_WAITOK);

			for (i = 0; i < vector_upl->num_upls; i++) {
				/* bytes of page_info contributed by this sub-UPL */
				cur_upl_pagelist_size = sizeof(struct upl_page_info) * upl_adjusted_size(vector_upl->upls[i].elem, PAGE_MASK) / PAGE_SIZE;
				bcopy(vector_upl->upls[i].elem->page_list, (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
				pagelist_size += cur_upl_pagelist_size;
				if (vector_upl->upls[i].elem->highest_page > upl->highest_page) {
					upl->highest_page = vector_upl->upls[i].elem->highest_page;
				}
			}
			/* merged list must exactly cover the vector's aggregate size */
			assert( pagelist_size == (sizeof(struct upl_page_info) * (vector_upl->size / PAGE_SIZE)));
		} else {
			panic("vector_upl_set_pagelist was passed a non-vectored upl");
		}
	} else {
		panic("vector_upl_set_pagelist was passed a NULL upl");
	}
}
9475 
9476 upl_t
9477 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9478 {
9479 	if (vector_upl_is_valid(upl)) {
9480 		vector_upl_t vector_upl = upl->vector_upl;
9481 		if (vector_upl) {
9482 			if (index < vector_upl->num_upls) {
9483 				return vector_upl->upls[index].elem;
9484 			}
9485 		} else {
9486 			panic("vector_upl_subupl_byindex was passed a non-vectored upl");
9487 		}
9488 	}
9489 	return NULL;
9490 }
9491 
/*
 * Find the sub-UPL of a vectored UPL that covers *upl_offset, and
 * translate *upl_offset / *upl_size from vector-relative coordinates
 * into sub-UPL-relative coordinates (clipping the size to the
 * sub-UPL's recorded I/O state).  Returns the sub-UPL, or NULL when
 * the covering slot has already been committed/aborted or no slot
 * covers the offset.
 */
upl_t
vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
{
	if (vector_upl_is_valid(upl)) {
		uint32_t i = 0;
		vector_upl_t vector_upl = upl->vector_upl;

		if (vector_upl) {
			upl_t subupl = NULL;
			vector_upl_iostates_t subupl_state;

			/* slots are ordered by iostate offset; first covering slot wins */
			for (i = 0; i < vector_upl->num_upls; i++) {
				subupl = vector_upl->upls[i].elem;
				subupl_state = vector_upl->upls[i].iostate;
				if (*upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
					/* We could have been passed an offset/size pair that belongs
					 * to an UPL element that has already been committed/aborted.
					 * If so, return NULL.
					 */
					if (subupl == NULL) {
						return NULL;
					}
					/* clip the requested size to this slot's extent */
					if ((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
						*upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
						if (*upl_size > subupl_state.size) {
							*upl_size = subupl_state.size;
						}
					}
					/* rebase the offset to be relative to this sub-UPL */
					if (*upl_offset >= subupl_state.offset) {
						*upl_offset -= subupl_state.offset;
					} else if (i) {
						panic("Vector UPL offset miscalculation");
					}
					return subupl;
				}
			}
		} else {
			panic("vector_upl_subupl_byoffset was passed a non-vectored UPL");
		}
	}
	return NULL;
}
9534 
9535 void
9536 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9537 {
9538 	*v_upl_submap = NULL;
9539 
9540 	if (vector_upl_is_valid(upl)) {
9541 		vector_upl_t vector_upl = upl->vector_upl;
9542 		if (vector_upl) {
9543 			*v_upl_submap = vector_upl->submap;
9544 			*submap_dst_addr = vector_upl->submap_dst_addr;
9545 		} else {
9546 			panic("vector_upl_get_submap was passed a non-vectored UPL");
9547 		}
9548 	} else {
9549 		panic("vector_upl_get_submap was passed a null UPL");
9550 	}
9551 }
9552 
9553 void
9554 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9555 {
9556 	if (vector_upl_is_valid(upl)) {
9557 		vector_upl_t vector_upl = upl->vector_upl;
9558 		if (vector_upl) {
9559 			vector_upl->submap = submap;
9560 			vector_upl->submap_dst_addr = submap_dst_addr;
9561 		} else {
9562 			panic("vector_upl_get_submap was passed a non-vectored UPL");
9563 		}
9564 	} else {
9565 		panic("vector_upl_get_submap was passed a NULL UPL");
9566 	}
9567 }
9568 
9569 void
9570 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9571 {
9572 	if (vector_upl_is_valid(upl)) {
9573 		uint32_t i = 0;
9574 		vector_upl_t vector_upl = upl->vector_upl;
9575 
9576 		if (vector_upl) {
9577 			for (i = 0; i < vector_upl->num_upls; i++) {
9578 				if (vector_upl->upls[i].elem == subupl) {
9579 					break;
9580 				}
9581 			}
9582 
9583 			if (i == vector_upl->num_upls) {
9584 				panic("setting sub-upl iostate when none exists");
9585 			}
9586 
9587 			vector_upl->upls[i].iostate.offset = offset;
9588 			if (size < PAGE_SIZE) {
9589 				size = PAGE_SIZE;
9590 			}
9591 			vector_upl->upls[i].iostate.size = size;
9592 		} else {
9593 			panic("vector_upl_set_iostate was passed a non-vectored UPL");
9594 		}
9595 	} else {
9596 		panic("vector_upl_set_iostate was passed a NULL UPL");
9597 	}
9598 }
9599 
9600 void
9601 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9602 {
9603 	if (vector_upl_is_valid(upl)) {
9604 		uint32_t i = 0;
9605 		vector_upl_t vector_upl = upl->vector_upl;
9606 
9607 		if (vector_upl) {
9608 			for (i = 0; i < vector_upl->num_upls; i++) {
9609 				if (vector_upl->upls[i].elem == subupl) {
9610 					break;
9611 				}
9612 			}
9613 
9614 			if (i == vector_upl->num_upls) {
9615 				panic("getting sub-upl iostate when none exists");
9616 			}
9617 
9618 			*offset = vector_upl->upls[i].iostate.offset;
9619 			*size = vector_upl->upls[i].iostate.size;
9620 		} else {
9621 			panic("vector_upl_get_iostate was passed a non-vectored UPL");
9622 		}
9623 	} else {
9624 		panic("vector_upl_get_iostate was passed a NULL UPL");
9625 	}
9626 }
9627 
9628 void
9629 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9630 {
9631 	if (vector_upl_is_valid(upl)) {
9632 		vector_upl_t vector_upl = upl->vector_upl;
9633 		if (vector_upl) {
9634 			if (index < vector_upl->num_upls) {
9635 				*offset = vector_upl->upls[index].iostate.offset;
9636 				*size = vector_upl->upls[index].iostate.size;
9637 			} else {
9638 				*offset = *size = 0;
9639 			}
9640 		} else {
9641 			panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL");
9642 		}
9643 	} else {
9644 		panic("vector_upl_get_iostate_byindex was passed a NULL UPL");
9645 	}
9646 }
9647 
/*
 * Return the raw vectored-UPL descriptor attached to 'upl'
 * (NULL when the UPL is not vectored).
 */
void *
upl_get_internal_vectorupl(upl_t upl)
{
	return upl->vector_upl;
}

/*
 * Return the merged pagelist of a vectored UPL.  The caller must
 * guarantee 'upl' is vectored; no NULL check is performed here.
 */
upl_page_info_t *
upl_get_internal_vectorupl_pagelist(upl_t upl)
{
	return upl->vector_upl->pagelist;
}

/*
 * Return the page list for either flavor of UPL: the merged vectored
 * pagelist when present, otherwise the UPL's own page_list.
 */
upl_page_info_t *
upl_get_internal_page_list(upl_t upl)
{
	return upl->vector_upl ? upl->vector_upl->pagelist : upl->page_list;
}
9665 
9666 void
9667 upl_clear_dirty(
9668 	upl_t           upl,
9669 	boolean_t       value)
9670 {
9671 	if (value) {
9672 		upl->flags |= UPL_CLEAR_DIRTY;
9673 	} else {
9674 		upl->flags &= ~UPL_CLEAR_DIRTY;
9675 	}
9676 }
9677 
9678 void
9679 upl_set_referenced(
9680 	upl_t           upl,
9681 	boolean_t       value)
9682 {
9683 	upl_lock(upl);
9684 	if (value) {
9685 		upl->ext_ref_count++;
9686 	} else {
9687 		if (!upl->ext_ref_count) {
9688 			panic("upl_set_referenced not %p", upl);
9689 		}
9690 		upl->ext_ref_count--;
9691 	}
9692 	upl_unlock(upl);
9693 }
9694 
/*
 * Acquire exclusive ownership of the UPL's mapping address.  Sleeps
 * (dropping the UPL lock) while another thread owns it, then records
 * the current thread's compact thread-id as owner.
 */
void
upl_set_map_exclusive(upl_t upl)
{
	upl_lock(upl);
	while (upl->map_addr_owner) {
		/* flag that someone is waiting so the owner wakes us on release */
		upl->flags |= UPL_MAP_EXCLUSIVE_WAIT;
		upl_lock_sleep(upl, &upl->map_addr_owner, ctid_get_thread(upl->map_addr_owner));
	}
	upl->map_addr_owner = thread_get_ctid(current_thread());
	upl_unlock(upl);
}
9706 
/*
 * Release exclusive ownership of the UPL's mapping address and wake
 * any waiters.  Must be called by the owning thread.
 */
void
upl_clear_map_exclusive(upl_t upl)
{
	/* only the thread that set exclusivity may clear it */
	assert(upl->map_addr_owner == thread_get_ctid(current_thread()));
	upl_lock(upl);
	if (upl->flags & UPL_MAP_EXCLUSIVE_WAIT) {
		upl->flags &= ~UPL_MAP_EXCLUSIVE_WAIT;
		upl_wakeup(&upl->map_addr_owner);
	}
	upl->map_addr_owner = 0;
	upl_unlock(upl);
}
9719 
9720 #if CONFIG_IOSCHED
/*
 * Record the device block number and I/O size for each page of the
 * range [upl_offset, upl_offset + io_size) so expedited (re-prioritized)
 * I/O can locate it later.  No-op unless the UPL supports expedite.
 */
void
upl_set_blkno(
	upl_t           upl,
	vm_offset_t     upl_offset,
	int             io_size,
	int64_t         blkno)
{
	int i, j;
	if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) {
		return;
	}

	assert(upl->upl_reprio_info != 0);
	/* i indexes pages within the UPL; j walks the byte range */
	for (i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
		UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
	}
}
9738 #endif
9739 
9740 void inline
9741 memoryshot(unsigned int event, unsigned int control)
9742 {
9743 	if (vm_debug_events) {
9744 		KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9745 		    vm_page_active_count, vm_page_inactive_count,
9746 		    vm_page_free_count, vm_page_speculative_count,
9747 		    vm_page_throttled_count);
9748 	} else {
9749 		(void) event;
9750 		(void) control;
9751 	}
9752 }
9753 
9754 #ifdef MACH_BSD
9755 
/*
 * Thin accessors over upl_page_info_t entries, exported for BSD-side
 * consumers.  Each simply forwards to the corresponding UPL_* macro
 * or field.
 */
boolean_t
upl_device_page(upl_page_info_t *upl)
{
	return UPL_DEVICE_PAGE(upl);
}
boolean_t
upl_page_present(upl_page_info_t *upl, int index)
{
	return UPL_PAGE_PRESENT(upl, index);
}
boolean_t
upl_speculative_page(upl_page_info_t *upl, int index)
{
	return UPL_SPECULATIVE_PAGE(upl, index);
}
boolean_t
upl_dirty_page(upl_page_info_t *upl, int index)
{
	return UPL_DIRTY_PAGE(upl, index);
}
boolean_t
upl_valid_page(upl_page_info_t *upl, int index)
{
	return UPL_VALID_PAGE(upl, index);
}
ppnum_t
upl_phys_page(upl_page_info_t *upl, int index)
{
	return UPL_PHYS_PAGE(upl, index);
}

/* Set the caller-private 'mark' bit on page 'index'. */
void
upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
{
	upl[index].mark = v;
}

/* Read back the caller-private 'mark' bit on page 'index'. */
boolean_t
upl_page_get_mark(upl_page_info_t *upl, int index)
{
	return upl[index].mark;
}
9798 
9799 void
9800 vm_countdirtypages(void)
9801 {
9802 	vm_page_t m;
9803 	int dpages;
9804 	int pgopages;
9805 	int precpages;
9806 
9807 
9808 	dpages = 0;
9809 	pgopages = 0;
9810 	precpages = 0;
9811 
9812 	vm_page_lock_queues();
9813 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9814 	do {
9815 		if (m == (vm_page_t)0) {
9816 			break;
9817 		}
9818 
9819 		if (m->vmp_dirty) {
9820 			dpages++;
9821 		}
9822 		if (m->vmp_free_when_done) {
9823 			pgopages++;
9824 		}
9825 		if (m->vmp_precious) {
9826 			precpages++;
9827 		}
9828 
9829 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9830 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9831 		if (m == (vm_page_t)0) {
9832 			break;
9833 		}
9834 	} while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9835 	vm_page_unlock_queues();
9836 
9837 	vm_page_lock_queues();
9838 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9839 	do {
9840 		if (m == (vm_page_t)0) {
9841 			break;
9842 		}
9843 
9844 		dpages++;
9845 		assert(m->vmp_dirty);
9846 		assert(!m->vmp_free_when_done);
9847 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9848 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9849 		if (m == (vm_page_t)0) {
9850 			break;
9851 		}
9852 	} while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9853 	vm_page_unlock_queues();
9854 
9855 	vm_page_lock_queues();
9856 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9857 	do {
9858 		if (m == (vm_page_t)0) {
9859 			break;
9860 		}
9861 
9862 		if (m->vmp_dirty) {
9863 			dpages++;
9864 		}
9865 		if (m->vmp_free_when_done) {
9866 			pgopages++;
9867 		}
9868 		if (m->vmp_precious) {
9869 			precpages++;
9870 		}
9871 
9872 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9873 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9874 		if (m == (vm_page_t)0) {
9875 			break;
9876 		}
9877 	} while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9878 	vm_page_unlock_queues();
9879 
9880 	printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9881 
9882 	dpages = 0;
9883 	pgopages = 0;
9884 	precpages = 0;
9885 
9886 	vm_page_lock_queues();
9887 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9888 
9889 	do {
9890 		if (m == (vm_page_t)0) {
9891 			break;
9892 		}
9893 		if (m->vmp_dirty) {
9894 			dpages++;
9895 		}
9896 		if (m->vmp_free_when_done) {
9897 			pgopages++;
9898 		}
9899 		if (m->vmp_precious) {
9900 			precpages++;
9901 		}
9902 
9903 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9904 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9905 		if (m == (vm_page_t)0) {
9906 			break;
9907 		}
9908 	} while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9909 	vm_page_unlock_queues();
9910 
9911 	printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9912 }
9913 #endif /* MACH_BSD */
9914 
9915 
9916 #if CONFIG_IOSCHED
9917 int
9918 upl_get_cached_tier(upl_t  upl)
9919 {
9920 	assert(upl);
9921 	if (upl->flags & UPL_TRACKED_BY_OBJECT) {
9922 		return upl->upl_priority;
9923 	}
9924 	return -1;
9925 }
9926 #endif /* CONFIG_IOSCHED */
9927 
9928 
9929 void
9930 upl_callout_iodone(upl_t upl)
9931 {
9932 	struct upl_io_completion *upl_ctx = upl->upl_iodone;
9933 
9934 	if (upl_ctx) {
9935 		void    (*iodone_func)(void *, int) = upl_ctx->io_done;
9936 
9937 		assert(upl_ctx->io_done);
9938 
9939 		(*iodone_func)(upl_ctx->io_context, upl_ctx->io_error);
9940 	}
9941 }
9942 
/* Attach an I/O completion context to 'upl' (NULL clears it). */
void
upl_set_iodone(upl_t upl, void *upl_iodone)
{
	upl->upl_iodone = (struct upl_io_completion *)upl_iodone;
}

/* Record 'error' in the UPL's completion context, if one is attached. */
void
upl_set_iodone_error(upl_t upl, int error)
{
	struct upl_io_completion *upl_ctx = upl->upl_iodone;

	if (upl_ctx) {
		upl_ctx->io_error = error;
	}
}
9958 
9959 
/* Highest physical page number covered by this UPL. */
ppnum_t
upl_get_highest_page(
	upl_t                      upl)
{
	return upl->highest_page;
}

/* Page-aligned size of the UPL (see upl_adjusted_size). */
upl_size_t
upl_get_size(
	upl_t                      upl)
{
	return upl_adjusted_size(upl, PAGE_MASK);
}

/*
 * Size of the UPL after aligning its [u_offset, u_offset + u_size)
 * range outward to the page mask 'pgmask'.
 */
upl_size_t
upl_adjusted_size(
	upl_t upl,
	vm_map_offset_t pgmask)
{
	vm_object_offset_t start_offset, end_offset;

	start_offset = trunc_page_mask_64(upl->u_offset, pgmask);
	end_offset = round_page_mask_64(upl->u_offset + upl->u_size, pgmask);

	return (upl_size_t)(end_offset - start_offset);
}

/* UPL start offset truncated down to the page mask 'pgmask'. */
vm_object_offset_t
upl_adjusted_offset(
	upl_t upl,
	vm_map_offset_t pgmask)
{
	return trunc_page_mask_64(upl->u_offset, pgmask);
}

/* Byte offset of the UPL's data within its first (aligned) page. */
vm_object_offset_t
upl_get_data_offset(
	upl_t upl)
{
	return upl->u_offset - upl_adjusted_offset(upl, PAGE_MASK);
}

/* Get the UPL associated with this one (may be NULL). */
upl_t
upl_associated_upl(upl_t upl)
{
	return upl->associated_upl;
}

/* Associate another UPL with this one. */
void
upl_set_associated_upl(upl_t upl, upl_t associated_upl)
{
	upl->associated_upl = associated_upl;
}

/*
 * Return the vnode backing this UPL's map object, or NULL for
 * internal (anonymous) objects that have no vnode pager.
 */
struct vnode *
upl_lookup_vnode(upl_t upl)
{
	if (!upl->map_object->internal) {
		return vnode_pager_lookup_vnode(upl->map_object->pager);
	} else {
		return NULL;
	}
}
10023 
10024 #if UPL_DEBUG
/* Record two debugging alias values (UBC bookkeeping) on the UPL. */
kern_return_t
upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
{
	upl->ubc_alias1 = alias1;
	upl->ubc_alias2 = alias2;
	return KERN_SUCCESS;
}

/* Read back the debugging alias values; either out-pointer may be NULL. */
int
upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
{
	if (al) {
		*al = upl->ubc_alias1;
	}
	if (al2) {
		*al2 = upl->ubc_alias2;
	}
	return KERN_SUCCESS;
}
10043 #endif /* UPL_DEBUG */
10044 
10045 #if VM_PRESSURE_EVENTS
10046 /*
10047  * Upward trajectory.
10048  */
10049 
/*
 * Should the system move from NORMAL to WARNING pressure?
 * Without the compressor: page availability vs. the soft-memlimit
 * shortage threshold (and, with freezer support, only when there are
 * no frozen processes and too few suspended ones to reclaim from).
 * With the compressor: non-compressed memory vs. compaction threshold.
 */
boolean_t
VM_PRESSURE_NORMAL_TO_WARNING(void)
{
	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		/* Available pages below our threshold */
		uint32_t available_pages = memorystatus_get_available_page_count();
		if (available_pages < memorystatus_get_soft_memlimit_page_shortage_threshold()) {
#if CONFIG_FREEZE
			/* No frozen processes to kill */
			if (memorystatus_frozen_count == 0) {
				/* Not enough suspended processes available. */
				if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
					return TRUE;
				}
			}
#else /* CONFIG_FREEZE */
			return TRUE;
#endif /* CONFIG_FREEZE */
		}
		return FALSE;
	} else {
		return (AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0;
	}
}

/*
 * Should the system move from WARNING to CRITICAL pressure?
 * Without the compressor: availability vs. the critical shortage
 * threshold.  With it: compressor out of space, or non-compressed
 * memory below 120% of the swap-unthrottle threshold.
 */
boolean_t
VM_PRESSURE_WARNING_TO_CRITICAL(void)
{
	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		/* Available pages below our threshold */
		uint32_t available_pages = memorystatus_get_available_page_count();
		return available_pages < memorystatus_get_critical_page_shortage_threshold();
	} else {
		return vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0;
	}
}

/*
 * Downward trajectory.
 */
/*
 * Should the system relax from WARNING back to NORMAL?  Requires
 * headroom above the upward threshold (115% / 120%) to avoid
 * oscillating between states.
 */
boolean_t
VM_PRESSURE_WARNING_TO_NORMAL(void)
{
	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		/* Available pages above our threshold */
		uint32_t available_pages = memorystatus_get_available_page_count();
		uint32_t target_threshold = (((115 * memorystatus_get_soft_memlimit_page_shortage_threshold()) / 100));
		return available_pages > target_threshold;
	} else {
		return (AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0;
	}
}

/*
 * Should the system relax from CRITICAL back to WARNING?  Same
 * hysteresis idea as above (115% / 140% headroom).
 */
boolean_t
VM_PRESSURE_CRITICAL_TO_WARNING(void)
{
	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		uint32_t available_pages = memorystatus_get_available_page_count();
		uint32_t target_threshold = (((115 * memorystatus_get_critical_page_shortage_threshold()) / 100));
		return available_pages > target_threshold;
	} else {
		return (AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0;
	}
}
10114 #endif /* VM_PRESSURE_EVENTS */
10115 
10116 #if DEVELOPMENT || DEBUG
10117 bool compressor_running_perf_test;
10118 uint64_t compressor_perf_test_pages_processed;
10119 
10120 static kern_return_t
10121 move_pages_to_queue(
10122 	vm_map_t map,
10123 	user_addr_t start_addr,
10124 	size_t buffer_size,
10125 	vm_page_queue_head_t *queue,
10126 	size_t *pages_moved)
10127 {
10128 	kern_return_t err = KERN_SUCCESS;
10129 	vm_map_entry_t curr_entry = VM_MAP_ENTRY_NULL;
10130 	boolean_t addr_in_map = FALSE;
10131 	user_addr_t end_addr = USER_ADDR_NULL, curr_addr = USER_ADDR_NULL;
10132 	vm_object_t curr_object = VM_OBJECT_NULL;
10133 	*pages_moved = 0;
10134 
10135 
10136 	if (VM_MAP_PAGE_SIZE(map) != PAGE_SIZE_64) {
10137 		/*
10138 		 * We don't currently support benchmarking maps with a different page size
10139 		 * than the kernel.
10140 		 */
10141 		return KERN_INVALID_ARGUMENT;
10142 	}
10143 
10144 	if (os_add_overflow(start_addr, buffer_size, &end_addr)) {
10145 		return KERN_INVALID_ARGUMENT;
10146 	}
10147 
10148 	vm_map_lock_read(map);
10149 	curr_addr = vm_map_trunc_page_mask(start_addr, VM_MAP_PAGE_MASK(map));
10150 	end_addr = vm_map_round_page_mask(start_addr + buffer_size, VM_MAP_PAGE_MASK(map));
10151 
10152 
10153 	while (curr_addr < end_addr) {
10154 		addr_in_map = vm_map_lookup_entry(map, curr_addr, &curr_entry);
10155 		if (!addr_in_map) {
10156 			err = KERN_INVALID_ARGUMENT;
10157 			break;
10158 		}
10159 		curr_object = VME_OBJECT(curr_entry);
10160 		if (curr_object) {
10161 			vm_object_lock(curr_object);
10162 			/* We really only want anonymous memory that's in the top level map and object here. */
10163 			if (curr_entry->is_sub_map || curr_entry->wired_count != 0 ||
10164 			    curr_object->shadow != VM_OBJECT_NULL || !curr_object->internal) {
10165 				err = KERN_INVALID_ARGUMENT;
10166 				vm_object_unlock(curr_object);
10167 				break;
10168 			}
10169 			vm_map_offset_t start_offset = (curr_addr - curr_entry->vme_start) + VME_OFFSET(curr_entry);
10170 			vm_map_offset_t end_offset = MIN(curr_entry->vme_end, end_addr) -
10171 			    (curr_entry->vme_start + VME_OFFSET(curr_entry));
10172 			vm_map_offset_t curr_offset = start_offset;
10173 			vm_page_t curr_page;
10174 			while (curr_offset < end_offset) {
10175 				curr_page = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset));
10176 				if (curr_page != VM_PAGE_NULL) {
10177 					vm_page_lock_queues();
10178 					if (curr_page->vmp_laundry) {
10179 						vm_pageout_steal_laundry(curr_page, TRUE);
10180 					}
10181 					/*
10182 					 * we've already factored out pages in the laundry which
10183 					 * means this page can't be on the pageout queue so it's
10184 					 * safe to do the vm_page_queues_remove
10185 					 */
10186 					bool donate = (curr_page->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
10187 					vm_page_queues_remove(curr_page, TRUE);
10188 					if (donate) {
10189 						/*
10190 						 * The compressor needs to see this bit to know
10191 						 * where this page needs to land. Also if stolen,
10192 						 * this bit helps put the page back in the right
10193 						 * special queue where it belongs.
10194 						 */
10195 						curr_page->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
10196 					}
10197 					// Clear the referenced bit so we ensure this gets paged out
10198 					curr_page->vmp_reference = false;
10199 					if (curr_page->vmp_pmapped) {
10200 						pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(curr_page),
10201 						    VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void*)NULL);
10202 					}
10203 					vm_page_queue_enter(queue, curr_page, vmp_pageq);
10204 					vm_page_unlock_queues();
10205 					*pages_moved += 1;
10206 				}
10207 				curr_offset += PAGE_SIZE_64;
10208 				curr_addr += PAGE_SIZE_64;
10209 			}
10210 		}
10211 		vm_object_unlock(curr_object);
10212 	}
10213 	vm_map_unlock_read(map);
10214 	return err;
10215 }
10216 
10217 /*
10218  * Local queue for processing benchmark pages.
10219  * Can't be allocated on the stack because the pointer has to
10220  * be packable.
10221  */
10222 vm_page_queue_head_t compressor_perf_test_queue VM_PAGE_PACKED_ALIGNED;
/*
 * DEVELOPMENT/DEBUG benchmark: push the pages backing the user buffer
 * [buf, buf + buffer_size) through the compressor and report how long
 * it took (*time, ns), how many bytes went in (*bytes_compressed), and
 * how much the compressor pool grew (*compressor_growth).
 * Only one instance may run at a time; requires an active compressor
 * and a non-kernel caller.
 */
kern_return_t
run_compressor_perf_test(
	user_addr_t buf,
	size_t buffer_size,
	uint64_t *time,
	uint64_t *bytes_compressed,
	uint64_t *compressor_growth)
{
	kern_return_t err = KERN_SUCCESS;
	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
		return KERN_NOT_SUPPORTED;
	}
	if (current_task() == kernel_task) {
		return KERN_INVALID_ARGUMENT;
	}
	/* compressor_running_perf_test is protected by the page queues lock */
	vm_page_lock_queues();
	if (compressor_running_perf_test) {
		/* Only run one instance of the benchmark at a time. */
		vm_page_unlock_queues();
		return KERN_RESOURCE_SHORTAGE;
	}
	vm_page_unlock_queues();
	size_t page_count = 0;
	vm_map_t map;
	vm_page_t p, next;
	uint64_t compressor_perf_test_start = 0, compressor_perf_test_end = 0;
	uint64_t compressed_bytes_start = 0, compressed_bytes_end = 0;
	*bytes_compressed = *compressor_growth = 0;

	/* gather the buffer's pages onto our private benchmark queue */
	vm_page_queue_init(&compressor_perf_test_queue);
	map = current_task()->map;
	err = move_pages_to_queue(map, buf, buffer_size, &compressor_perf_test_queue, &page_count);
	if (err != KERN_SUCCESS) {
		goto out;
	}

	vm_page_lock_queues();
	compressor_running_perf_test = true;
	compressor_perf_test_pages_processed = 0;
	/*
	 * At this point the compressor threads should only process the benchmark queue
	 * so we can look at the difference in c_segment_compressed_bytes while the perf test is running
	 * to determine how many compressed bytes we ended up using.
	 */
	compressed_bytes_start = os_atomic_load(&c_segment_compressed_bytes, relaxed);
	vm_page_unlock_queues();

	page_count = vm_pageout_page_queue(&compressor_perf_test_queue, page_count, true);

	vm_page_lock_queues();
	compressor_perf_test_start = mach_absolute_time();

	// Wake up the compressor thread(s)
	sched_cond_signal(&pgo_iothread_internal_state[0].pgo_wakeup,
	    pgo_iothread_internal_state[0].pgo_iothread);

	/*
	 * Depending on when this test is run we could overshoot or be right on the mark
	 * with our page_count. So the comparison is of the _less than_ variety.
	 */
	while (compressor_perf_test_pages_processed < page_count) {
		assert_wait((event_t) &compressor_perf_test_pages_processed, THREAD_UNINT);
		vm_page_unlock_queues();
		thread_block(THREAD_CONTINUE_NULL);
		vm_page_lock_queues();
	}
	compressor_perf_test_end = mach_absolute_time();
	compressed_bytes_end = os_atomic_load(&c_segment_compressed_bytes, relaxed);
	vm_page_unlock_queues();


out:
	/*
	 * If we errored out above, then we could still have some pages
	 * on the local queue. Make sure to put them back on the active queue before
	 * returning so they're not orphaned.
	 */
	vm_page_lock_queues();
	/* on the error path start == end == 0, so *time reports 0 */
	absolutetime_to_nanoseconds(compressor_perf_test_end - compressor_perf_test_start, time);
	p = (vm_page_t) vm_page_queue_first(&compressor_perf_test_queue);
	while (p && !vm_page_queue_end(&compressor_perf_test_queue, (vm_page_queue_entry_t)p)) {
		next = (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next);

		vm_page_enqueue_active(p, FALSE);
		p = next;
	}

	compressor_running_perf_test = false;
	vm_page_unlock_queues();
	if (err == KERN_SUCCESS) {
		*bytes_compressed = page_count * PAGE_SIZE_64;
		*compressor_growth = compressed_bytes_end - compressed_bytes_start;
	}

	/*
	 * pageout_scan will consider waking the compactor swapper
	 * before it blocks. Do the same thing here before we return
	 * to ensure that back to back benchmark runs can't overly fragment the
	 * compressor pool.
	 */
	vm_consider_waking_compactor_swapper();
	return err;
}
10326 #endif /* DEVELOPMENT || DEBUG */
10327