xref: /xnu-11215.1.10/osfmk/vm/vm_pageout.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	vm/vm_pageout.c
60  *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61  *	Date:	1985
62  *
63  *	The proverbial page-out daemon.
64  */
65 
66 #include "mach/kern_return.h"
67 #include <stdint.h>
68 #include <ptrauth.h>
69 
70 #include <debug.h>
71 
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/mach_host_server.h>
75 #include <mach/upl.h>
76 #include <mach/vm_map.h>
77 #include <mach/vm_param.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/sdt.h>
80 
81 #include <kern/kern_types.h>
82 #include <kern/counter.h>
83 #include <kern/host_statistics.h>
84 #include <kern/machine.h>
85 #include <kern/misc_protos.h>
86 #include <kern/sched.h>
87 #include <kern/thread.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90 #include <kern/policy_internal.h>
91 #include <kern/thread_group.h>
92 
93 #include <os/log.h>
94 
95 #include <sys/kdebug_triage.h>
96 
97 #include <machine/vm_tuning.h>
98 #include <machine/commpage.h>
99 
100 #include <vm/pmap.h>
101 #include <vm/vm_compressor_pager_internal.h>
102 #include <vm/vm_fault_internal.h>
103 #include <vm/vm_map_internal.h>
104 #include <vm/vm_object_internal.h>
105 #include <vm/vm_page_internal.h>
106 #include <vm/vm_pageout_internal.h>
107 #include <vm/vm_protos_internal.h> /* must be last */
108 #include <vm/memory_object.h>
109 #include <vm/vm_purgeable_internal.h>
110 #include <vm/vm_shared_region.h>
111 #include <vm/vm_compressor_internal.h>
112 #include <vm/vm_kern_xnu.h>
113 #include <vm/vm_iokit.h>
114 #include <vm/vm_ubc.h>
115 
116 #include <san/kasan.h>
117 
118 #if CONFIG_PHANTOM_CACHE
119 #include <vm/vm_phantom_cache_internal.h>
120 #endif
121 
122 #if UPL_DEBUG
123 #include <libkern/OSDebug.h>
124 #endif
125 
126 extern int cs_debug;
127 
128 #if CONFIG_MBUF_MCACHE
129 extern void mbuf_drain(boolean_t);
130 #endif /* CONFIG_MBUF_MCACHE */
131 
132 #if VM_PRESSURE_EVENTS
133 #if CONFIG_JETSAM
134 extern unsigned int memorystatus_available_pages;
135 extern unsigned int memorystatus_available_pages_pressure;
136 extern unsigned int memorystatus_available_pages_critical;
137 #else /* CONFIG_JETSAM */
138 extern uint64_t memorystatus_available_pages;
139 extern uint64_t memorystatus_available_pages_pressure;
140 extern uint64_t memorystatus_available_pages_critical;
141 #endif /* CONFIG_JETSAM */
142 #if CONFIG_FREEZE
143 extern unsigned int memorystatus_frozen_count;
144 extern unsigned int memorystatus_suspended_count;
145 #endif /* CONFIG_FREEZE */
146 extern vm_pressure_level_t memorystatus_vm_pressure_level;
147 
148 extern lck_mtx_t memorystatus_jetsam_broadcast_lock;
149 extern uint32_t memorystatus_jetsam_fg_band_waiters;
150 extern uint32_t memorystatus_jetsam_bg_band_waiters;
151 
152 void vm_pressure_response(void);
153 extern void consider_vm_pressure_events(void);
154 
155 #define MEMORYSTATUS_SUSPENDED_THRESHOLD  4
156 #endif /* VM_PRESSURE_EVENTS */
157 
158 SECURITY_READ_ONLY_LATE(thread_t) vm_pageout_scan_thread;
159 SECURITY_READ_ONLY_LATE(thread_t) vm_pageout_gc_thread;
160 #if CONFIG_VPS_DYNAMIC_PRIO
161 TUNABLE(bool, vps_dynamic_priority_enabled, "vps_dynamic_priority_enabled", false);
162 #else
163 const bool vps_dynamic_priority_enabled = false;
164 #endif
165 boolean_t vps_yield_for_pgqlockwaiters = TRUE;
166 
167 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
168 #if !XNU_TARGET_OS_OSX
169 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
170 #else /* !XNU_TARGET_OS_OSX */
171 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
172 #endif /* !XNU_TARGET_OS_OSX */
173 #endif
174 
175 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
176 #define VM_PAGEOUT_DEADLOCK_RELIEF 100  /* number of pages to move to break deadlock */
177 #endif
178 
179 #ifndef VM_PAGE_LAUNDRY_MAX
180 #define VM_PAGE_LAUNDRY_MAX     128UL   /* maximum pageouts on a given pageout queue */
181 #endif  /* VM_PAGE_LAUNDRY_MAX */
182 
183 #ifndef VM_PAGEOUT_BURST_WAIT
184 #define VM_PAGEOUT_BURST_WAIT   1       /* milliseconds */
185 #endif  /* VM_PAGEOUT_BURST_WAIT */
186 
187 #ifndef VM_PAGEOUT_EMPTY_WAIT
188 #define VM_PAGEOUT_EMPTY_WAIT   50      /* milliseconds */
189 #endif  /* VM_PAGEOUT_EMPTY_WAIT */
190 
191 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
192 #define VM_PAGEOUT_DEADLOCK_WAIT 100    /* milliseconds */
193 #endif  /* VM_PAGEOUT_DEADLOCK_WAIT */
194 
195 #ifndef VM_PAGEOUT_IDLE_WAIT
196 #define VM_PAGEOUT_IDLE_WAIT    10      /* milliseconds */
197 #endif  /* VM_PAGEOUT_IDLE_WAIT */
198 
199 #ifndef VM_PAGEOUT_SWAP_WAIT
200 #define VM_PAGEOUT_SWAP_WAIT    10      /* milliseconds */
201 #endif  /* VM_PAGEOUT_SWAP_WAIT */
202 
203 /*
204  * vm_page_max_speculative_age_q should be less than or equal to
205  * VM_PAGE_RESERVED_SPECULATIVE_AGE_Q which is number of allocated
206  * vm_page_queue_speculative entries.
207  */
208 
209 TUNABLE_DEV_WRITEABLE(unsigned int, vm_page_max_speculative_age_q, "vm_page_max_speculative_age_q", VM_PAGE_DEFAULT_MAX_SPECULATIVE_AGE_Q);
210 #ifndef VM_PAGE_SPECULATIVE_TARGET
211 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage))
212 #endif /* VM_PAGE_SPECULATIVE_TARGET */
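/*
 * Worked example (assuming, purely for illustration, that
 * vm_pageout_state.vm_page_speculative_percentage is 5):
 *
 *	VM_PAGE_SPECULATIVE_TARGET(total) == (total) * 1 / (100 / 5)
 *	                                  == (total) / 20
 *
 * i.e. roughly 5% of "total".  Note that the divisor is computed with
 * integer division, so a percentage of 3 yields (total) / 33, slightly
 * more than 3%.
 */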
213 
214 
215 /*
216  *	To obtain a reasonable LRU approximation, the inactive queue
217  *	needs to be large enough to give pages on it a chance to be
218  *	referenced a second time.  This macro defines the fraction
219  *	of active+inactive pages that should be inactive.
220  *	The pageout daemon uses it to update vm_page_inactive_target.
221  *
222  *	If vm_page_free_count falls below vm_page_free_target and
223  *	vm_page_inactive_count is below vm_page_inactive_target,
224  *	then the pageout daemon starts running.
225  */
226 
227 #ifndef VM_PAGE_INACTIVE_TARGET
228 #define VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 1 / 2)
229 #endif  /* VM_PAGE_INACTIVE_TARGET */
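/*
 * Worked example (hypothetical figure): on a system with roughly
 * 1,000,000 active+inactive pages, the default macro above gives
 *
 *	VM_PAGE_INACTIVE_TARGET(1000000) == 1000000 * 1 / 2 == 500000
 *
 * so about half of the pageable pages are kept inactive, giving them a
 * chance to be referenced a second time before they are reclaimed.
 */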
230 
231 /*
232  *	Once the pageout daemon starts running, it keeps going
233  *	until vm_page_free_count meets or exceeds vm_page_free_target.
234  */
235 
236 #ifndef VM_PAGE_FREE_TARGET
237 #if !XNU_TARGET_OS_OSX
238 #define VM_PAGE_FREE_TARGET(free)       (15 + (free) / 100)
239 #else /* !XNU_TARGET_OS_OSX */
240 #define VM_PAGE_FREE_TARGET(free)       (15 + (free) / 80)
241 #endif /* !XNU_TARGET_OS_OSX */
242 #endif  /* VM_PAGE_FREE_TARGET */
243 
244 
245 /*
246  *	The pageout daemon always starts running once vm_page_free_count
247  *	falls below vm_page_free_min.
248  */
249 
250 #ifndef VM_PAGE_FREE_MIN
251 #if !XNU_TARGET_OS_OSX
252 #define VM_PAGE_FREE_MIN(free)          (10 + (free) / 200)
253 #else /* !XNU_TARGET_OS_OSX */
254 #define VM_PAGE_FREE_MIN(free)          (10 + (free) / 100)
255 #endif /* !XNU_TARGET_OS_OSX */
256 #endif  /* VM_PAGE_FREE_MIN */
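/*
 * Worked example (hypothetical input of roughly 1,000,000 pages) for the
 * XNU_TARGET_OS_OSX variants of the two macros above:
 *
 *	VM_PAGE_FREE_TARGET(1000000) == 15 + 1000000 / 80  == 12515
 *	VM_PAGE_FREE_MIN(1000000)    == 10 + 1000000 / 100 == 10010
 *
 * i.e. the pageout daemon would always start below roughly 10,010 free
 * pages and keep running until the free count climbs back to roughly
 * 12,515.
 */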
257 
258 #if !XNU_TARGET_OS_OSX
259 #define VM_PAGE_FREE_RESERVED_LIMIT     100
260 #define VM_PAGE_FREE_MIN_LIMIT          1500
261 #define VM_PAGE_FREE_TARGET_LIMIT       2000
262 #else /* !XNU_TARGET_OS_OSX */
263 #define VM_PAGE_FREE_RESERVED_LIMIT     1700
264 #define VM_PAGE_FREE_MIN_LIMIT          3500
265 #define VM_PAGE_FREE_TARGET_LIMIT       4000
266 #endif /* !XNU_TARGET_OS_OSX */
267 
268 /*
269  *	When vm_page_free_count falls below vm_page_free_reserved,
270  *	only vm-privileged threads can allocate pages.  vm-privilege
271  *	allows the pageout daemon and default pager (and any other
272  *	associated threads needed for default pageout) to continue
273  *	operation by dipping into the reserved pool of pages.
274  */
275 
276 #ifndef VM_PAGE_FREE_RESERVED
277 #define VM_PAGE_FREE_RESERVED(n)        \
278 	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
279 #endif  /* VM_PAGE_FREE_RESERVED */
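/*
 * Worked example: with the default VM_PAGE_LAUNDRY_MAX of 128, the
 * reserved pool works out to
 *
 *	VM_PAGE_FREE_RESERVED(n) == 6 * 128 + n == 768 + n
 *
 * pages, where "n" is whatever slack the caller passes in; only
 * vm-privileged threads may allocate once the free count drops into this
 * reserve.
 */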
280 
281 /*
282  *	When we dequeue pages from the inactive list, they are
283  *	reactivated (ie, put back on the active queue) if referenced.
284  *	However, it is possible to starve the free list if other
285  *	processors are referencing pages faster than we can turn off
286  *	the referenced bit.  So we limit the number of reactivations
287  *	we will make per call of vm_pageout_scan().
288  */
289 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
290 
291 #ifndef VM_PAGE_REACTIVATE_LIMIT
292 #if !XNU_TARGET_OS_OSX
293 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
294 #else /* !XNU_TARGET_OS_OSX */
295 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20, VM_PAGE_REACTIVATE_LIMIT_MAX))
296 #endif /* !XNU_TARGET_OS_OSX */
297 #endif  /* VM_PAGE_REACTIVATE_LIMIT */
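/*
 * Worked example (hypothetical figure): on an XNU_TARGET_OS_OSX system
 * with roughly 1,000,000 available pages, the default macro gives
 *
 *	VM_PAGE_REACTIVATE_LIMIT(1000000) == MAX(1000000 / 20, 20000) == 50000
 *
 * reactivations allowed per call of vm_pageout_scan().
 */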
298 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM       1000
299 
300 int vm_pageout_protect_realtime = true;
301 
302 extern boolean_t hibernate_cleaning_in_progress;
303 
304 struct pgo_iothread_state pgo_iothread_internal_state[MAX_COMPRESSOR_THREAD_COUNT];
305 struct pgo_iothread_state pgo_iothread_external_state;
306 
307 #if VM_PRESSURE_EVENTS
308 void vm_pressure_thread(void);
309 
310 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
311 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
312 
313 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
314 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
315 #endif
316 
317 static void vm_pageout_iothread_external(struct pgo_iothread_state *, wait_result_t);
318 static void vm_pageout_iothread_internal(struct pgo_iothread_state *, wait_result_t);
319 static void vm_pageout_adjust_eq_iothrottle(struct pgo_iothread_state *, boolean_t);
320 
321 extern void vm_pageout_continue(void);
322 extern void vm_pageout_scan(void);
323 
324 boolean_t vm_pageout_running = FALSE;
325 
326 uint32_t vm_page_upl_tainted = 0;
327 uint32_t vm_page_iopl_tainted = 0;
328 
329 #if XNU_TARGET_OS_OSX
330 static boolean_t vm_pageout_waiter  = FALSE;
331 #endif /* XNU_TARGET_OS_OSX */
332 
333 
334 #if DEVELOPMENT || DEBUG
335 struct vm_pageout_debug vm_pageout_debug;
336 #endif
337 struct vm_pageout_vminfo vm_pageout_vminfo;
338 struct vm_pageout_state  vm_pageout_state;
339 struct vm_config         vm_config;
340 
341 struct  vm_pageout_queue vm_pageout_queue_internal VM_PAGE_PACKED_ALIGNED;
342 struct  vm_pageout_queue vm_pageout_queue_external VM_PAGE_PACKED_ALIGNED;
343 #if DEVELOPMENT || DEBUG
344 struct vm_pageout_queue vm_pageout_queue_benchmark VM_PAGE_PACKED_ALIGNED;
345 #endif /* DEVELOPMENT || DEBUG */
346 
347 int         vm_upl_wait_for_pages = 0;
348 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
349 
350 boolean_t(*volatile consider_buffer_cache_collect)(int) = NULL;
351 
352 int     vm_debug_events = 0;
353 
354 LCK_GRP_DECLARE(vm_pageout_lck_grp, "vm_pageout");
355 
356 #if CONFIG_MEMORYSTATUS
357 extern void memorystatus_kill_on_vps_starvation(void);
358 
359 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
360 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
361 
362 #endif
363 
364 #if __AMP__
365 
366 
367 /*
368  * Bind compressor threads to e-cores unless there are multiple non-e clusters
369  */
370 #if (MAX_CPU_CLUSTERS > 2)
371 #define VM_COMPRESSOR_EBOUND_DEFAULT false
372 #elif defined(XNU_TARGET_OS_XR)
373 #define VM_COMPRESSOR_EBOUND_DEFAULT false
374 #else
375 #define VM_COMPRESSOR_EBOUND_DEFAULT true
376 #endif
377 
378 TUNABLE(bool, vm_compressor_ebound, "vmcomp_ecluster", VM_COMPRESSOR_EBOUND_DEFAULT);
379 int vm_pgo_pbound = 0;
380 extern void thread_bind_cluster_type(thread_t, char, bool);
381 
382 #endif /* __AMP__ */
383 
384 
385 /*
386  *	Routine:	vm_pageout_object_terminate
387  *	Purpose:
388  *		Destroy the pageout_object, and perform all of the
389  *		required cleanup actions.
390  *
391  *	In/Out conditions:
392  *		The object must be locked, and will be returned locked.
393  */
394 void
395 vm_pageout_object_terminate(
396 	vm_object_t     object)
397 {
398 	vm_object_t     shadow_object;
399 
400 	/*
401 	 * Deal with the deallocation (last reference) of a pageout object
402 	 * (used for cleaning-in-place) by dropping the paging references/
403 	 * freeing pages in the original object.
404 	 */
405 
406 	assert(object->pageout);
407 	shadow_object = object->shadow;
408 	vm_object_lock(shadow_object);
409 
410 	while (!vm_page_queue_empty(&object->memq)) {
411 		vm_page_t               p, m;
412 		vm_object_offset_t      offset;
413 
414 		p = (vm_page_t) vm_page_queue_first(&object->memq);
415 
416 		assert(p->vmp_private);
417 		assert(p->vmp_free_when_done);
418 		p->vmp_free_when_done = FALSE;
419 		assert(!p->vmp_cleaning);
420 		assert(!p->vmp_laundry);
421 
422 		offset = p->vmp_offset;
423 		VM_PAGE_FREE(p);
424 		p = VM_PAGE_NULL;
425 
426 		m = vm_page_lookup(shadow_object,
427 		    offset + object->vo_shadow_offset);
428 
429 		if (m == VM_PAGE_NULL) {
430 			continue;
431 		}
432 
433 		assert((m->vmp_dirty) || (m->vmp_precious) ||
434 		    (m->vmp_busy && m->vmp_cleaning));
435 
436 		/*
437 		 * Handle the trusted pager throttle.
438 		 * Also decrement the burst throttle (if external).
439 		 */
440 		vm_page_lock_queues();
441 		if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
442 			vm_pageout_throttle_up(m);
443 		}
444 
445 		/*
446 		 * Handle the "target" page(s). These pages are to be freed if
447 		 * successfully cleaned. Target pages are always busy, and are
448 		 * wired exactly once. The initial target pages are not mapped,
449 		 * (so cannot be referenced or modified) but converted target
450 		 * pages may have been modified between the selection as an
451 		 * adjacent page and conversion to a target.
452 		 */
453 		if (m->vmp_free_when_done) {
454 			assert(m->vmp_busy);
455 			assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
456 			assert(m->vmp_wire_count == 1);
457 			m->vmp_cleaning = FALSE;
458 			m->vmp_free_when_done = FALSE;
459 			/*
460 			 * Revoke all access to the page. Since the object is
461 			 * locked, and the page is busy, this prevents the page
462 			 * from being dirtied after the pmap_disconnect() call
463 			 * returns.
464 			 *
465 			 * Since the page is left "dirty" but "not modified", we
466 			 * can detect whether the page was redirtied during
467 			 * pageout by checking the modify state.
468 			 */
469 			if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
470 				SET_PAGE_DIRTY(m, FALSE);
471 			} else {
472 				m->vmp_dirty = FALSE;
473 			}
474 
475 			if (m->vmp_dirty) {
476 				vm_page_unwire(m, TRUE);        /* reactivates */
477 				counter_inc(&vm_statistics_reactivations);
478 				vm_page_wakeup_done(object, m);
479 			} else {
480 				vm_page_free(m);  /* clears busy, etc. */
481 			}
482 			vm_page_unlock_queues();
483 			continue;
484 		}
485 		/*
486 		 * Handle the "adjacent" pages. These pages were cleaned in
487 		 * place, and should be left alone.
488 		 * If prep_pin_count is nonzero, then someone is using the
489 		 * page, so make it active.
490 		 */
491 		if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) {
492 			if (m->vmp_reference) {
493 				vm_page_activate(m);
494 			} else {
495 				vm_page_deactivate(m);
496 			}
497 		}
498 		if (m->vmp_overwriting) {
499 			/*
500 			 * the (COPY_OUT_FROM == FALSE) request_page_list case
501 			 */
502 			if (m->vmp_busy) {
503 				/*
504 				 * We do not re-set m->vmp_dirty !
505 				 * The page was busy so no extraneous activity
506 				 * could have occurred. COPY_INTO is a read into the
507 				 * new pages. CLEAN_IN_PLACE does actually write
508 				 * out the pages but handling outside of this code
509 				 * will take care of resetting dirty. We clear the
510 				 * modify bit, however, for the Programmed I/O case.
511 				 */
512 				pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
513 
514 				m->vmp_busy = FALSE;
515 				m->vmp_absent = FALSE;
516 			} else {
517 				/*
518 				 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
519 				 * Occurs when the original page was wired
520 				 * at the time of the list request
521 				 */
522 				assert(VM_PAGE_WIRED(m));
523 				vm_page_unwire(m, TRUE);        /* reactivates */
524 			}
525 			m->vmp_overwriting = FALSE;
526 		} else {
527 			m->vmp_dirty = FALSE;
528 		}
529 		m->vmp_cleaning = FALSE;
530 
531 		/*
532 		 * Wake up any thread waiting for the page to finish cleaning.
533 		 */
534 		vm_page_wakeup(object, m);
535 		vm_page_unlock_queues();
536 	}
537 	/*
538 	 * Account for the paging reference taken in vm_paging_object_allocate.
539 	 */
540 	vm_object_activity_end(shadow_object);
541 	vm_object_unlock(shadow_object);
542 
543 	assert(object->ref_count == 0);
544 	assert(object->paging_in_progress == 0);
545 	assert(object->activity_in_progress == 0);
546 	assert(object->resident_page_count == 0);
547 	return;
548 }
549 
550 /*
551  * Routine:	vm_pageclean_setup
552  *
553  * Purpose:	set up a page to be cleaned (made non-dirty), but not
554  *		necessarily flushed from the VM page cache.
555  *		This is accomplished by cleaning in place.
556  *
557  *		The page must not be busy, and new_object
558  *		must be locked.
559  *
560  */
561 static void
562 vm_pageclean_setup(
563 	vm_page_t               m,
564 	vm_page_t               new_m,
565 	vm_object_t             new_object,
566 	vm_object_offset_t      new_offset)
567 {
568 	assert(!m->vmp_busy);
569 #if 0
570 	assert(!m->vmp_cleaning);
571 #endif
572 
573 	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
574 
575 	/*
576 	 * Mark original page as cleaning in place.
577 	 */
578 	m->vmp_cleaning = TRUE;
579 	SET_PAGE_DIRTY(m, FALSE);
580 	m->vmp_precious = FALSE;
581 
582 	/*
583 	 * Convert the fictitious page to a private shadow of
584 	 * the real page.
585 	 */
586 	assert(new_m->vmp_fictitious);
587 	assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
588 	new_m->vmp_fictitious = FALSE;
589 	new_m->vmp_private = TRUE;
590 	new_m->vmp_free_when_done = TRUE;
591 	VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
592 
593 	vm_page_lockspin_queues();
594 	vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
595 	vm_page_unlock_queues();
596 
597 	vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
598 	assert(!new_m->vmp_wanted);
599 	new_m->vmp_busy = FALSE;
600 }
601 
602 /*
603  *	Routine:	vm_pageout_initialize_page
604  *	Purpose:
605  *		Causes the specified page to be initialized in
606  *		the appropriate memory object. This routine is used to push
607  *		pages into a copy-object when they are modified in the
608  *		permanent object.
609  *
610  *		The page is moved to a temporary object and paged out.
611  *
612  *	In/out conditions:
613  *		The page in question must not be on any pageout queues.
614  *		The object to which it belongs must be locked.
615  *		The page must be busy, but not hold a paging reference.
616  *
617  *	Implementation:
618  *		Move this page to a completely new object.
619  */
620 void
621 vm_pageout_initialize_page(
622 	vm_page_t       m)
623 {
624 	vm_object_t             object;
625 	vm_object_offset_t      paging_offset;
626 	memory_object_t         pager;
627 
628 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
629 
630 	object = VM_PAGE_OBJECT(m);
631 
632 	assert(m->vmp_busy);
633 	assert(object->internal);
634 
635 	/*
636 	 *	Verify that we really want to clean this page
637 	 */
638 	assert(!m->vmp_absent);
639 	assert(m->vmp_dirty);
640 
641 	/*
642 	 *	Create a paging reference to let us play with the object.
643 	 */
644 	paging_offset = m->vmp_offset + object->paging_offset;
645 
646 	if (m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_restart || (!m->vmp_dirty && !m->vmp_precious)) {
647 		panic("reservation without pageout?"); /* alan */
648 
649 		VM_PAGE_FREE(m);
650 		vm_object_unlock(object);
651 
652 		return;
653 	}
654 
655 	/*
656 	 * If there's no pager, then we can't clean the page.  This should
657 	 * never happen since this should be a copy object and therefore not
658 	 * an external object, so the pager should always be there.
659 	 */
660 
661 	pager = object->pager;
662 
663 	if (pager == MEMORY_OBJECT_NULL) {
664 		panic("missing pager for copy object");
665 
666 		VM_PAGE_FREE(m);
667 		return;
668 	}
669 
670 	/*
671 	 * set the page for future call to vm_fault_list_request
672 	 */
673 	pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
674 	SET_PAGE_DIRTY(m, FALSE);
675 
676 	/*
677 	 * keep the object from collapsing or terminating
678 	 */
679 	vm_object_paging_begin(object);
680 	vm_object_unlock(object);
681 
682 	/*
683 	 *	Write the data to its pager.
684 	 *	Note that the data is passed by naming the new object,
685 	 *	not a virtual address; the pager interface has been
686 	 *	manipulated to use the "internal memory" data type.
687 	 *	[The object reference from its allocation is donated
688 	 *	to the eventual recipient.]
689 	 */
690 	memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
691 
692 	vm_object_lock(object);
693 	vm_object_paging_end(object);
694 }
695 
696 
697 /*
698  * vm_pageout_cluster:
699  *
700  * Given a page, queue it to the appropriate I/O thread,
701  * which will page it out and attempt to clean adjacent pages
702  * in the same operation.
703  *
704  * The object and queues must be locked. We will take a
705  * paging reference to prevent deallocation or collapse when we
706  * release the object lock back at the call site.  The I/O thread
707  * is responsible for consuming this reference
708  *
709  * The page must not be on any pageout queue.
710  */
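/*
 * Minimal usage sketch (illustrative only; "object" and "m" are
 * placeholder identifiers, not code from this file).  A caller is
 * expected to hold both the object lock and the page queue lock, and to
 * hand over a dirty or precious page that is not already on a pageout
 * queue:
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
 *	vm_pageout_cluster(m);          // queue m for the pageout I/O thread
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);       // the I/O thread consumes the paging reference
 */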
711 #if DEVELOPMENT || DEBUG
712 vmct_stats_t vmct_stats;
713 
714 int32_t vmct_active = 0;
715 uint64_t vm_compressor_epoch_start = 0;
716 uint64_t vm_compressor_epoch_stop = 0;
717 
718 typedef enum vmct_state_t {
719 	VMCT_IDLE,
720 	VMCT_AWAKENED,
721 	VMCT_ACTIVE,
722 } vmct_state_t;
723 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
724 #endif
725 
726 
727 
728 static void
729 vm_pageout_cluster_to_queue(vm_page_t m, struct vm_pageout_queue *q)
730 {
731 	vm_object_t object = VM_PAGE_OBJECT(m);
732 
733 	VM_PAGE_CHECK(m);
734 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
735 	vm_object_lock_assert_exclusive(object);
736 
737 	/*
738 	 * Make sure it's OK to page this out.
739 	 */
740 	assert((m->vmp_dirty || m->vmp_precious) && (!VM_PAGE_WIRED(m)));
741 	assert(!m->vmp_cleaning && !m->vmp_laundry);
742 	assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
743 
744 	/*
745 	 * protect the object from collapse or termination
746 	 */
747 	vm_object_activity_begin(object);
748 
749 
750 	/*
751 	 * pgo_laundry count is tied to the laundry bit
752 	 */
753 	m->vmp_laundry = TRUE;
754 	q->pgo_laundry++;
755 
756 	m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q;
757 	vm_page_queue_enter(&q->pgo_pending, m, vmp_pageq);
758 
759 	if (object->internal == TRUE) {
760 		assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
761 		m->vmp_busy = TRUE;
762 #if DEVELOPMENT || DEBUG
763 		/*
764 		 * The benchmark queue will be woken up independently by the benchmark
765 		 * itself.
766 		 */
767 		if (q != &vm_pageout_queue_benchmark) {
768 #else /* DEVELOPMENT || DEBUG */
769 		if (true) {
770 #endif /* DEVELOPMENT || DEBUG */
771 			/*
772 			 * Wake up the first compressor thread. It will wake subsequent
773 			 * threads if necessary.
774 			 */
775 			sched_cond_signal(&pgo_iothread_internal_state[0].pgo_wakeup,
776 			    pgo_iothread_internal_state[0].pgo_iothread);
777 		}
778 	} else {
779 		sched_cond_signal(&pgo_iothread_external_state.pgo_wakeup, pgo_iothread_external_state.pgo_iothread);
780 	}
781 	VM_PAGE_CHECK(m);
782 }
783 
784 void
785 vm_pageout_cluster(vm_page_t m)
786 {
787 	struct          vm_pageout_queue *q;
788 	vm_object_t     object = VM_PAGE_OBJECT(m);
789 	if (object->internal) {
790 		q = &vm_pageout_queue_internal;
791 	} else {
792 		q = &vm_pageout_queue_external;
793 	}
794 	vm_pageout_cluster_to_queue(m, q);
795 }
796 
797 
798 /*
799  * A page is back from laundry or we are stealing it back from
800  * the laundering state.  See if there are some pages waiting to
801  * go to laundry and if we can let some of them go now.
802  *
803  * Object and page queues must be locked.
804  */
805 void
806 vm_pageout_throttle_up(
807 	vm_page_t       m)
808 {
809 	struct vm_pageout_queue *q;
810 	vm_object_t      m_object;
811 
812 	m_object = VM_PAGE_OBJECT(m);
813 
814 	assert(m_object != VM_OBJECT_NULL);
815 	assert(!is_kernel_object(m_object));
816 
817 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
818 	vm_object_lock_assert_exclusive(m_object);
819 
820 	if (m_object->internal == TRUE) {
821 		q = &vm_pageout_queue_internal;
822 	} else {
823 		q = &vm_pageout_queue_external;
824 	}
825 
826 	if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
827 		vm_page_queue_remove(&q->pgo_pending, m, vmp_pageq);
828 		m->vmp_q_state = VM_PAGE_NOT_ON_Q;
829 
830 		VM_PAGE_ZERO_PAGEQ_ENTRY(m);
831 
832 		vm_object_activity_end(m_object);
833 
834 		VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, 1);
835 	}
836 	if (m->vmp_laundry == TRUE) {
837 		m->vmp_laundry = FALSE;
838 		q->pgo_laundry--;
839 
840 		if (q->pgo_throttled == TRUE) {
841 			q->pgo_throttled = FALSE;
842 			thread_wakeup((event_t) &q->pgo_laundry);
843 		}
844 		if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
845 			q->pgo_draining = FALSE;
846 			thread_wakeup((event_t) (&q->pgo_laundry + 1));
847 		}
848 		VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, 1);
849 	}
850 }
851 
852 
853 static void
854 vm_pageout_throttle_up_batch(
855 	struct vm_pageout_queue *q,
856 	int             batch_cnt)
857 {
858 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
859 
860 	VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt);
861 
862 	q->pgo_laundry -= batch_cnt;
863 
864 	if (q->pgo_throttled == TRUE) {
865 		q->pgo_throttled = FALSE;
866 		thread_wakeup((event_t) &q->pgo_laundry);
867 	}
868 	if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
869 		q->pgo_draining = FALSE;
870 		thread_wakeup((event_t) (&q->pgo_laundry + 1));
871 	}
872 }
873 
874 
875 
876 /*
877  * VM memory pressure monitoring.
878  *
879  * vm_pageout_scan() keeps track of the number of pages it considers and
880  * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
881  *
882  * compute_memory_pressure() is called every second from compute_averages()
883  * and moves "vm_pageout_stat_now" forward, to start accumulating the number
884  * of reclaimed pages in a new vm_pageout_stat[] bucket.
885  *
886  * mach_vm_pressure_monitor() collects past statistics about memory pressure.
887  * The caller provides the number of seconds ("nsecs") worth of statistics
888  * it wants, up to 30 seconds.
889  * It computes the number of pages reclaimed in the past "nsecs" seconds and
890  * also returns the number of pages the system still needs to reclaim at this
891  * moment in time.
892  */
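/*
 * Worked example of the bookkeeping described above, using the
 * definitions that follow: VM_PAGEOUT_STAT_SIZE is ((30 * 8) + 1) == 241
 * buckets on DEVELOPMENT/DEBUG kernels and ((1 * 8) + 1) == 9 buckets on
 * release kernels, with 8 buckets per second of history implied by the
 * "8 * nsecs_monitored" conversion in mach_vm_pressure_monitor().  A
 * caller asking for nsecs_monitored == 5 therefore walks back at most
 * 8 * 5 == 40 buckets, summing the freed_speculative, freed_cleaned,
 * freed_internal and freed_external counts recorded in each.
 */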
893 #if DEVELOPMENT || DEBUG
894 #define VM_PAGEOUT_STAT_SIZE    ((30 * 8) + 1)
895 #else
896 #define VM_PAGEOUT_STAT_SIZE    ((1 * 8) + 1)
897 #endif
898 struct vm_pageout_stat {
899 	unsigned long vm_page_active_count;
900 	unsigned long vm_page_speculative_count;
901 	unsigned long vm_page_inactive_count;
902 	unsigned long vm_page_anonymous_count;
903 
904 	unsigned long vm_page_free_count;
905 	unsigned long vm_page_wire_count;
906 	unsigned long vm_page_compressor_count;
907 
908 	unsigned long vm_page_pages_compressed;
909 	unsigned long vm_page_pageable_internal_count;
910 	unsigned long vm_page_pageable_external_count;
911 	unsigned long vm_page_xpmapped_external_count;
912 
913 	unsigned int pages_grabbed;
914 	unsigned int pages_freed;
915 
916 	unsigned int pages_compressed;
917 	unsigned int pages_grabbed_by_compressor;
918 	unsigned int failed_compressions;
919 
920 	unsigned int pages_evicted;
921 	unsigned int pages_purged;
922 
923 	unsigned int considered;
924 	unsigned int considered_bq_internal;
925 	unsigned int considered_bq_external;
926 
927 	unsigned int skipped_external;
928 	unsigned int skipped_internal;
929 	unsigned int filecache_min_reactivations;
930 
931 	unsigned int freed_speculative;
932 	unsigned int freed_cleaned;
933 	unsigned int freed_internal;
934 	unsigned int freed_external;
935 
936 	unsigned int cleaned_dirty_external;
937 	unsigned int cleaned_dirty_internal;
938 
939 	unsigned int inactive_referenced;
940 	unsigned int inactive_nolock;
941 	unsigned int reactivation_limit_exceeded;
942 	unsigned int forced_inactive_reclaim;
943 
944 	unsigned int throttled_internal_q;
945 	unsigned int throttled_external_q;
946 
947 	unsigned int phantom_ghosts_found;
948 	unsigned int phantom_ghosts_added;
949 
950 	unsigned int vm_page_realtime_count;
951 	unsigned int forcereclaimed_sharedcache;
952 	unsigned int forcereclaimed_realtime;
953 	unsigned int protected_sharedcache;
954 	unsigned int protected_realtime;
955 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE];
956 
957 unsigned int vm_pageout_stat_now = 0;
958 
959 #define VM_PAGEOUT_STAT_BEFORE(i) \
960 	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
961 #define VM_PAGEOUT_STAT_AFTER(i) \
962 	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
963 
964 #if VM_PAGE_BUCKETS_CHECK
965 int vm_page_buckets_check_interval = 80; /* in eighths of a second */
966 #endif /* VM_PAGE_BUCKETS_CHECK */
967 
968 
969 void
970 record_memory_pressure(void);
971 void
972 record_memory_pressure(void)
973 {
974 	unsigned int vm_pageout_next;
975 
976 #if VM_PAGE_BUCKETS_CHECK
977 	/* check the consistency of VM page buckets at regular interval */
978 	static int counter = 0;
979 	if ((++counter % vm_page_buckets_check_interval) == 0) {
980 		vm_page_buckets_check();
981 	}
982 #endif /* VM_PAGE_BUCKETS_CHECK */
983 
984 	vm_pageout_state.vm_memory_pressure =
985 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative +
986 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned +
987 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal +
988 	    vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external;
989 
990 	commpage_set_memory_pressure((unsigned int)vm_pageout_state.vm_memory_pressure );
991 
992 	/* move "now" forward */
993 	vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
994 
995 	bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
996 
997 	vm_pageout_stat_now = vm_pageout_next;
998 }
999 
1000 
1001 /*
1002  * IMPORTANT
1003  * mach_vm_ctl_page_free_wanted() is called indirectly, via
1004  * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
1005  * it must be safe in the restricted stackshot context. Locks and/or
1006  * blocking are not allowable.
1007  */
1008 unsigned int
1009 mach_vm_ctl_page_free_wanted(void)
1010 {
1011 	unsigned int page_free_target, page_free_count, page_free_wanted;
1012 
1013 	page_free_target = vm_page_free_target;
1014 	page_free_count = vm_page_free_count;
1015 	if (page_free_target > page_free_count) {
1016 		page_free_wanted = page_free_target - page_free_count;
1017 	} else {
1018 		page_free_wanted = 0;
1019 	}
1020 
1021 	return page_free_wanted;
1022 }
1023 
1024 
1025 /*
1026  * IMPORTANT:
1027  * mach_vm_pressure_monitor() is called when taking a stackshot, with
1028  * wait_for_pressure FALSE, so that code path must remain safe in the
1029  * restricted stackshot context. No blocking or locks are allowable
1030  * on that code path.
1031  */
1032 
1033 kern_return_t
1034 mach_vm_pressure_monitor(
1035 	boolean_t       wait_for_pressure,
1036 	unsigned int    nsecs_monitored,
1037 	unsigned int    *pages_reclaimed_p,
1038 	unsigned int    *pages_wanted_p)
1039 {
1040 	wait_result_t   wr;
1041 	unsigned int    vm_pageout_then, vm_pageout_now;
1042 	unsigned int    pages_reclaimed;
1043 	unsigned int    units_of_monitor;
1044 
1045 	units_of_monitor = 8 * nsecs_monitored;
1046 	/*
1047 	 * We don't take the vm_page_queue_lock here because we don't want
1048 	 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
1049 	 * thread when it's trying to reclaim memory.  We don't need fully
1050 	 * accurate monitoring anyway...
1051 	 */
1052 
1053 	if (wait_for_pressure) {
1054 		/* wait until there's memory pressure */
1055 		while (vm_page_free_count >= vm_page_free_target) {
1056 			wr = assert_wait((event_t) &vm_page_free_wanted,
1057 			    THREAD_INTERRUPTIBLE);
1058 			if (wr == THREAD_WAITING) {
1059 				wr = thread_block(THREAD_CONTINUE_NULL);
1060 			}
1061 			if (wr == THREAD_INTERRUPTED) {
1062 				return KERN_ABORTED;
1063 			}
1064 			if (wr == THREAD_AWAKENED) {
1065 				/*
1066 				 * The memory pressure might have already
1067 				 * been relieved but let's not block again
1068 				 * and let's report that there was memory
1069 				 * pressure at some point.
1070 				 */
1071 				break;
1072 			}
1073 		}
1074 	}
1075 
1076 	/* provide the number of pages the system wants to reclaim */
1077 	if (pages_wanted_p != NULL) {
1078 		*pages_wanted_p = mach_vm_ctl_page_free_wanted();
1079 	}
1080 
1081 	if (pages_reclaimed_p == NULL) {
1082 		return KERN_SUCCESS;
1083 	}
1084 
1085 	/* provide number of pages reclaimed in the last "nsecs_monitored" */
1086 	vm_pageout_now = vm_pageout_stat_now;
1087 	pages_reclaimed = 0;
1088 	for (vm_pageout_then =
1089 	    VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1090 	    vm_pageout_then != vm_pageout_now &&
1091 	    units_of_monitor-- != 0;
1092 	    vm_pageout_then =
1093 	    VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1094 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative;
1095 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned;
1096 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal;
1097 		pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external;
1098 	}
1099 	*pages_reclaimed_p = pages_reclaimed;
1100 
1101 	return KERN_SUCCESS;
1102 }
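/*
 * Minimal caller-side sketch (illustrative only): poll for the number of
 * pages reclaimed over the last second and the current shortfall, without
 * blocking for pressure:
 *
 *	unsigned int reclaimed, wanted;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_pressure_monitor(FALSE,        // wait_for_pressure
 *	                              1,            // nsecs_monitored
 *	                              &reclaimed,
 *	                              &wanted);
 *	if (kr == KERN_SUCCESS) {
 *	        // "wanted" is vm_page_free_target - vm_page_free_count, or 0
 *	}
 */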
1103 
1104 
1105 
1106 #if DEVELOPMENT || DEBUG
1107 
1108 static void
1109 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1110 
1111 /*
1112  * condition variable used to make sure there is
1113  * only a single sweep going on at a time
1114  */
1115 bool vm_pageout_disconnect_all_pages_active = false;
1116 
1117 void
1118 vm_pageout_disconnect_all_pages()
1119 {
1120 	vm_page_lock_queues();
1121 
1122 	if (vm_pageout_disconnect_all_pages_active) {
1123 		vm_page_unlock_queues();
1124 		return;
1125 	}
1126 	vm_pageout_disconnect_all_pages_active = true;
1127 
1128 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled,
1129 	    vm_page_throttled_count);
1130 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous,
1131 	    vm_page_anonymous_count);
1132 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_inactive,
1133 	    (vm_page_inactive_count - vm_page_anonymous_count));
1134 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active,
1135 	    vm_page_active_count);
1136 #if CONFIG_SECLUDED_MEMORY
1137 	vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_secluded,
1138 	    vm_page_secluded_count);
1139 #endif /* CONFIG_SECLUDED_MEMORY */
1140 	vm_page_unlock_queues();
1141 
1142 	vm_pageout_disconnect_all_pages_active = false;
1143 }
1144 
1145 /* NB: assumes the page_queues lock is held on entry, returns with page queue lock held */
1146 void
1147 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1148 {
1149 	vm_page_t       m;
1150 	vm_object_t     t_object = NULL;
1151 	vm_object_t     l_object = NULL;
1152 	vm_object_t     m_object = NULL;
1153 	int             delayed_unlock = 0;
1154 	int             try_failed_count = 0;
1155 	int             disconnected_count = 0;
1156 	int             paused_count = 0;
1157 	int             object_locked_count = 0;
1158 
1159 	KDBG((MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS) |
1160 	    DBG_FUNC_START),
1161 	    q, qcount);
1162 
1163 	while (qcount && !vm_page_queue_empty(q)) {
1164 		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1165 
1166 		m = (vm_page_t) vm_page_queue_first(q);
1167 		m_object = VM_PAGE_OBJECT(m);
1168 
1169 		if (m_object == VM_OBJECT_NULL) {
1170 			/*
1171 			 * Bumped into a free page. This should only happen on the
1172 			 * secluded queue
1173 			 */
1174 #if CONFIG_SECLUDED_MEMORY
1175 			assert(q == &vm_page_queue_secluded);
1176 #endif /* CONFIG_SECLUDED_MEMORY */
1177 			goto reenter_pg_on_q;
1178 		}
1179 
1180 		/*
1181 		 * check to see if we currently are working
1182 		 * with the same object... if so, we've
1183 		 * already got the lock
1184 		 */
1185 		if (m_object != l_object) {
1186 			/*
1187 			 * the object associated with candidate page is
1188 			 * different from the one we were just working
1189 			 * with... dump the lock if we still own it
1190 			 */
1191 			if (l_object != NULL) {
1192 				vm_object_unlock(l_object);
1193 				l_object = NULL;
1194 			}
1195 			if (m_object != t_object) {
1196 				try_failed_count = 0;
1197 			}
1198 
1199 			/*
1200 			 * Try to lock object; since we've already got the
1201 			 * page queues lock, we can only 'try' for this one.
1202 			 * if the 'try' fails, we need to do a mutex_pause
1203 			 * to allow the owner of the object lock a chance to
1204 			 * run...
1205 			 */
1206 			if (!vm_object_lock_try_scan(m_object)) {
1207 				if (try_failed_count > 20) {
1208 					goto reenter_pg_on_q;
1209 				}
1210 				vm_page_unlock_queues();
1211 				mutex_pause(try_failed_count++);
1212 				vm_page_lock_queues();
1213 				delayed_unlock = 0;
1214 
1215 				paused_count++;
1216 
1217 				t_object = m_object;
1218 				continue;
1219 			}
1220 			object_locked_count++;
1221 
1222 			l_object = m_object;
1223 		}
1224 		if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry ||
1225 		    m->vmp_busy || m->vmp_absent || VMP_ERROR_GET(m) ||
1226 		    m->vmp_free_when_done) {
1227 			/*
1228 			 * put it back on the head of its queue
1229 			 */
1230 			goto reenter_pg_on_q;
1231 		}
1232 		if (m->vmp_pmapped == TRUE) {
1233 			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1234 
1235 			disconnected_count++;
1236 		}
1237 reenter_pg_on_q:
1238 		vm_page_queue_remove(q, m, vmp_pageq);
1239 		vm_page_queue_enter(q, m, vmp_pageq);
1240 
1241 		qcount--;
1242 		try_failed_count = 0;
1243 
1244 		if (delayed_unlock++ > 128) {
1245 			if (l_object != NULL) {
1246 				vm_object_unlock(l_object);
1247 				l_object = NULL;
1248 			}
1249 			lck_mtx_yield(&vm_page_queue_lock);
1250 			delayed_unlock = 0;
1251 		}
1252 	}
1253 	if (l_object != NULL) {
1254 		vm_object_unlock(l_object);
1255 		l_object = NULL;
1256 	}
1257 
1258 	KDBG((MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS) |
1259 	    DBG_FUNC_END),
1260 	    q, disconnected_count, object_locked_count, paused_count);
1261 }
1262 
1263 extern const char *proc_best_name(struct proc* proc);
1264 
1265 int
1266 vm_toggle_task_selfdonate_pages(task_t task)
1267 {
1268 	int state = 0;
1269 	if (vm_page_donate_mode == VM_PAGE_DONATE_DISABLED) {
1270 		printf("VM Donation mode is OFF on the system\n");
1271 		return state;
1272 	}
1273 	if (task != kernel_task) {
1274 		task_lock(task);
1275 		if (!task->donates_own_pages) {
1276 			printf("SELF DONATE for %s ON\n", proc_best_name(get_bsdtask_info(task)));
1277 			task->donates_own_pages = true;
1278 			state = 1;
1279 		} else if (task->donates_own_pages) {
1280 			printf("SELF DONATE for %s OFF\n", proc_best_name(get_bsdtask_info(task)));
1281 			task->donates_own_pages = false;
1282 			state = 0;
1283 		}
1284 		task_unlock(task);
1285 	}
1286 	return state;
1287 }
1288 #endif /* DEVELOPMENT || DEBUG */
1289 
1290 void
1291 vm_task_set_selfdonate_pages(task_t task, bool donate)
1292 {
1293 	assert(vm_page_donate_mode != VM_PAGE_DONATE_DISABLED);
1294 	assert(task != kernel_task);
1295 
1296 	task_lock(task);
1297 	task->donates_own_pages = donate;
1298 	task_unlock(task);
1299 }
1300 
1301 
1302 
1303 static size_t
1304 vm_pageout_page_queue(vm_page_queue_head_t *, size_t, bool);
1305 
1306 /*
1307  * condition variable used to make sure there is
1308  * only a single sweep going on at a time
1309  */
1310 boolean_t       vm_pageout_anonymous_pages_active = FALSE;
1311 
1312 
1313 kern_return_t
1314 vm_pageout_anonymous_pages()
1315 {
1316 	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1317 		size_t throttled_pages_moved, anonymous_pages_moved, active_pages_moved;
1318 		vm_page_lock_queues();
1319 
1320 		if (vm_pageout_anonymous_pages_active == TRUE) {
1321 			vm_page_unlock_queues();
1322 			return KERN_RESOURCE_SHORTAGE;
1323 		}
1324 		vm_pageout_anonymous_pages_active = TRUE;
1325 		vm_page_unlock_queues();
1326 
1327 		throttled_pages_moved = vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count, false);
1328 		anonymous_pages_moved = vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count, false);
1329 		active_pages_moved = vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count, false);
1330 
1331 		os_log(OS_LOG_DEFAULT,
1332 		    "%s: throttled pages moved: %zu, anonymous pages moved: %zu, active pages moved: %zu",
1333 		    __func__, throttled_pages_moved, anonymous_pages_moved, active_pages_moved);
1334 
1335 		if (VM_CONFIG_SWAP_IS_PRESENT) {
1336 			vm_consider_swapping();
1337 		}
1338 
1339 		vm_page_lock_queues();
1340 		vm_pageout_anonymous_pages_active = FALSE;
1341 		vm_page_unlock_queues();
1342 		return KERN_SUCCESS;
1343 	} else {
1344 		return KERN_NOT_SUPPORTED;
1345 	}
1346 }
1347 
1348 
1349 size_t
1350 vm_pageout_page_queue(vm_page_queue_head_t *q, size_t qcount, bool perf_test)
1351 {
1352 	vm_page_t       m;
1353 	vm_object_t     t_object = NULL;
1354 	vm_object_t     l_object = NULL;
1355 	vm_object_t     m_object = NULL;
1356 	int             delayed_unlock = 0;
1357 	int             try_failed_count = 0;
1358 	int             refmod_state;
1359 	int             pmap_options;
1360 	struct          vm_pageout_queue *iq;
1361 	ppnum_t         phys_page;
1362 	size_t          pages_moved = 0;
1363 
1364 
1365 	iq = &vm_pageout_queue_internal;
1366 
1367 	vm_page_lock_queues();
1368 
1369 #if DEVELOPMENT || DEBUG
1370 	if (perf_test) {
1371 		iq = &vm_pageout_queue_benchmark;
1372 		// ensure the benchmark queue isn't throttled
1373 		iq->pgo_maxlaundry = (unsigned int) qcount;
1374 	}
1375 #endif /* DEVELOPMENT || DEBUG */
1376 
1377 	while (qcount && !vm_page_queue_empty(q)) {
1378 		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1379 
1380 		if (VM_PAGE_Q_THROTTLED(iq)) {
1381 			if (l_object != NULL) {
1382 				vm_object_unlock(l_object);
1383 				l_object = NULL;
1384 			}
1385 			iq->pgo_draining = TRUE;
1386 
1387 			assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1388 			vm_page_unlock_queues();
1389 
1390 			thread_block(THREAD_CONTINUE_NULL);
1391 
1392 			vm_page_lock_queues();
1393 			delayed_unlock = 0;
1394 			continue;
1395 		}
1396 		m = (vm_page_t) vm_page_queue_first(q);
1397 		m_object = VM_PAGE_OBJECT(m);
1398 
1399 		/*
1400 		 * check to see if we currently are working
1401 		 * with the same object... if so, we've
1402 		 * already got the lock
1403 		 */
1404 		if (m_object != l_object) {
1405 			if (!m_object->internal) {
1406 				goto reenter_pg_on_q;
1407 			}
1408 
1409 			/*
1410 			 * the object associated with candidate page is
1411 			 * different from the one we were just working
1412 			 * with... dump the lock if we still own it
1413 			 */
1414 			if (l_object != NULL) {
1415 				vm_object_unlock(l_object);
1416 				l_object = NULL;
1417 			}
1418 			if (m_object != t_object) {
1419 				try_failed_count = 0;
1420 			}
1421 
1422 			/*
1423 			 * Try to lock object; since we've already got the
1424 			 * page queues lock, we can only 'try' for this one.
1425 			 * if the 'try' fails, we need to do a mutex_pause
1426 			 * to allow the owner of the object lock a chance to
1427 			 * run...
1428 			 */
1429 			if (!vm_object_lock_try_scan(m_object)) {
1430 				if (try_failed_count > 20) {
1431 					goto reenter_pg_on_q;
1432 				}
1433 				vm_page_unlock_queues();
1434 				mutex_pause(try_failed_count++);
1435 				vm_page_lock_queues();
1436 				delayed_unlock = 0;
1437 
1438 				t_object = m_object;
1439 				continue;
1440 			}
1441 			l_object = m_object;
1442 		}
1443 		if (!m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || VMP_ERROR_GET(m) || m->vmp_free_when_done) {
1444 			/*
1445 			 * page is not to be cleaned
1446 			 * put it back on the head of its queue
1447 			 */
1448 			goto reenter_pg_on_q;
1449 		}
1450 		phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1451 
1452 		if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
1453 			refmod_state = pmap_get_refmod(phys_page);
1454 
1455 			if (refmod_state & VM_MEM_REFERENCED) {
1456 				m->vmp_reference = TRUE;
1457 			}
1458 			if (refmod_state & VM_MEM_MODIFIED) {
1459 				SET_PAGE_DIRTY(m, FALSE);
1460 			}
1461 		}
1462 		if (m->vmp_reference == TRUE) {
1463 			m->vmp_reference = FALSE;
1464 			pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1465 			goto reenter_pg_on_q;
1466 		}
1467 		if (m->vmp_pmapped == TRUE) {
1468 			if (m->vmp_dirty || m->vmp_precious) {
1469 				pmap_options = PMAP_OPTIONS_COMPRESSOR;
1470 			} else {
1471 				pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1472 			}
1473 			refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1474 			if (refmod_state & VM_MEM_MODIFIED) {
1475 				SET_PAGE_DIRTY(m, FALSE);
1476 			}
1477 		}
1478 
1479 		if (!m->vmp_dirty && !m->vmp_precious) {
1480 			vm_page_unlock_queues();
1481 			VM_PAGE_FREE(m);
1482 			vm_page_lock_queues();
1483 			delayed_unlock = 0;
1484 
1485 			goto next_pg;
1486 		}
1487 		if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1488 			if (!m_object->pager_initialized) {
1489 				vm_page_unlock_queues();
1490 
1491 				vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1492 
1493 				if (!m_object->pager_initialized) {
1494 					vm_object_compressor_pager_create(m_object);
1495 				}
1496 
1497 				vm_page_lock_queues();
1498 				delayed_unlock = 0;
1499 			}
1500 			if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1501 				goto reenter_pg_on_q;
1502 			}
1503 			/*
1504 			 * vm_object_compressor_pager_create will drop the object lock
1505 			 * which means 'm' may no longer be valid to use
1506 			 */
1507 			continue;
1508 		}
1509 
1510 		if (!perf_test) {
1511 			/*
1512 			 * we've already factored out pages in the laundry which
1513 			 * means this page can't be on the pageout queue so it's
1514 			 * safe to do the vm_page_queues_remove
1515 			 */
1516 			bool donate = (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
1517 			vm_page_queues_remove(m, TRUE);
1518 			if (donate) {
1519 				/*
1520 				 * The compressor needs to see this bit to know
1521 				 * where this page needs to land. Also if stolen,
1522 				 * this bit helps put the page back in the right
1523 				 * special queue where it belongs.
1524 				 */
1525 				m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
1526 			}
1527 		} else {
1528 			vm_page_queue_remove(q, m, vmp_pageq);
1529 		}
1530 
1531 		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1532 
1533 		vm_pageout_cluster_to_queue(m, iq);
1534 
1535 		pages_moved++;
1536 		goto next_pg;
1537 
1538 reenter_pg_on_q:
1539 		vm_page_queue_remove(q, m, vmp_pageq);
1540 		vm_page_queue_enter(q, m, vmp_pageq);
1541 next_pg:
1542 		qcount--;
1543 		try_failed_count = 0;
1544 
1545 		if (delayed_unlock++ > 128) {
1546 			if (l_object != NULL) {
1547 				vm_object_unlock(l_object);
1548 				l_object = NULL;
1549 			}
1550 			lck_mtx_yield(&vm_page_queue_lock);
1551 			delayed_unlock = 0;
1552 		}
1553 	}
1554 	if (l_object != NULL) {
1555 		vm_object_unlock(l_object);
1556 		l_object = NULL;
1557 	}
1558 	vm_page_unlock_queues();
1559 	return pages_moved;
1560 }
1561 
1562 
1563 
1564 /*
1565  * function in BSD to apply I/O throttle to the pageout thread
1566  */
1567 extern void vm_pageout_io_throttle(void);
1568 
1569 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj)                    \
1570 	MACRO_BEGIN                                                     \
1571 	/* \
1572 	 * If a "reusable" page somehow made it back into \
1573 	 * the active queue, it's been re-used and is not \
1574 	 * quite re-usable. \
1575 	 * If the VM object was "all_reusable", consider it \
1576 	 * as "all re-used" instead of converting it to \
1577 	 * "partially re-used", which could be expensive. \
1578 	 */                                                             \
1579 	assert(VM_PAGE_OBJECT((m)) == (obj));                           \
1580 	if ((m)->vmp_reusable ||                                        \
1581 	    (obj)->all_reusable) {                                      \
1582 	        vm_object_reuse_pages((obj),                            \
1583 	                              (m)->vmp_offset,                  \
1584 	                              (m)->vmp_offset + PAGE_SIZE_64,   \
1585 	                              FALSE);                           \
1586 	}                                                               \
1587 	MACRO_END
1588 
1589 
1590 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT         64
1591 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX     1024
1592 
1593 #define FCS_IDLE                0
1594 #define FCS_DELAYED             1
1595 #define FCS_DEADLOCK_DETECTED   2
1596 
1597 struct flow_control {
1598 	int             state;
1599 	mach_timespec_t ts;
1600 };
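/*
 * Hedged sketch of how these states are typically used (the consuming
 * logic lives in vm_pageout_scan(), which is not shown in this excerpt):
 * while the internal pageout queue remains throttled, the scan moves
 * flow_control.state from FCS_IDLE to FCS_DELAYED and records a deadline
 * roughly VM_PAGEOUT_DEADLOCK_WAIT milliseconds out in flow_control.ts;
 * if the laundry still has not drained by that deadline, the state
 * becomes FCS_DEADLOCK_DETECTED and on the order of
 * VM_PAGEOUT_DEADLOCK_RELIEF pages are pushed out to break the stall.
 */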
1601 
1602 
1603 uint64_t vm_pageout_rejected_bq_internal = 0;
1604 uint64_t vm_pageout_rejected_bq_external = 0;
1605 uint64_t vm_pageout_skipped_bq_internal = 0;
1606 uint64_t vm_pageout_skipped_bq_external = 0;
1607 
1608 #define ANONS_GRABBED_LIMIT     2
1609 
1610 
1611 #if 0
1612 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1613 #endif
1614 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1615 
1616 #define VM_PAGEOUT_PB_NO_ACTION                         0
1617 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1618 #define VM_PAGEOUT_PB_THREAD_YIELD                      2
1619 
1620 
1621 #if 0
1622 static void
1623 vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
1624 {
1625 	if (*local_freeq) {
1626 		vm_page_unlock_queues();
1627 
1628 		VM_DEBUG_CONSTANT_EVENT(
1629 			vm_pageout_freelist, DBG_VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1630 			vm_page_free_count, 0, 0, 1);
1631 
1632 		vm_page_free_list(*local_freeq, TRUE);
1633 
1634 		VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist, DBG_VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1635 		    vm_page_free_count, *local_freed, 0, 1);
1636 
1637 		*local_freeq = NULL;
1638 		*local_freed = 0;
1639 
1640 		vm_page_lock_queues();
1641 	} else {
1642 		lck_mtx_yield(&vm_page_queue_lock);
1643 	}
1644 	*delayed_unlock = 1;
1645 }
1646 #endif
1647 
1648 
1649 static void
1650 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1651     vm_page_t *local_freeq, int *local_freed, int action)
1652 {
1653 	vm_page_unlock_queues();
1654 
1655 	if (*object != NULL) {
1656 		vm_object_unlock(*object);
1657 		*object = NULL;
1658 	}
1659 	if (*local_freeq) {
1660 		vm_page_free_list(*local_freeq, TRUE);
1661 
1662 		*local_freeq = NULL;
1663 		*local_freed = 0;
1664 	}
1665 	*delayed_unlock = 1;
1666 
1667 	switch (action) {
1668 	case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1669 		vm_consider_waking_compactor_swapper();
1670 		break;
1671 	case VM_PAGEOUT_PB_THREAD_YIELD:
1672 		thread_yield_internal(1);
1673 		break;
1674 	case VM_PAGEOUT_PB_NO_ACTION:
1675 	default:
1676 		break;
1677 	}
1678 	vm_page_lock_queues();
1679 }
1680 
1681 
1682 static struct vm_pageout_vminfo last;
1683 
1684 uint64_t last_vm_page_pages_grabbed = 0;
1685 
1686 extern  uint32_t c_segment_pages_compressed;
1687 
1688 extern uint64_t shared_region_pager_reclaimed;
1689 extern struct memory_object_pager_ops shared_region_pager_ops;
1690 
1691 void
1692 update_vm_info(void)
1693 {
1694 	unsigned long tmp;
1695 	uint64_t tmp64;
1696 
1697 	vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
1698 	vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
1699 	vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
1700 	vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
1701 
1702 	vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
1703 	vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
1704 	vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
1705 
1706 	vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
1707 	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
1708 	vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
1709 	vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
1710 	vm_pageout_stats[vm_pageout_stat_now].vm_page_realtime_count = vm_page_realtime_count;
1711 
1712 	tmp = vm_pageout_vminfo.vm_pageout_considered_page;
1713 	vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
1714 	last.vm_pageout_considered_page = tmp;
1715 
1716 	tmp64 = vm_pageout_vminfo.vm_pageout_compressions;
1717 	vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp64 - last.vm_pageout_compressions);
1718 	last.vm_pageout_compressions = tmp64;
1719 
1720 	tmp = vm_pageout_vminfo.vm_compressor_failed;
1721 	vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
1722 	last.vm_compressor_failed = tmp;
1723 
1724 	tmp64 = vm_pageout_vminfo.vm_compressor_pages_grabbed;
1725 	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp64 - last.vm_compressor_pages_grabbed);
1726 	last.vm_compressor_pages_grabbed = tmp64;
1727 
1728 	tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
1729 	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
1730 	last.vm_phantom_cache_found_ghost = tmp;
1731 
1732 	tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
1733 	vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
1734 	last.vm_phantom_cache_added_ghost = tmp;
1735 
1736 	tmp64 = counter_load(&vm_page_grab_count);
1737 	vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp64 - last_vm_page_pages_grabbed);
1738 	last_vm_page_pages_grabbed = tmp64;
1739 
1740 	tmp = vm_pageout_vminfo.vm_page_pages_freed;
1741 	vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
1742 	last.vm_page_pages_freed = tmp;
1743 
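	/*
	 * Only compute the detailed reclaim-path deltas when the scan
	 * actually considered pages during this interval.
	 */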
1744 	if (vm_pageout_stats[vm_pageout_stat_now].considered) {
1745 		tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
1746 		vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
1747 		last.vm_pageout_pages_evicted = tmp;
1748 
1749 		tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
1750 		vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
1751 		last.vm_pageout_pages_purged = tmp;
1752 
1753 		tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
1754 		vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
1755 		last.vm_pageout_freed_speculative = tmp;
1756 
1757 		tmp = vm_pageout_vminfo.vm_pageout_freed_external;
1758 		vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
1759 		last.vm_pageout_freed_external = tmp;
1760 
1761 		tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
1762 		vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
1763 		last.vm_pageout_inactive_referenced = tmp;
1764 
1765 		tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
1766 		vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
1767 		last.vm_pageout_scan_inactive_throttled_external = tmp;
1768 
1769 		tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
1770 		vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
1771 		last.vm_pageout_inactive_dirty_external = tmp;
1772 
1773 		tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
1774 		vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
1775 		last.vm_pageout_freed_cleaned = tmp;
1776 
1777 		tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
1778 		vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
1779 		last.vm_pageout_inactive_nolock = tmp;
1780 
1781 		tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
1782 		vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
1783 		last.vm_pageout_scan_inactive_throttled_internal = tmp;
1784 
1785 		tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
1786 		vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
1787 		last.vm_pageout_skipped_external = tmp;
1788 
1789 		tmp = vm_pageout_vminfo.vm_pageout_skipped_internal;
1790 		vm_pageout_stats[vm_pageout_stat_now].skipped_internal = (unsigned int)(tmp - last.vm_pageout_skipped_internal);
1791 		last.vm_pageout_skipped_internal = tmp;
1792 
1793 		tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
1794 		vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
1795 		last.vm_pageout_reactivation_limit_exceeded = tmp;
1796 
1797 		tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
1798 		vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
1799 		last.vm_pageout_inactive_force_reclaim = tmp;
1800 
1801 		tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
1802 		vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
1803 		last.vm_pageout_freed_internal = tmp;
1804 
1805 		tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
1806 		vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
1807 		last.vm_pageout_considered_bq_internal = tmp;
1808 
1809 		tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
1810 		vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
1811 		last.vm_pageout_considered_bq_external = tmp;
1812 
1813 		tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
1814 		vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
1815 		last.vm_pageout_filecache_min_reactivated = tmp;
1816 
1817 		tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
1818 		vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
1819 		last.vm_pageout_inactive_dirty_internal = tmp;
1820 
1821 		tmp = vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache;
1822 		vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_sharedcache = (unsigned int)(tmp - last.vm_pageout_forcereclaimed_sharedcache);
1823 		last.vm_pageout_forcereclaimed_sharedcache = tmp;
1824 
1825 		tmp = vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime;
1826 		vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_realtime = (unsigned int)(tmp - last.vm_pageout_forcereclaimed_realtime);
1827 		last.vm_pageout_forcereclaimed_realtime = tmp;
1828 
1829 		tmp = vm_pageout_vminfo.vm_pageout_protected_sharedcache;
1830 		vm_pageout_stats[vm_pageout_stat_now].protected_sharedcache = (unsigned int)(tmp - last.vm_pageout_protected_sharedcache);
1831 		last.vm_pageout_protected_sharedcache = tmp;
1832 
1833 		tmp = vm_pageout_vminfo.vm_pageout_protected_realtime;
1834 		vm_pageout_stats[vm_pageout_stat_now].protected_realtime = (unsigned int)(tmp - last.vm_pageout_protected_realtime);
1835 		last.vm_pageout_protected_realtime = tmp;
1836 	}
1837 
1838 	KDBG((VMDBG_CODE(DBG_VM_INFO1)) | DBG_FUNC_NONE,
1839 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
1840 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
1841 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
1842 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count);
1843 
1844 	KDBG((VMDBG_CODE(DBG_VM_INFO2)) | DBG_FUNC_NONE,
1845 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
1846 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
1847 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count);
1848 
1849 	KDBG((VMDBG_CODE(DBG_VM_INFO3)) | DBG_FUNC_NONE,
1850 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
1851 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
1852 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
1853 	    vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count);
1854 
1855 	if (vm_pageout_stats[vm_pageout_stat_now].considered ||
1856 	    vm_pageout_stats[vm_pageout_stat_now].pages_compressed ||
1857 	    vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
1858 		KDBG((VMDBG_CODE(DBG_VM_INFO4)) | DBG_FUNC_NONE,
1859 		    vm_pageout_stats[vm_pageout_stat_now].considered,
1860 		    vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
1861 		    vm_pageout_stats[vm_pageout_stat_now].freed_external,
1862 		    vm_pageout_stats[vm_pageout_stat_now].inactive_referenced);
1863 
1864 		KDBG((VMDBG_CODE(DBG_VM_INFO5)) | DBG_FUNC_NONE,
1865 		    vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
1866 		    vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
1867 		    vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
1868 		    vm_pageout_stats[vm_pageout_stat_now].inactive_nolock);
1869 
1870 		KDBG((VMDBG_CODE(DBG_VM_INFO6)) | DBG_FUNC_NONE,
1871 		    vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
1872 		    vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
1873 		    vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
1874 		    vm_pageout_stats[vm_pageout_stat_now].skipped_external);
1875 
1876 		KDBG((VMDBG_CODE(DBG_VM_INFO7)) | DBG_FUNC_NONE,
1877 		    vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
1878 		    vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
1879 		    vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
1880 		    vm_pageout_stats[vm_pageout_stat_now].freed_internal);
1881 
1882 		KDBG((VMDBG_CODE(DBG_VM_INFO8)) | DBG_FUNC_NONE,
1883 		    vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
1884 		    vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
1885 		    vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
1886 		    vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal);
1887 
1888 		KDBG((VMDBG_CODE(DBG_VM_INFO10)) | DBG_FUNC_NONE,
1889 		    vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_sharedcache,
1890 		    vm_pageout_stats[vm_pageout_stat_now].forcereclaimed_realtime,
1891 		    vm_pageout_stats[vm_pageout_stat_now].protected_sharedcache,
1892 		    vm_pageout_stats[vm_pageout_stat_now].protected_realtime);
1893 	}
1894 	KDBG((VMDBG_CODE(DBG_VM_INFO9)) | DBG_FUNC_NONE,
1895 	    vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
1896 	    vm_pageout_stats[vm_pageout_stat_now].pages_freed,
1897 	    vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
1898 	    vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added);
1899 
1900 	record_memory_pressure();
1901 }
1902 
1903 extern boolean_t hibernation_vmqueues_inspection;
1904 
1905 /*
1906  * Return values for functions called by vm_pageout_scan
1907  * that control its flow.
1908  *
1909  * PROCEED -- vm_pageout_scan will keep making forward progress.
1910  * DONE_RETURN -- page demand satisfied, work is done -> vm_pageout_scan returns.
1911  * NEXT_ITERATION -- restart the 'for' loop in vm_pageout_scan aka continue.
1912  */
1913 
1914 #define VM_PAGEOUT_SCAN_PROCEED                 (0)
1915 #define VM_PAGEOUT_SCAN_DONE_RETURN             (1)
1916 #define VM_PAGEOUT_SCAN_NEXT_ITERATION          (2)
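/*
 * Sketch of the typical caller pattern in vm_pageout_scan's main loop
 * (see below):
 *
 *	retval = vps_purge_object();
 *	if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
 *		continue;	// restart the scan loop
 *	}
 *	// VM_PAGEOUT_SCAN_PROCEED: fall through and keep scanning
 */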
1917 
1918 /*
1919  * This function is called only from vm_pageout_scan and
1920  * it moves overflow secluded pages (one at a time) to the
1921  * batched 'local' free Q or active Q.
1922  */
1923 static void
1924 vps_deal_with_secluded_page_overflow(vm_page_t *local_freeq, int *local_freed)
1925 {
1926 #if CONFIG_SECLUDED_MEMORY
1927 	/*
1928 	 * Deal with secluded_q overflow.
1929 	 */
1930 	if (vm_page_secluded_count > vm_page_secluded_target) {
1931 		vm_page_t secluded_page;
1932 
1933 		/*
1934 		 * SECLUDED_AGING_BEFORE_ACTIVE:
1935 		 * Excess secluded pages go to the active queue and
1936 		 * will later go to the inactive queue.
1937 		 */
1938 		assert((vm_page_secluded_count_free +
1939 		    vm_page_secluded_count_inuse) ==
1940 		    vm_page_secluded_count);
1941 		secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1942 		assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
1943 
1944 		vm_page_queues_remove(secluded_page, FALSE);
1945 		assert(!secluded_page->vmp_fictitious);
1946 		assert(!VM_PAGE_WIRED(secluded_page));
1947 
1948 		if (secluded_page->vmp_object == 0) {
1949 			/* transfer to free queue */
1950 			assert(secluded_page->vmp_busy);
1951 			secluded_page->vmp_snext = *local_freeq;
1952 			*local_freeq = secluded_page;
1953 			*local_freed += 1;
1954 		} else {
1955 			/* transfer to head of active queue */
1956 			vm_page_enqueue_active(secluded_page, FALSE);
1957 			secluded_page = VM_PAGE_NULL;
1958 		}
1959 	}
1960 #else /* CONFIG_SECLUDED_MEMORY */
1961 
1962 #pragma unused(local_freeq)
1963 #pragma unused(local_freed)
1964 
1965 	return;
1966 
1967 #endif /* CONFIG_SECLUDED_MEMORY */
1968 }
1969 
1970 /*
1971  * This function is called only from vm_pageout_scan and
1972  * it initializes the loop targets for vm_pageout_scan().
1973  */
1974 static void
1975 vps_init_page_targets(void)
1976 {
1977 	/*
1978 	 * LD TODO: Other page targets should be calculated here too.
1979 	 */
1980 	vm_page_anonymous_min = vm_page_inactive_target / 20;
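	/*
	 * i.e. roughly 5% of the inactive target; anonymous pages are
	 * normally not stolen until vm_page_anonymous_count exceeds this
	 * floor (see vps_choose_victim_page).
	 */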
1981 
1982 	if (vm_pageout_state.vm_page_speculative_percentage > 50) {
1983 		vm_pageout_state.vm_page_speculative_percentage = 50;
1984 	} else if (vm_pageout_state.vm_page_speculative_percentage <= 0) {
1985 		vm_pageout_state.vm_page_speculative_percentage = 1;
1986 	}
1987 
1988 	vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1989 	    vm_page_inactive_count);
1990 }
1991 
1992 /*
1993  * This function is called only from vm_pageout_scan and
1994  * it purges a single VM object at a time and will either
1995  * make vm_pageout_scan() restart the loop or keep moving forward.
1996  */
1997 static int
1998 vps_purge_object()
1999 {
2000 	int             force_purge;
2001 
2002 	assert(available_for_purge >= 0);
2003 	force_purge = 0; /* no force-purging */
2004 
2005 #if VM_PRESSURE_EVENTS
2006 	vm_pressure_level_t pressure_level;
2007 
2008 	pressure_level = memorystatus_vm_pressure_level;
2009 
2010 	if (pressure_level > kVMPressureNormal) {
2011 		if (pressure_level >= kVMPressureCritical) {
2012 			force_purge = vm_pageout_state.memorystatus_purge_on_critical;
2013 		} else if (pressure_level >= kVMPressureUrgent) {
2014 			force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
2015 		} else if (pressure_level >= kVMPressureWarning) {
2016 			force_purge = vm_pageout_state.memorystatus_purge_on_warning;
2017 		}
2018 	}
2019 #endif /* VM_PRESSURE_EVENTS */
2020 
2021 	if (available_for_purge || force_purge) {
2022 		memoryshot(DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2023 
2024 		VM_DEBUG_EVENT(vm_pageout_purgeone, DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2025 		if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2026 			VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
2027 			VM_DEBUG_EVENT(vm_pageout_purgeone, DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2028 			memoryshot(DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2029 
2030 			return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2031 		}
2032 		VM_DEBUG_EVENT(vm_pageout_purgeone, DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2033 		memoryshot(DBG_VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2034 	}
2035 
2036 	return VM_PAGEOUT_SCAN_PROCEED;
2037 }
2038 
2039 /*
2040  * This function is called only from vm_pageout_scan and
2041  * it will try to age the next speculative Q if the oldest
2042  * one is empty.
2043  */
2044 static int
2045 vps_age_speculative_queue(boolean_t force_speculative_aging)
2046 {
2047 #define DELAY_SPECULATIVE_AGE   1000
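/*
 * Once a "not fully aged yet" determination has been made, skip the
 * relatively expensive deadline re-check for the next
 * DELAY_SPECULATIVE_AGE passes through this path.
 */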
2048 
2049 	/*
2050 	 * try to pull pages from the aging bins...
2051 	 * see vm_page_internal.h for an explanation of how
2052 	 * this mechanism works
2053 	 */
2054 	boolean_t                       can_steal = FALSE;
2055 	int                             num_scanned_queues;
2056 	static int                      delay_speculative_age = 0; /* depends on the # of times we go through the main pageout_scan loop. */
2057 	mach_timespec_t                 ts;
2058 	struct vm_speculative_age_q     *aq;
2059 	struct vm_speculative_age_q     *sq;
2060 
2061 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2062 
2063 	aq = &vm_page_queue_speculative[speculative_steal_index];
2064 
2065 	num_scanned_queues = 0;
2066 	while (vm_page_queue_empty(&aq->age_q) &&
2067 	    num_scanned_queues++ != vm_page_max_speculative_age_q) {
2068 		speculative_steal_index++;
2069 
2070 		if (speculative_steal_index > vm_page_max_speculative_age_q) {
2071 			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2072 		}
2073 
2074 		aq = &vm_page_queue_speculative[speculative_steal_index];
2075 	}
2076 
2077 	if (num_scanned_queues == vm_page_max_speculative_age_q + 1) {
2078 		/*
2079 		 * XXX We've scanned all the speculative
2080 		 * queues but still haven't found one
2081 		 * that is not empty, even though
2082 		 * vm_page_speculative_count is not 0.
2083 		 */
2084 		if (!vm_page_queue_empty(&sq->age_q)) {
2085 			return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2086 		}
2087 #if DEVELOPMENT || DEBUG
2088 		panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2089 #endif
2090 		/* readjust... */
2091 		vm_page_speculative_count = 0;
2092 		/* ... and continue */
2093 		return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2094 	}
2095 
2096 	if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE) {
2097 		can_steal = TRUE;
2098 	} else {
2099 		if (!delay_speculative_age) {
2100 			mach_timespec_t ts_fully_aged;
2101 
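			/*
			 * Time for a page to age through every speculative bin:
			 * vm_page_max_speculative_age_q bins, each held for
			 * vm_page_speculative_q_age_ms milliseconds, converted to a
			 * timespec and added to this queue's timestamp (age_ts).
			 */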
2102 			ts_fully_aged.tv_sec = (vm_page_max_speculative_age_q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
2103 			ts_fully_aged.tv_nsec = ((vm_page_max_speculative_age_q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
2104 			    * 1000 * NSEC_PER_USEC;
2105 
2106 			ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2107 
2108 			clock_sec_t sec;
2109 			clock_nsec_t nsec;
2110 			clock_get_system_nanotime(&sec, &nsec);
2111 			ts.tv_sec = (unsigned int) sec;
2112 			ts.tv_nsec = nsec;
2113 
2114 			if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) {
2115 				can_steal = TRUE;
2116 			} else {
2117 				delay_speculative_age++;
2118 			}
2119 		} else {
2120 			delay_speculative_age++;
2121 			if (delay_speculative_age == DELAY_SPECULATIVE_AGE) {
2122 				delay_speculative_age = 0;
2123 			}
2124 		}
2125 	}
2126 	if (can_steal == TRUE) {
2127 		vm_page_speculate_ageit(aq);
2128 	}
2129 
2130 	return VM_PAGEOUT_SCAN_PROCEED;
2131 }
2132 
2133 /*
2134  * This function is called only from vm_pageout_scan and
2135  * it evicts a single VM object from the cache.
2136  */
2137 static inline int
2138 vps_object_cache_evict(vm_object_t *object_to_unlock)
2139 {
2140 	static int                      cache_evict_throttle = 0;
2141 	struct vm_speculative_age_q     *sq;
2142 
2143 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2144 
2145 	if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
2146 		int     pages_evicted;
2147 
2148 		if (*object_to_unlock != NULL) {
2149 			vm_object_unlock(*object_to_unlock);
2150 			*object_to_unlock = NULL;
2151 		}
2152 		KDBG(0x13001ec | DBG_FUNC_START);
2153 
2154 		pages_evicted = vm_object_cache_evict(100, 10);
2155 
2156 		KDBG(0x13001ec | DBG_FUNC_END, pages_evicted);
2157 
2158 		if (pages_evicted) {
2159 			vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
2160 
2161 			VM_DEBUG_EVENT(vm_pageout_cache_evict, DBG_VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2162 			    vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
2163 			memoryshot(DBG_VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2164 
2165 			/*
2166 			 * we just freed up to 100 pages,
2167 			 * so go back to the top of the main loop
2168 			 * and re-evaluate the memory situation
2169 			 */
2170 			return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2171 		} else {
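			/*
			 * nothing was evicted... back off and don't attempt another
			 * cache eviction for the next 1000 calls
			 */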
2172 			cache_evict_throttle = 1000;
2173 		}
2174 	}
2175 	if (cache_evict_throttle) {
2176 		cache_evict_throttle--;
2177 	}
2178 
2179 	return VM_PAGEOUT_SCAN_PROCEED;
2180 }
2181 
2182 
2183 /*
2184  * This function is called only from vm_pageout_scan and
2185  * it calculates the filecache minimum that needs to be maintained
2186  * as we start to steal pages.
2187  */
2188 static void
2189 vps_calculate_filecache_min(void)
2190 {
2191 	int divisor = vm_pageout_state.vm_page_filecache_min_divisor;
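	/*
	 * Both branches below compute the same ratio:
	 * filecache_min = AVAILABLE_NON_COMPRESSED_MEMORY * 10 / divisor.
	 * e.g. a divisor of 66 (an illustrative value, not the default)
	 * would keep roughly 15% of available non-compressed memory
	 * reserved for the filecache.
	 */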
2192 
2193 #if CONFIG_JETSAM
2194 	/*
2195 	 * don't let the filecache_min fall below 15% of available memory
2196 	 * on systems with an active compressor that isn't nearing its
2197 	 * limits w/r to accepting new data
2198 	 *
2199 	 * on systems w/o the compressor/swapper, the filecache is always
2200 	 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2201 	 * since most (if not all) of the anonymous pages are in the
2202 	 * throttled queue (which isn't counted as available), which
2203 	 * effectively disables this filter
2204 	 */
2205 	if (vm_compressor_low_on_space() || divisor == 0) {
2206 		vm_pageout_state.vm_page_filecache_min = 0;
2207 	} else {
2208 		vm_pageout_state.vm_page_filecache_min =
2209 		    ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2210 	}
2211 #else
2212 	if (vm_compressor_out_of_space() || divisor == 0) {
2213 		vm_pageout_state.vm_page_filecache_min = 0;
2214 	} else {
2215 		/*
2216 		 * don't let the filecache_min fall below the specified critical level
2217 		 */
2218 		vm_pageout_state.vm_page_filecache_min =
2219 		    ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2220 	}
2221 #endif
2222 	if (vm_page_free_count < (vm_page_free_reserved / 4)) {
2223 		vm_pageout_state.vm_page_filecache_min = 0;
2224 	}
2225 }
2226 
2227 /*
2228  * This function is called only from vm_pageout_scan and
2229  * it updates the flow control deadline used to detect whether vm_pageout_scan
2230  * isn't making progress.
2231  */
2232 static void
2233 vps_flow_control_reset_deadlock_timer(struct flow_control *flow_control)
2234 {
2235 	mach_timespec_t ts;
2236 	clock_sec_t sec;
2237 	clock_nsec_t nsec;
2238 
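	/*
	 * Arm a new deadline vm_pageout_deadlock_wait milliseconds from now;
	 * vps_flow_control() compares the current time against this
	 * timestamp to decide when a potential deadlock should be declared.
	 */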
2239 	ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
2240 	ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2241 	clock_get_system_nanotime(&sec, &nsec);
2242 	flow_control->ts.tv_sec = (unsigned int) sec;
2243 	flow_control->ts.tv_nsec = nsec;
2244 	ADD_MACH_TIMESPEC(&flow_control->ts, &ts);
2245 
2246 	flow_control->state = FCS_DELAYED;
2247 
2248 	vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
2249 }
2250 
2251 /*
2252  * This function is called only from vm_pageout_scan and
2253  * it implements the flow control logic of vm_pageout_scan, which
2254  * controls whether it should block and for how long.
2255  * Any blocking of vm_pageout_scan happens ONLY in this function.
2256  */
2257 static int
2258 vps_flow_control(struct flow_control *flow_control, int *anons_grabbed, vm_object_t *object, int *delayed_unlock,
2259     vm_page_t *local_freeq, int *local_freed, int *vm_pageout_deadlock_target, unsigned int inactive_burst_count)
2260 {
2261 	boolean_t       exceeded_burst_throttle = FALSE;
2262 	unsigned int    msecs = 0;
2263 	uint32_t        inactive_external_count;
2264 	mach_timespec_t ts;
2265 	struct  vm_pageout_queue *iq;
2266 	struct  vm_pageout_queue *eq;
2267 	struct  vm_speculative_age_q *sq;
2268 
2269 	iq = &vm_pageout_queue_internal;
2270 	eq = &vm_pageout_queue_external;
2271 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2272 
2273 	/*
2274 	 * Sometimes we have to pause:
2275 	 *	1) No inactive pages - nothing to do.
2276 	 *	2) Loop control - no acceptable pages found on the inactive queue
2277 	 *         within the last vm_pageout_burst_inactive_throttle iterations
2278 	 *	3) Flow control - default pageout queue is full
2279 	 */
2280 	if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2281 	    vm_page_queue_empty(&vm_page_queue_anonymous) &&
2282 	    vm_page_queue_empty(&vm_page_queue_cleaned) &&
2283 	    vm_page_queue_empty(&sq->age_q)) {
2284 		VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
2285 		msecs = vm_pageout_state.vm_pageout_empty_wait;
2286 	} else if (inactive_burst_count >=
2287 	    MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
2288 	    (vm_page_inactive_count +
2289 	    vm_page_speculative_count))) {
2290 		VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
2291 		msecs = vm_pageout_state.vm_pageout_burst_wait;
2292 
2293 		exceeded_burst_throttle = TRUE;
2294 	} else if (VM_PAGE_Q_THROTTLED(iq) &&
2295 	    VM_DYNAMIC_PAGING_ENABLED()) {
2296 		clock_sec_t sec;
2297 		clock_nsec_t nsec;
2298 
2299 		switch (flow_control->state) {
2300 		case FCS_IDLE:
2301 			if ((vm_page_free_count + *local_freed) < vm_page_free_target &&
2302 			    vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2303 				/*
2304 				 * since the compressor is running independently of vm_pageout_scan
2305 				 * let's not wait for it just yet... as long as we have a healthy supply
2306 				 * of filecache pages to work with, let's keep stealing those.
2307 				 */
2308 				inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2309 
2310 				if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
2311 				    (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2312 					*anons_grabbed = ANONS_GRABBED_LIMIT;
2313 					VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
2314 					return VM_PAGEOUT_SCAN_PROCEED;
2315 				}
2316 			}
2317 
2318 			vps_flow_control_reset_deadlock_timer(flow_control);
2319 			msecs = vm_pageout_state.vm_pageout_deadlock_wait;
2320 
2321 			break;
2322 
2323 		case FCS_DELAYED:
2324 			clock_get_system_nanotime(&sec, &nsec);
2325 			ts.tv_sec = (unsigned int) sec;
2326 			ts.tv_nsec = nsec;
2327 
2328 			if (CMP_MACH_TIMESPEC(&ts, &flow_control->ts) >= 0) {
2329 				/*
2330 				 * the pageout thread for the default pager is potentially
2331 				 * deadlocked since the
2332 				 * default pager queue has been throttled for more than the
2333 				 * allowable time... we need to move some clean pages or dirty
2334 				 * pages belonging to the external pagers if they aren't throttled
2335 				 * vm_page_free_wanted represents the number of threads currently
2336 				 * blocked waiting for pages... we'll move one page for each of
2337 				 * these plus a fixed amount to break the logjam... once we're done
2338 				 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2339 				 * with a new timeout target since we have no way of knowing
2340 				 * whether we've broken the deadlock except through observation
2341 				 * of the queue associated with the default pager... we need to
2342 				 * stop moving pages and allow the system to run to see what
2343 				 * state it settles into.
2344 				 */
2345 
2346 				*vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
2347 				    vm_page_free_wanted + vm_page_free_wanted_privileged;
2348 				VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
2349 				flow_control->state = FCS_DEADLOCK_DETECTED;
2350 				thread_wakeup(VM_PAGEOUT_GC_EVENT);
2351 				return VM_PAGEOUT_SCAN_PROCEED;
2352 			}
2353 			/*
2354 			 * just resniff instead of trying
2355 			 * to compute a new delay time... we're going to be
2356 			 * awakened immediately upon a laundry completion,
2357 			 * so we won't wait any longer than necessary
2358 			 */
2359 			msecs = vm_pageout_state.vm_pageout_idle_wait;
2360 			break;
2361 
2362 		case FCS_DEADLOCK_DETECTED:
2363 			if (*vm_pageout_deadlock_target) {
2364 				return VM_PAGEOUT_SCAN_PROCEED;
2365 			}
2366 
2367 			vps_flow_control_reset_deadlock_timer(flow_control);
2368 			msecs = vm_pageout_state.vm_pageout_deadlock_wait;
2369 
2370 			break;
2371 		}
2372 	} else {
2373 		/*
2374 		 * No need to pause...
2375 		 */
2376 		return VM_PAGEOUT_SCAN_PROCEED;
2377 	}
2378 
2379 	vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2380 
2381 	vm_pageout_prepare_to_block(object, delayed_unlock, local_freeq, local_freed,
2382 	    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2383 
2384 	if (vm_page_free_count >= vm_page_free_target) {
2385 		/*
2386 		 * we're here because
2387 		 *  1) someone else freed up some pages while we had
2388 		 *     the queues unlocked above
2389 		 * and we've hit one of the 3 conditions that
2390 		 * cause us to pause the pageout scan thread
2391 		 *
2392 		 * since we already have enough free pages,
2393 		 * let's avoid stalling and return normally
2394 		 *
2395 		 * before we return, make sure the pageout I/O threads
2396 		 * are running throttled in case there are still requests
2397 		 * in the laundry... since we have enough free pages
2398 		 * we don't need the laundry to be cleaned in a timely
2399 		 * fashion... so let's avoid interfering with foreground
2400 		 * activity
2401 		 *
2402 		 * we don't want to hold vm_page_queue_free_lock when
2403 		 * calling vm_pageout_adjust_eq_iothrottle (since it
2404 		 * may cause other locks to be taken), we do the initial
2405 		 * check outside of the lock.  Once we take the lock,
2406 		 * we recheck the condition since it may have changed.
2407 		 * if it has, no problem, we will make the threads
2408 		 * non-throttled before actually blocking
2409 		 */
2410 		vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, TRUE);
2411 	}
2412 	vm_free_page_lock();
2413 
2414 	if (vm_page_free_count >= vm_page_free_target &&
2415 	    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2416 		return VM_PAGEOUT_SCAN_DONE_RETURN;
2417 	}
2418 	vm_free_page_unlock();
2419 
2420 	if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2421 		/*
2422 		 * we're most likely about to block due to one of
2423 		 * the 3 conditions that cause vm_pageout_scan to
2424 		 * not be able to make forward progress w/r
2425 		 * to providing new pages to the free queue,
2426 		 * so unthrottle the I/O threads in case we
2427 		 * have laundry to be cleaned... it needs
2428 		 * to be completed ASAP.
2429 		 *
2430 		 * even if we don't block, we want the io threads
2431 		 * running unthrottled since the sum of free +
2432 		 * clean pages is still under our free target
2433 		 */
2434 		vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, FALSE);
2435 	}
2436 	if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2437 		/*
2438 		 * if we get here we're below our free target and
2439 		 * we're stalling due to a full laundry queue or
2440 		 * we don't have any inactive pages other than
2441 		 * those in the clean queue...
2442 		 * however, we have pages on the clean queue that
2443 		 * can be moved to the free queue, so let's not
2444 		 * stall the pageout scan
2445 		 */
2446 		flow_control->state = FCS_IDLE;
2447 		return VM_PAGEOUT_SCAN_PROCEED;
2448 	}
2449 	if (flow_control->state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
2450 		flow_control->state = FCS_IDLE;
2451 		return VM_PAGEOUT_SCAN_PROCEED;
2452 	}
2453 
2454 	VM_CHECK_MEMORYSTATUS;
2455 
2456 	if (flow_control->state != FCS_IDLE) {
2457 		VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
2458 	}
2459 
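	/*
	 * Mark the internal pageout queue throttled and arm a timed wait;
	 * a laundry completion can wake us before the 'msecs' timeout expires.
	 */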
2460 	iq->pgo_throttled = TRUE;
2461 	assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000 * NSEC_PER_USEC);
2462 
2463 	vm_page_unlock_queues();
2464 
2465 	assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2466 
2467 	VM_DEBUG_EVENT(vm_pageout_thread_block, DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2468 	    iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2469 	memoryshot(DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2470 
2471 	thread_block(THREAD_CONTINUE_NULL);
2472 
2473 	VM_DEBUG_EVENT(vm_pageout_thread_block, DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2474 	    iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2475 	memoryshot(DBG_VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2476 
2477 	vm_page_lock_queues();
2478 
2479 	iq->pgo_throttled = FALSE;
2480 
2481 	vps_init_page_targets();
2482 
2483 	return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2484 }
2485 
2486 extern boolean_t vm_darkwake_mode;
2487 /*
2488  * This function is called only from vm_pageout_scan and
2489  * it will find and return the most appropriate page to be
2490  * reclaimed.
2491  */
2492 static int
2493 vps_choose_victim_page(vm_page_t *victim_page, int *anons_grabbed, boolean_t *grab_anonymous, boolean_t force_anonymous,
2494     boolean_t *is_page_from_bg_q, unsigned int *reactivated_this_call)
2495 {
2496 	vm_page_t                       m = NULL;
2497 	vm_object_t                     m_object = VM_OBJECT_NULL;
2498 	uint32_t                        inactive_external_count;
2499 	struct vm_speculative_age_q     *sq;
2500 	struct vm_pageout_queue         *iq;
2501 	int                             retval = VM_PAGEOUT_SCAN_PROCEED;
2502 
2503 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2504 	iq = &vm_pageout_queue_internal;
2505 
2506 	*is_page_from_bg_q = FALSE;
2507 
2508 	m = NULL;
2509 	m_object = VM_OBJECT_NULL;
2510 
2511 	if (VM_DYNAMIC_PAGING_ENABLED()) {
2512 		assert(vm_page_throttled_count == 0);
2513 		assert(vm_page_queue_empty(&vm_page_queue_throttled));
2514 	}
2515 
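	/*
	 * Victim selection order, from most to least preferred:
	 * cleaned queue, aged speculative queue, donate queue (non-jetsam
	 * configs only), background queue, then external (file-backed)
	 * vs. anonymous inactive pages depending on the filecache floor
	 * and how many anonymous pages were grabbed in a row.
	 */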
2516 	/*
2517 	 * Try for a clean-queue inactive page.
2518 	 * These are pages that vm_pageout_scan tried to steal earlier, but
2519 	 * were dirty and had to be cleaned.  Pick them up now that they are clean.
2520 	 */
2521 	if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2522 		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2523 
2524 		assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2525 
2526 		goto found_page;
2527 	}
2528 
2529 	/*
2530 	 * The next most eligible pages are ones we paged in speculatively,
2531 	 * but which have not yet been touched and have been aged out.
2532 	 */
2533 	if (!vm_page_queue_empty(&sq->age_q)) {
2534 		m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2535 
2536 		assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2537 
2538 		if (!m->vmp_dirty || force_anonymous == FALSE) {
2539 			goto found_page;
2540 		} else {
2541 			m = NULL;
2542 		}
2543 	}
2544 
2545 #if !CONFIG_JETSAM
2546 	if (vm_page_donate_mode != VM_PAGE_DONATE_DISABLED) {
2547 		if (vm_page_donate_queue_ripe && !vm_page_queue_empty(&vm_page_queue_donate)) {
2548 			m = (vm_page_t) vm_page_queue_first(&vm_page_queue_donate);
2549 			assert(m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
2550 			goto found_page;
2551 		}
2552 	}
2553 #endif /* !CONFIG_JETSAM */
2554 
2555 	if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2556 		vm_object_t     bg_m_object = NULL;
2557 
2558 		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2559 
2560 		bg_m_object = VM_PAGE_OBJECT(m);
2561 
2562 		if (!VM_PAGE_PAGEABLE(m) || (vm_darkwake_mode && m->vmp_busy)) {
2563 			/*
2564 			 * This page is on the background queue
2565 			 * but not on a pageable queue OR is busy during
2566 			 * darkwake mode when the target is artificially lowered.
2567 			 * If it is busy during darkwake mode, and we don't skip it,
2568 			 * we will just swing back around and try again with the same
2569 			 * queue and might hit the same page or its neighbor in a
2570 			 * similar state. Both of these are transient states and will
2571 			 * get resolved, but, at this point let's ignore this page.
2572 			 */
2573 			if (vm_darkwake_mode && m->vmp_busy) {
2574 				if (bg_m_object->internal) {
2575 					vm_pageout_skipped_bq_internal++;
2576 				} else {
2577 					vm_pageout_skipped_bq_external++;
2578 				}
2579 			}
2580 		} else if (force_anonymous == FALSE || bg_m_object->internal) {
2581 			if (bg_m_object->internal &&
2582 			    (VM_PAGE_Q_THROTTLED(iq) ||
2583 			    vm_compressor_out_of_space() == TRUE ||
2584 			    vm_page_free_count < (vm_page_free_reserved / 4))) {
2585 				vm_pageout_skipped_bq_internal++;
2586 			} else {
2587 				*is_page_from_bg_q = TRUE;
2588 
2589 				if (bg_m_object->internal) {
2590 					vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
2591 				} else {
2592 					vm_pageout_vminfo.vm_pageout_considered_bq_external++;
2593 				}
2594 				goto found_page;
2595 			}
2596 		}
2597 	}
2598 
2599 	inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2600 
2601 	if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
2602 	    (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2603 		*grab_anonymous = TRUE;
2604 		*anons_grabbed = 0;
2605 
2606 		if (VM_CONFIG_SWAP_IS_ACTIVE) {
2607 			vm_pageout_vminfo.vm_pageout_skipped_external++;
2608 		} else {
2609 			if (vm_page_free_count < (COMPRESSOR_FREE_RESERVED_LIMIT * 2)) {
2610 				/*
2611 				 * No swap and we are in dangerously low levels of free memory.
2612 				 * If we keep going ahead with anonymous pages, we are going to run into a situation
2613 				 * where the compressor will be stuck waiting for free pages (if it isn't already).
2614 				 *
2615 				 * So, pick a file backed page...
2616 				 */
2617 				*grab_anonymous = FALSE;
2618 				*anons_grabbed = ANONS_GRABBED_LIMIT;
2619 				vm_pageout_vminfo.vm_pageout_skipped_internal++;
2620 			}
2621 		}
2622 		goto want_anonymous;
2623 	}
2624 	*grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2625 
2626 #if CONFIG_JETSAM
2627 	/* If the file-backed pool has accumulated
2628 	 * significantly more pages than the jetsam
2629 	 * threshold, prefer to reclaim those
2630 	 * inline to minimise compute overhead of reclaiming
2631 	 * anonymous pages.
2632 	 * This calculation does not account for the CPU local
2633 	 * external page queues, as those are expected to be
2634 	 * much smaller relative to the global pools.
2635 	 */
2636 
2637 	struct vm_pageout_queue *eq = &vm_pageout_queue_external;
2638 
2639 	if (*grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2640 		if (vm_page_pageable_external_count >
2641 		    vm_pageout_state.vm_page_filecache_min) {
2642 			if ((vm_page_pageable_external_count *
2643 			    vm_pageout_memorystatus_fb_factor_dr) >
2644 			    (memorystatus_available_pages_critical *
2645 			    vm_pageout_memorystatus_fb_factor_nr)) {
2646 				*grab_anonymous = FALSE;
2647 
2648 				VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
2649 			}
2650 		}
2651 		if (*grab_anonymous) {
2652 			VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
2653 		}
2654 	}
2655 #endif /* CONFIG_JETSAM */
2656 
2657 want_anonymous:
2658 	if (*grab_anonymous == FALSE || *anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2659 		if (!vm_page_queue_empty(&vm_page_queue_inactive)) {
2660 			m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2661 
2662 			assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2663 			*anons_grabbed = 0;
2664 
2665 			if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
2666 				if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
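					/*
					 * Below the filecache floor: reactivate 99 out of every
					 * 100 file-backed candidates and let the 100th fall
					 * through to be stolen (see the comment further down).
					 */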
2667 					if ((++(*reactivated_this_call) % 100)) {
2668 						vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
2669 
2670 						vm_page_activate(m);
2671 						counter_inc(&vm_statistics_reactivations);
2672 #if DEVELOPMENT || DEBUG
2673 						if (*is_page_from_bg_q == TRUE) {
2674 							if (m_object->internal) {
2675 								vm_pageout_rejected_bq_internal++;
2676 							} else {
2677 								vm_pageout_rejected_bq_external++;
2678 							}
2679 						}
2680 #endif /* DEVELOPMENT || DEBUG */
2681 						vm_pageout_state.vm_pageout_inactive_used++;
2682 
2683 						m = NULL;
2684 						retval = VM_PAGEOUT_SCAN_NEXT_ITERATION;
2685 
2686 						goto found_page;
2687 					}
2688 
2689 					/*
2690 					 * steal 1 of the file backed pages even if
2691 					 * we are under the limit that has been set
2692 					 * for a healthy filecache
2693 					 */
2694 				}
2695 			}
2696 			goto found_page;
2697 		}
2698 	}
2699 	if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
2700 		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2701 
2702 		assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2703 		*anons_grabbed += 1;
2704 
2705 		goto found_page;
2706 	}
2707 
2708 	m = NULL;
2709 
2710 found_page:
2711 	*victim_page = m;
2712 
2713 	return retval;
2714 }
2715 
2716 /*
2717  * This function is called only from vm_pageout_scan and
2718  * it will put a page back on the active/inactive queue
2719  * if we can't reclaim it for some reason.
2720  */
2721 static void
2722 vps_requeue_page(vm_page_t m, int page_prev_q_state, __unused boolean_t page_from_bg_q)
2723 {
2724 	if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
2725 		vm_page_enqueue_inactive(m, FALSE);
2726 	} else {
2727 		vm_page_activate(m);
2728 	}
2729 
2730 #if DEVELOPMENT || DEBUG
2731 	vm_object_t m_object = VM_PAGE_OBJECT(m);
2732 
2733 	if (page_from_bg_q == TRUE) {
2734 		if (m_object->internal) {
2735 			vm_pageout_rejected_bq_internal++;
2736 		} else {
2737 			vm_pageout_rejected_bq_external++;
2738 		}
2739 	}
2740 #endif /* DEVELOPMENT || DEBUG */
2741 }
2742 
2743 /*
2744  * This function is called only from vm_pageout_scan and
2745  * it will try to grab the victim page's VM object (m_object)
2746  * which differs from the previous victim page's object (object).
2747  */
2748 static int
2749 vps_switch_object(vm_page_t m, vm_object_t m_object, vm_object_t *object, int page_prev_q_state, boolean_t avoid_anon_pages, boolean_t page_from_bg_q)
2750 {
2751 	struct vm_speculative_age_q *sq;
2752 
2753 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2754 
2755 	/*
2756 	 * the object associated with the candidate page is
2757 	 * different from the one we were just working
2758 	 * with... dump the lock if we still own it
2759 	 */
2760 	if (*object != NULL) {
2761 		vm_object_unlock(*object);
2762 		*object = NULL;
2763 	}
2764 	/*
2765 	 * Try to lock the object; since we've already got the
2766 	 * page queues lock, we can only 'try' for this one.
2767 	 * if the 'try' fails, we need to do a mutex_pause
2768 	 * to allow the owner of the object lock a chance to
2769 	 * run... otherwise, we're likely to trip over this
2770 	 * object in the same state as we work our way through
2771 	 * the queue... clumps of pages associated with the same
2772 	 * object are fairly typical on the inactive and active queues
2773 	 */
2774 	if (!vm_object_lock_try_scan(m_object)) {
2775 		vm_page_t m_want = NULL;
2776 
2777 		vm_pageout_vminfo.vm_pageout_inactive_nolock++;
2778 
2779 		if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
2780 			VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
2781 		}
2782 
2783 		pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2784 
2785 		m->vmp_reference = FALSE;
2786 
2787 		if (!m_object->object_is_shared_cache) {
2788 			/*
2789 			 * don't apply this optimization if this is the shared cache
2790 			 * object, it's too easy to get rid of very hot and important
2791 			 * pages...
2792 			 * m->vmp_object must be stable since we hold the page queues lock...
2793 			 * we can update the scan_collisions field sans the object lock
2794 			 * since it is a separate field and this is the only spot that does
2795 			 * a read-modify-write operation and it is never executed concurrently...
2796 			 * we can asynchronously set this field to 0 when creating a UPL, so it
2797 		 * is possible for the value to be a bit non-deterministic, but that's ok
2798 			 * since it's only used as a hint
2799 			 */
2800 			m_object->scan_collisions = 1;
2801 		}
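		/*
		 * Peek at the page we're most likely to pick next (same
		 * preference order as vps_choose_victim_page) so that its
		 * object can be advertised via vm_pageout_scan_wants_object.
		 */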
2802 		if (page_from_bg_q) {
2803 			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2804 		} else if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2805 			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2806 		} else if (!vm_page_queue_empty(&sq->age_q)) {
2807 			m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2808 		} else if ((avoid_anon_pages || vm_page_queue_empty(&vm_page_queue_anonymous)) &&
2809 		    !vm_page_queue_empty(&vm_page_queue_inactive)) {
2810 			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2811 		} else if (!vm_page_queue_empty(&vm_page_queue_anonymous)) {
2812 			m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2813 		}
2814 
2815 		/*
2816 		 * this is the next object we're going to be interested in
2817 		 * try to make sure it's available after the mutex_pause
2818 		 * returns control
2819 		 */
2820 		if (m_want) {
2821 			vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2822 		}
2823 
2824 		vps_requeue_page(m, page_prev_q_state, page_from_bg_q);
2825 
2826 		return VM_PAGEOUT_SCAN_NEXT_ITERATION;
2827 	} else {
2828 		*object = m_object;
2829 		vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2830 	}
2831 
2832 	return VM_PAGEOUT_SCAN_PROCEED;
2833 }
2834 
2835 /*
2836  * This function is called only from vm_pageout_scan and
2837  * it notices that pageout scan may be rendered ineffective
2838  * due to an FS deadlock and will jetsam a process if possible.
2839  * If jetsam isn't supported, it'll move the page to the active
2840  * queue to try and get some different pages pushed onwards so
2841  * we can try to get out of this scenario.
2842  */
2843 static void
2844 vps_deal_with_throttled_queues(vm_page_t m, vm_object_t *object, uint32_t *vm_pageout_inactive_external_forced_reactivate_limit,
2845     boolean_t *force_anonymous, __unused boolean_t is_page_from_bg_q)
2846 {
2847 	struct  vm_pageout_queue *eq;
2848 	vm_object_t cur_object = VM_OBJECT_NULL;
2849 
2850 	cur_object = *object;
2851 
2852 	eq = &vm_pageout_queue_external;
2853 
2854 	if (cur_object->internal == FALSE) {
2855 		/*
2856 		 * we need to break up the following potential deadlock case...
2857 		 *  a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
2858 		 *  b) The thread doing the writing is waiting for pages while holding the truncate lock
2859 		 *  c) Most of the pages in the inactive queue belong to this file.
2860 		 *
2861 		 * we are potentially in this deadlock because...
2862 		 *  a) the external pageout queue is throttled
2863 		 *  b) we're done with the active queue and moved on to the inactive queue
2864 		 *  c) we've got a dirty external page
2865 		 *
2866 		 * since we don't know the reason for the external pageout queue being throttled we
2867 		 * must suspect that we are deadlocked, so move the current page onto the active queue
2868 		 * in an effort to cause a page from the active queue to 'age' to the inactive queue
2869 		 *
2870 		 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
2871 		 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
2872 		 * pool the next time we select a victim page... if we can make enough new free pages,
2873 		 * the deadlock will break, the external pageout queue will empty and it will no longer
2874 		 * be throttled
2875 		 *
2876 		 * if we have jetsam configured, keep a count of the pages reactivated this way so
2877 		 * that we can try to find clean pages in the active/inactive queues before
2878 		 * deciding to jetsam a process
2879 		 */
2880 		vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
2881 
2882 		vm_page_check_pageable_safe(m);
2883 		assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
2884 		vm_page_queue_enter(&vm_page_queue_active, m, vmp_pageq);
2885 		m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
2886 		vm_page_active_count++;
2887 		vm_page_pageable_external_count++;
2888 
2889 		vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, FALSE);
2890 
2891 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
2892 
2893 #pragma unused(force_anonymous)
2894 
2895 		*vm_pageout_inactive_external_forced_reactivate_limit -= 1;
2896 
2897 		if (*vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
2898 			*vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
2899 			/*
2900 			 * Possible deadlock scenario so request jetsam action
2901 			 */
2902 			memorystatus_kill_on_vps_starvation();
2903 			VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, DBG_VM_PAGEOUT_JETSAM, DBG_FUNC_NONE,
2904 			    vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
2905 		}
2906 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
2907 
2908 #pragma unused(vm_pageout_inactive_external_forced_reactivate_limit)
2909 
2910 		*force_anonymous = TRUE;
2911 #endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
2912 	} else {
2913 		vm_page_activate(m);
2914 		counter_inc(&vm_statistics_reactivations);
2915 
2916 #if DEVELOPMENT || DEBUG
2917 		if (is_page_from_bg_q == TRUE) {
2918 			if (cur_object->internal) {
2919 				vm_pageout_rejected_bq_internal++;
2920 			} else {
2921 				vm_pageout_rejected_bq_external++;
2922 			}
2923 		}
2924 #endif /* DEVELOPMENT || DEBUG */
2925 
2926 		vm_pageout_state.vm_pageout_inactive_used++;
2927 	}
2928 }
2929 
2930 
2931 void
2932 vm_page_balance_inactive(int max_to_move)
2933 {
2934 	vm_page_t m;
2935 
2936 	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2937 
2938 	if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) {
2939 		/*
2940 		 * It is likely that the hibernation code path is
2941 		 * dealing with these very queues as we are about
2942 		 * to move pages around in/from them and completely
2943 		 * change the linkage of the pages.
2944 		 *
2945 		 * And so we skip the rebalancing of these queues.
2946 		 */
2947 		return;
2948 	}
2949 	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
2950 	    vm_page_inactive_count +
2951 	    vm_page_speculative_count);
2952 
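	/*
	 * Deactivate pages from the head of the active queue until the
	 * inactive + speculative total reaches the freshly computed target,
	 * or until max_to_move pages have been moved.
	 */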
2953 	while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
2954 		VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
2955 
2956 		m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
2957 
2958 		assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
2959 		assert(!m->vmp_laundry);
2960 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
2961 		assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2962 
2963 		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2964 
2965 		/*
2966 		 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
2967 		 *
2968 		 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
2969 		 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
2970 		 * new reference happens. If no further references happen on the page after that remote TLB flushes
2971 		 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
2972 		 * by pageout_scan, which is just fine since the last reference would have happened quite far
2973 		 * in the past (TLB caches don't hang around for very long), and of course could just as easily
2974 		 * have happened before we moved the page
2975 		 */
2976 		if (m->vmp_pmapped == TRUE) {
2977 			/*
2978 			 * We might be holding the page queue lock as a
2979 			 * spin lock and clearing the "referenced" bit could
2980 			 * take a while if there are lots of mappings of
2981 			 * that page, so make sure we acquire the lock as
2982 			 * a mutex to avoid a spinlock timeout.
2983 			 */
2984 			vm_page_lockconvert_queues();
2985 			pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
2986 		}
2987 
2988 		/*
2989 		 * The page might be absent or busy,
2990 		 * but vm_page_deactivate can handle that.
2991 		 * FALSE indicates that we don't want a H/W clear reference
2992 		 */
2993 		vm_page_deactivate_internal(m, FALSE);
2994 	}
2995 }
2996 
2997 /*
2998  *	vm_pageout_scan does the dirty work for the pageout daemon.
2999  *	It returns with both vm_page_queue_free_lock and vm_page_queue_lock
3000  *	held and vm_page_free_wanted == 0.
3001  */
3002 void
3003 vm_pageout_scan(void)
3004 {
3005 	unsigned int loop_count = 0;
3006 	unsigned int inactive_burst_count = 0;
3007 	unsigned int reactivated_this_call;
3008 	unsigned int reactivate_limit;
3009 	vm_page_t   local_freeq = NULL;
3010 	int         local_freed = 0;
3011 	int         delayed_unlock;
3012 	int         delayed_unlock_limit = 0;
3013 	int         refmod_state = 0;
3014 	int     vm_pageout_deadlock_target = 0;
3015 	struct  vm_pageout_queue *iq;
3016 	struct  vm_pageout_queue *eq;
3017 	struct  vm_speculative_age_q *sq;
3018 	struct  flow_control    flow_control = { .state = 0, .ts = { .tv_sec = 0, .tv_nsec = 0 } };
3019 	boolean_t inactive_throttled = FALSE;
3020 	vm_object_t     object = NULL;
3021 	uint32_t        inactive_reclaim_run;
3022 	boolean_t       grab_anonymous = FALSE;
3023 	boolean_t       force_anonymous = FALSE;
3024 	boolean_t       force_speculative_aging = FALSE;
3025 	int             anons_grabbed = 0;
3026 	int             page_prev_q_state = 0;
3027 	boolean_t       page_from_bg_q = FALSE;
3028 	uint32_t        vm_pageout_inactive_external_forced_reactivate_limit = 0;
3029 	vm_object_t     m_object = VM_OBJECT_NULL;
3030 	int             retval = 0;
3031 	boolean_t       lock_yield_check = FALSE;
3032 
3033 
3034 	VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, DBG_VM_PAGEOUT_SCAN, DBG_FUNC_START,
3035 	    vm_pageout_vminfo.vm_pageout_freed_speculative,
3036 	    vm_pageout_state.vm_pageout_inactive_clean,
3037 	    vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
3038 	    vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
3039 
3040 	flow_control.state = FCS_IDLE;
3041 	iq = &vm_pageout_queue_internal;
3042 	eq = &vm_pageout_queue_external;
3043 	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3044 
3045 	/* Ask the pmap layer to return any pages it no longer needs. */
3046 	pmap_release_pages_fast();
3047 
3048 	vm_page_lock_queues();
3049 
3050 	delayed_unlock = 1;
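	/*
	 * delayed_unlock counts iterations since the page queue lock was last
	 * dropped; once it exceeds delayed_unlock_limit the lock is released
	 * (via vm_pageout_prepare_to_block) so other threads can make progress.
	 */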
3051 
3052 	/*
3053 	 *	Calculate the max number of referenced pages on the inactive
3054 	 *	queue that we will reactivate.
3055 	 */
3056 	reactivated_this_call = 0;
3057 	reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
3058 	    vm_page_inactive_count);
3059 	inactive_reclaim_run = 0;
3060 
3061 	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3062 
3063 	/*
3064 	 *	We must limit the rate at which we send pages to the pagers
3065 	 *	so that we don't tie up too many pages in the I/O queues.
3066 	 *	We implement a throttling mechanism using the laundry count
3067 	 *      to limit the number of pages outstanding to the default
3068 	 *	and external pagers.  We can bypass the throttles and look
3069 	 *	for clean pages if the pageout queues don't drain in a timely
3070 	 *	fashion since this may indicate that the pageout paths are
3071 	 *	stalled waiting for memory, which only we can provide.
3072 	 */
3073 
3074 	vps_init_page_targets();
3075 	assert(object == NULL);
3076 	assert(delayed_unlock != 0);
3077 
3078 	for (;;) {
3079 		vm_page_t m;
3080 
3081 		DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
3082 
3083 		if (lock_yield_check) {
3084 			lock_yield_check = FALSE;
3085 
3086 			if (delayed_unlock++ > delayed_unlock_limit) {
3087 				vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3088 				    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3089 			} else if (vm_pageout_scan_wants_object) {
3090 				vm_page_unlock_queues();
3091 				mutex_pause(0);
3092 				vm_page_lock_queues();
3093 			} else if (vps_yield_for_pgqlockwaiters && lck_mtx_yield(&vm_page_queue_lock)) {
3094 				VM_PAGEOUT_DEBUG(vm_pageout_yield_for_free_pages, 1);
3095 			}
3096 		}
3097 
3098 		if (vm_upl_wait_for_pages < 0) {
3099 			vm_upl_wait_for_pages = 0;
3100 		}
3101 
3102 		delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
3103 
3104 		if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) {
3105 			delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
3106 		}
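		/*
		 * The limit grows with the number of threads waiting for UPL
		 * pages (capped at VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX),
		 * allowing more pages to be processed per lock hold while
		 * waiters are queued.
		 */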
3107 
3108 		vps_deal_with_secluded_page_overflow(&local_freeq, &local_freed);
3109 
3110 		assert(delayed_unlock);
3111 
3112 		/*
3113 		 * maintain our balance
3114 		 */
3115 		vm_page_balance_inactive(1);
3116 
3117 
3118 		/**********************************************************************
3119 		* above this point we're playing with the active and secluded queues
3120 		* below this point we're playing with the throttling mechanisms
3121 		* and the inactive queue
3122 		**********************************************************************/
3123 
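		/*
		 * check whether we've met the free target, counting the pages
		 * still sitting on our local free list... if so and no one is
		 * waiting for a page, we're done with this scan
		 */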
3124 		if (vm_page_free_count + local_freed >= vm_page_free_target) {
3125 			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
3126 
3127 			vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3128 			    VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3129 			/*
3130 			 * make sure the pageout I/O threads are running
3131 			 * throttled in case there are still requests
3132 			 * in the laundry... since we have met our targets
3133 			 * we don't need the laundry to be cleaned in a timely
3134 			 * fashion... so let's avoid interfering with foreground
3135 			 * activity
3136 			 */
3137 			vm_pageout_adjust_eq_iothrottle(&pgo_iothread_external_state, TRUE);
3138 
3139 			vm_free_page_lock();
3140 
3141 			if ((vm_page_free_count >= vm_page_free_target) &&
3142 			    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
3143 				/*
3144 				 * done - we have met our target *and*
3145 				 * there is no one waiting for a page.
3146 				 */
3147 return_from_scan:
3148 				assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
3149 
3150 				VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, DBG_VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
3151 				    vm_pageout_state.vm_pageout_inactive,
3152 				    vm_pageout_state.vm_pageout_inactive_used, 0, 0);
3153 				VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, DBG_VM_PAGEOUT_SCAN, DBG_FUNC_END,
3154 				    vm_pageout_vminfo.vm_pageout_freed_speculative,
3155 				    vm_pageout_state.vm_pageout_inactive_clean,
3156 				    vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
3157 				    vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
3158 
3159 				return;
3160 			}
3161 			vm_free_page_unlock();
3162 		}
3163 
3164 		/*
3165 		 * Before anything, we check if we have any ripe volatile
3166 		 * objects around. If so, try to purge the first object.
3167 		 * If the purge fails, fall through to reclaim a page instead.
3168 		 * If the purge succeeds, go back to the top and reevaluate
3169 		 * the new memory situation.
3170 		 */
3171 		retval = vps_purge_object();
3172 
3173 		if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3174 			/*
3175 			 * Success
3176 			 */
3177 			if (object != NULL) {
3178 				vm_object_unlock(object);
3179 				object = NULL;
3180 			}
3181 
3182 			lock_yield_check = FALSE;
3183 			continue;
3184 		}
3185 
3186 		/*
3187 		 * If our 'aged' queue is empty and we have some speculative pages
3188 		 * in the other queues, let's go through and see if we need to age
3189 		 * them.
3190 		 *
3191 		 * If we succeeded in aging a speculative Q, or everything simply
3192 		 * looks normal w.r.t. queue age and queue counts, we keep going onward.
3193 		 *
3194 		 * If, for some reason, we seem to have a mismatch between the spec.
3195 		 * page count and the page queues, we reset those variables and
3196 		 * restart the loop (LD TODO: Track this better?).
3197 		 */
3198 		if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
3199 			retval = vps_age_speculative_queue(force_speculative_aging);
3200 
3201 			if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3202 				lock_yield_check = FALSE;
3203 				continue;
3204 			}
3205 		}
3206 		force_speculative_aging = FALSE;
3207 
3208 		/*
3209 		 * Check to see if we need to evict objects from the cache.
3210 		 *
3211 		 * Note: 'object' here doesn't have anything to do with
3212 		 * the eviction part. We just need to make sure we have dropped
3213 		 * any object lock we might be holding if we need to go down
3214 		 * into the eviction logic.
3215 		 */
3216 		retval = vps_object_cache_evict(&object);
3217 
3218 		if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3219 			lock_yield_check = FALSE;
3220 			continue;
3221 		}
3222 
3223 
3224 		/*
3225 		 * Calculate our filecache_min that will affect the loop
3226 		 * going forward.
3227 		 */
3228 		vps_calculate_filecache_min();
3229 
3230 		/*
3231 		 * LD TODO: Use a structure to hold all state variables for a single
3232 		 * vm_pageout_scan iteration and pass that structure to this function instead.
3233 		 */
3234 		retval = vps_flow_control(&flow_control, &anons_grabbed, &object,
3235 		    &delayed_unlock, &local_freeq, &local_freed,
3236 		    &vm_pageout_deadlock_target, inactive_burst_count);
3237 
3238 		if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3239 			if (loop_count >= vm_page_inactive_count) {
3240 				loop_count = 0;
3241 			}
3242 
3243 			inactive_burst_count = 0;
3244 
3245 			assert(object == NULL);
3246 			assert(delayed_unlock != 0);
3247 
3248 			lock_yield_check = FALSE;
3249 			continue;
3250 		} else if (retval == VM_PAGEOUT_SCAN_DONE_RETURN) {
3251 			goto return_from_scan;
3252 		}
3253 
3254 		flow_control.state = FCS_IDLE;
3255 
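		/*
		 * re-clamp the limit on forced reactivations of external pages
		 * so it never exceeds the current active + inactive page count
		 */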
3256 		vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
3257 		    vm_pageout_inactive_external_forced_reactivate_limit);
3258 		loop_count++;
3259 		inactive_burst_count++;
3260 		vm_pageout_state.vm_pageout_inactive++;
3261 
3262 		/*
3263 		 * Choose a victim.
3264 		 */
3265 
3266 		m = NULL;
3267 		retval = vps_choose_victim_page(&m, &anons_grabbed, &grab_anonymous, force_anonymous, &page_from_bg_q, &reactivated_this_call);
3268 
3269 		if (m == NULL) {
3270 			if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3271 				inactive_burst_count = 0;
3272 
3273 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3274 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
3275 				}
3276 
3277 				lock_yield_check = TRUE;
3278 				continue;
3279 			}
3280 
3281 			/*
3282 			 * if we've gotten here, we have no victim page.
3283 			 * check to see if we've not finished balancing the queues
3284 			 * or we have a page on the aged speculative queue that we
3285 			 * skipped due to force_anonymous == TRUE... or we have
3286 			 * speculative pages that we can prematurely age... in any of
3287 			 * these cases we'll keep going, else panic
3288 			 */
3289 			force_anonymous = FALSE;
3290 			VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1);
3291 
3292 			if (!vm_page_queue_empty(&sq->age_q)) {
3293 				lock_yield_check = TRUE;
3294 				continue;
3295 			}
3296 
3297 			if (vm_page_speculative_count) {
3298 				force_speculative_aging = TRUE;
3299 				lock_yield_check = TRUE;
3300 				continue;
3301 			}
3302 			panic("vm_pageout: no victim");
3303 
3304 			/* NOTREACHED */
3305 		}
3306 
3307 		assert(VM_PAGE_PAGEABLE(m));
3308 		m_object = VM_PAGE_OBJECT(m);
3309 		force_anonymous = FALSE;
3310 
3311 		page_prev_q_state = m->vmp_q_state;
3312 		/*
3313 		 * we just found this page on one of our queues...
3314 		 * it can't also be on the pageout queue, so safe
3315 		 * to call vm_page_queues_remove
3316 		 */
3317 		bool donate = (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
3318 		vm_page_queues_remove(m, TRUE);
3319 		if (donate) {
3320 			/*
3321 			 * The compressor needs to see this bit to know
3322 			 * where this page needs to land. Also if stolen,
3323 			 * this bit helps put the page back in the right
3324 			 * special queue where it belongs.
3325 			 */
3326 			m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
3327 		}
3328 
3329 		assert(!m->vmp_laundry);
3330 		assert(!m->vmp_private);
3331 		assert(!m->vmp_fictitious);
3332 		assert(!is_kernel_object(m_object));
3333 		assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
3334 
3335 		vm_pageout_vminfo.vm_pageout_considered_page++;
3336 
3337 		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
3338 
3339 		/*
3340 		 * check to see if we currently are working
3341 		 * with the same object... if so, we've
3342 		 * already got the lock
3343 		 */
3344 		if (m_object != object) {
3345 			boolean_t avoid_anon_pages = (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT);
3346 
3347 			/*
3348 			 * vps_switch_object() will always drop the 'object' lock first
3349 			 * and then try to acquire the 'm_object' lock. So 'object' has to point to
3350 			 * either 'm_object' or NULL.
3351 			 */
3352 			retval = vps_switch_object(m, m_object, &object, page_prev_q_state, avoid_anon_pages, page_from_bg_q);
3353 
3354 			if (retval == VM_PAGEOUT_SCAN_NEXT_ITERATION) {
3355 				lock_yield_check = TRUE;
3356 				continue;
3357 			}
3358 		}
3359 		assert(m_object == object);
3360 		assert(VM_PAGE_OBJECT(m) == m_object);
3361 
3362 		if (m->vmp_busy) {
3363 			/*
3364 			 *	Somebody is already playing with this page.
3365 			 *	Put it back on the appropriate queue
3366 			 *
3367 			 */
3368 			VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, 1);
3369 
3370 			if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3371 				VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1);
3372 			}
3373 
3374 			vps_requeue_page(m, page_prev_q_state, page_from_bg_q);
3375 
3376 			lock_yield_check = TRUE;
3377 			continue;
3378 		}
3379 
3380 		/*
3381 		 *   if (m->vmp_cleaning && !m->vmp_free_when_done)
3382 		 *	If already cleaning this page in place
3383 		 *	just leave it off the paging queues.
3384 		 *	We can leave the page mapped, and upl_commit_range
3385 		 *	will put it on the clean queue.
3386 		 *
3387 		 *   if (m->vmp_free_when_done && !m->vmp_cleaning)
3388 		 *	an msync INVALIDATE is in progress...
3389 		 *	this page has been marked for destruction
3390 		 *      after it has been cleaned,
3391 		 *      but not yet gathered into a UPL
3392 		 *	where 'cleaning' will be set...
3393 		 *	just leave it off the paging queues
3394 		 *
3395 		 *   if (m->vmp_free_when_done && m->vmp_cleaning)
3396 		 *	an msync INVALIDATE is in progress
3397 		 *	and the UPL has already gathered this page...
3398 		 *	just leave it off the paging queues
3399 		 */
3400 		if (m->vmp_free_when_done || m->vmp_cleaning) {
3401 			lock_yield_check = TRUE;
3402 			continue;
3403 		}
3404 
3405 
3406 		/*
3407 		 *	If it's absent, in error or the object is no longer alive,
3408 		 *	we can reclaim the page... in the no longer alive case,
3409 		 *	there are 2 states the page can be in that preclude us
3410 		 *	from reclaiming it - busy or cleaning - that we've already
3411 		 *	dealt with
3412 		 */
3413 		if (m->vmp_absent || VMP_ERROR_GET(m) || !object->alive ||
3414 		    (!object->internal && object->pager == MEMORY_OBJECT_NULL)) {
3415 			if (m->vmp_absent) {
3416 				VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, 1);
3417 			} else if (!object->alive ||
3418 			    (!object->internal &&
3419 			    object->pager == MEMORY_OBJECT_NULL)) {
3420 				VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, 1);
3421 			} else {
3422 				VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, 1);
3423 			}
3424 reclaim_page:
3425 			if (vm_pageout_deadlock_target) {
3426 				VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, 1);
3427 				vm_pageout_deadlock_target--;
3428 			}
3429 
3430 			DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
3431 
3432 			if (object->internal) {
3433 				DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
3434 			} else {
3435 				DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
3436 			}
3437 			assert(!m->vmp_cleaning);
3438 			assert(!m->vmp_laundry);
3439 
3440 			if (!object->internal &&
3441 			    object->pager != NULL &&
3442 			    object->pager->mo_pager_ops == &shared_region_pager_ops) {
3443 				shared_region_pager_reclaimed++;
3444 			}
3445 
3446 			m->vmp_busy = TRUE;
3447 
3448 			/*
3449 			 * remove page from object here since we're already
3450 			 * behind the object lock... defer the rest of the work
3451 			 * we'd normally do in vm_page_free_prepare_object
3452 			 * until 'vm_page_free_list' is called
3453 			 */
3454 			if (m->vmp_tabled) {
3455 				vm_page_remove(m, TRUE);
3456 			}
3457 
3458 			assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
3459 			m->vmp_snext = local_freeq;
3460 			local_freeq = m;
3461 			local_freed++;
3462 
3463 			if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
3464 				vm_pageout_vminfo.vm_pageout_freed_speculative++;
3465 			} else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3466 				vm_pageout_vminfo.vm_pageout_freed_cleaned++;
3467 			} else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q) {
3468 				vm_pageout_vminfo.vm_pageout_freed_internal++;
3469 			} else {
3470 				vm_pageout_vminfo.vm_pageout_freed_external++;
3471 			}
3472 
3473 			inactive_burst_count = 0;
3474 
3475 			lock_yield_check = TRUE;
3476 			continue;
3477 		}
3478 		if (object->vo_copy == VM_OBJECT_NULL) {
3479 			/*
3480 			 * No one else can have any interest in this page.
3481 			 * If this is an empty purgable object, the page can be
3482 			 * reclaimed even if dirty.
3483 			 * If the page belongs to a volatile purgable object, we
3484 			 * reactivate it if the compressor isn't active.
3485 			 */
3486 			if (object->purgable == VM_PURGABLE_EMPTY) {
3487 				if (m->vmp_pmapped == TRUE) {
3488 					/* unmap the page */
3489 					refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
3490 					if (refmod_state & VM_MEM_MODIFIED) {
3491 						SET_PAGE_DIRTY(m, FALSE);
3492 					}
3493 				}
3494 				if (m->vmp_dirty || m->vmp_precious) {
3495 					/* we saved the cost of cleaning this page ! */
3496 					vm_page_purged_count++;
3497 				}
3498 				goto reclaim_page;
3499 			}
3500 
3501 			if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
3502 				/*
3503 				 * With the VM compressor, the cost of
3504 				 * reclaiming a page is much lower (no I/O),
3505 				 * so if we find a "volatile" page, it's better
3506 				 * to let it get compressed rather than letting
3507 				 * it occupy a full page until it gets purged.
3508 				 * So no need to check for "volatile" here.
3509 				 */
3510 			} else if (object->purgable == VM_PURGABLE_VOLATILE) {
3511 				/*
3512 				 * Avoid cleaning a "volatile" page which might
3513 				 * be purged soon.
3514 				 */
3515 
3516 				/* if it's wired, we can't put it on our queue */
3517 				assert(!VM_PAGE_WIRED(m));
3518 
3519 				/* just stick it back on! */
3520 				reactivated_this_call++;
3521 
3522 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3523 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, 1);
3524 				}
3525 
3526 				goto reactivate_page;
3527 			}
3528 		} /* vo_copy NULL */
3529 		/*
3530 		 *	If it's being used, reactivate.
3531 		 *	(Fictitious pages are either busy or absent.)
3532 		 *	First, update the reference and dirty bits
3533 		 *	to make sure the page is unreferenced.
3534 		 */
3535 		refmod_state = -1;
3536 
3537 		if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
3538 			refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3539 
3540 			if (refmod_state & VM_MEM_REFERENCED) {
3541 				m->vmp_reference = TRUE;
3542 			}
3543 			if (refmod_state & VM_MEM_MODIFIED) {
3544 				SET_PAGE_DIRTY(m, FALSE);
3545 			}
3546 		}
3547 
3548 		if (m->vmp_reference || m->vmp_dirty) {
3549 			/* deal with a rogue "reusable" page */
3550 			VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
3551 		}
3552 
3553 		if (vm_pageout_state.vm_page_xpmapped_min_divisor == 0) {
3554 			vm_pageout_state.vm_page_xpmapped_min = 0;
3555 		} else {
3556 			vm_pageout_state.vm_page_xpmapped_min = (vm_page_pageable_external_count * 10) /
3557 			    vm_pageout_state.vm_page_xpmapped_min_divisor;
3558 		}
3559 
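		/*
		 * decide whether this page is a candidate for reactivation:
		 * it was referenced, or it's an xpmapped external page and the
		 * xpmapped count has fallen below vm_page_xpmapped_min...
		 * 'no cache' pages and pages from the background queue are
		 * never considered here
		 */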
3560 		if (!m->vmp_no_cache &&
3561 		    page_from_bg_q == FALSE &&
3562 		    (m->vmp_reference || (m->vmp_xpmapped && !object->internal &&
3563 		    (vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
3564 			/*
3565 			 * The page we pulled off the inactive list has
3566 			 * been referenced.  It is possible for other
3567 			 * processors to be touching pages faster than we
3568 			 * can clear the referenced bit and traverse the
3569 			 * inactive queue, so we limit the number of
3570 			 * reactivations.
3571 			 */
3572 			if (++reactivated_this_call >= reactivate_limit &&
3573 			    !object->object_is_shared_cache &&
3574 			    !((m->vmp_realtime ||
3575 			    object->for_realtime) &&
3576 			    vm_pageout_protect_realtime)) {
3577 				vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++;
3578 			} else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
3579 				vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++;
3580 				if (object->object_is_shared_cache) {
3581 					vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache++;
3582 				} else if (m->vmp_realtime ||
3583 				    object->for_realtime) {
3584 					vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime++;
3585 				}
3586 			} else {
3587 				uint32_t isinuse;
3588 
3589 				if (reactivated_this_call >= reactivate_limit) {
3590 					if (object->object_is_shared_cache) {
3591 						vm_pageout_vminfo.vm_pageout_protected_sharedcache++;
3592 					} else if ((m->vmp_realtime ||
3593 					    object->for_realtime) &&
3594 					    vm_pageout_protect_realtime) {
3595 						vm_pageout_vminfo.vm_pageout_protected_realtime++;
3596 					}
3597 				}
3598 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3599 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, 1);
3600 				}
3601 
3602 				vm_pageout_vminfo.vm_pageout_inactive_referenced++;
3603 reactivate_page:
3604 				if (!object->internal && object->pager != MEMORY_OBJECT_NULL &&
3605 				    vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
3606 					/*
3607 					 * no explicit mappings of this object exist
3608 					 * and it's not open via the filesystem
3609 					 */
3610 					vm_page_deactivate(m);
3611 					VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1);
3612 				} else {
3613 					/*
3614 					 * The page was/is being used, so put back on active list.
3615 					 */
3616 					vm_page_activate(m);
3617 					counter_inc(&vm_statistics_reactivations);
3618 					inactive_burst_count = 0;
3619 				}
3620 #if DEVELOPMENT || DEBUG
3621 				if (page_from_bg_q == TRUE) {
3622 					if (m_object->internal) {
3623 						vm_pageout_rejected_bq_internal++;
3624 					} else {
3625 						vm_pageout_rejected_bq_external++;
3626 					}
3627 				}
3628 #endif /* DEVELOPMENT || DEBUG */
3629 
3630 				if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3631 					VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
3632 				}
3633 				vm_pageout_state.vm_pageout_inactive_used++;
3634 
3635 				lock_yield_check = TRUE;
3636 				continue;
3637 			}
3638 			/*
3639 			 * Make sure we call pmap_get_refmod() if it
3640 			 * wasn't already called just above, to update
3641 			 * the dirty bit.
3642 			 */
3643 			if ((refmod_state == -1) && !m->vmp_dirty && m->vmp_pmapped) {
3644 				refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
3645 				if (refmod_state & VM_MEM_MODIFIED) {
3646 					SET_PAGE_DIRTY(m, FALSE);
3647 				}
3648 			}
3649 		}
3650 
3651 		/*
3652 		 * we've got a candidate page to steal...
3653 		 *
3654 		 * m->vmp_dirty is up to date courtesy of the
3655 		 * preceding check for m->vmp_reference... if
3656 		 * we get here, then m->vmp_reference had to be
3657 		 * FALSE (or possibly "reactivate_limit" was
3658 		 * exceeded), but in either case we called
3659 		 * pmap_get_refmod() and updated both
3660 		 * m->vmp_reference and m->vmp_dirty
3661 		 *
3662 		 * if it's dirty or precious we need to
3663 		 * see if the target queue is throttled...
3664 		 * if it is, we need to skip over it by moving it back
3665 		 * to the end of the inactive queue
3666 		 */
3667 
3668 		inactive_throttled = FALSE;
3669 
3670 		if (m->vmp_dirty || m->vmp_precious) {
3671 			if (object->internal) {
3672 				if (VM_PAGE_Q_THROTTLED(iq)) {
3673 					inactive_throttled = TRUE;
3674 				}
3675 			} else if (VM_PAGE_Q_THROTTLED(eq)) {
3676 				inactive_throttled = TRUE;
3677 			}
3678 		}
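		/*
		 * with dynamic paging disabled, a dirty page backed by an
		 * internal object can't be paged out... park it on the
		 * throttled queue instead of stealing it
		 */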
3679 throttle_inactive:
3680 		if (!VM_DYNAMIC_PAGING_ENABLED() &&
3681 		    object->internal && m->vmp_dirty &&
3682 		    (object->purgable == VM_PURGABLE_DENY ||
3683 		    object->purgable == VM_PURGABLE_NONVOLATILE ||
3684 		    object->purgable == VM_PURGABLE_VOLATILE)) {
3685 			vm_page_check_pageable_safe(m);
3686 			assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3687 			vm_page_queue_enter(&vm_page_queue_throttled, m, vmp_pageq);
3688 			m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
3689 			vm_page_throttled_count++;
3690 
3691 			VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1);
3692 
3693 			inactive_burst_count = 0;
3694 
3695 			lock_yield_check = TRUE;
3696 			continue;
3697 		}
3698 		if (inactive_throttled == TRUE) {
3699 			vps_deal_with_throttled_queues(m, &object, &vm_pageout_inactive_external_forced_reactivate_limit,
3700 			    &force_anonymous, page_from_bg_q);
3701 
3702 			inactive_burst_count = 0;
3703 
3704 			if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) {
3705 				VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
3706 			}
3707 
3708 			lock_yield_check = TRUE;
3709 			continue;
3710 		}
3711 
3712 		/*
3713 		 * we've got a page that we can steal...
3714 		 * eliminate all mappings and make sure
3715 		 * we have the up-to-date modified state
3716 		 *
3717 		 * if we need to do a pmap_disconnect then we
3718 		 * need to re-evaluate m->vmp_dirty since the pmap_disconnect
3719 		 * provides the true state atomically... the
3720 		 * page was still mapped up to the pmap_disconnect
3721 		 * and may have been dirtied at the last microsecond
3722 		 *
3723 		 * Note that if 'pmapped' is FALSE then the page is not
3724 		 * and has not been in any map, so there is no point calling
3725 		 * pmap_disconnect().  m->vmp_dirty could have been set in anticipation
3726 		 * of likely usage of the page.
3727 		 */
3728 		if (m->vmp_pmapped == TRUE) {
3729 			int pmap_options;
3730 
3731 			/*
3732 			 * Don't count this page as going into the compressor
3733 			 * if any of these are true:
3734 			 * 1) compressed pager isn't enabled
3735 			 * 2) Freezer enabled device with compressed pager
3736 			 *    backend (exclusive use) i.e. most of the VM system
3737 			 *    (including vm_pageout_scan) has no knowledge of
3738 			 *    the compressor
3739 			 * 3) This page belongs to a file and hence will not be
3740 			 *    sent into the compressor
3741 			 */
3742 			if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3743 			    object->internal == FALSE) {
3744 				pmap_options = 0;
3745 			} else if (m->vmp_dirty || m->vmp_precious) {
3746 				/*
3747 				 * VM knows that this page is dirty (or
3748 				 * precious) and needs to be compressed
3749 				 * rather than freed.
3750 				 * Tell the pmap layer to count this page
3751 				 * as "compressed".
3752 				 */
3753 				pmap_options = PMAP_OPTIONS_COMPRESSOR;
3754 			} else {
3755 				/*
3756 				 * VM does not know if the page needs to
3757 				 * be preserved but the pmap layer might tell
3758 				 * us if any mapping has "modified" it.
3759 				 * Let the pmap layer count this page
3760 				 * as compressed if and only if it has been
3761 				 * modified.
3762 				 */
3763 				pmap_options =
3764 				    PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3765 			}
3766 			refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3767 			    pmap_options,
3768 			    NULL);
3769 			if (refmod_state & VM_MEM_MODIFIED) {
3770 				SET_PAGE_DIRTY(m, FALSE);
3771 			}
3772 		}
3773 
3774 		/*
3775 		 * reset our count of pages that have been reclaimed
3776 		 * since the last page was 'stolen'
3777 		 */
3778 		inactive_reclaim_run = 0;
3779 
3780 		/*
3781 		 *	If it's clean and not precious, we can free the page.
3782 		 */
3783 		if (!m->vmp_dirty && !m->vmp_precious) {
3784 			vm_pageout_state.vm_pageout_inactive_clean++;
3785 
3786 			/*
3787 			 * OK, at this point we have found a page we are going to free.
3788 			 */
3789 #if CONFIG_PHANTOM_CACHE
3790 			if (!object->internal) {
3791 				vm_phantom_cache_add_ghost(m);
3792 			}
3793 #endif
3794 			goto reclaim_page;
3795 		}
3796 
3797 		/*
3798 		 * The page may have been dirtied since the last check
3799 		 * for a throttled target queue (which may have been skipped
3800 		 * if the page was clean then).  With the dirty page
3801 		 * disconnected here, we can make one final check.
3802 		 */
3803 		if (object->internal) {
3804 			if (VM_PAGE_Q_THROTTLED(iq)) {
3805 				inactive_throttled = TRUE;
3806 			}
3807 		} else if (VM_PAGE_Q_THROTTLED(eq)) {
3808 			inactive_throttled = TRUE;
3809 		}
3810 
3811 		if (inactive_throttled == TRUE) {
3812 			goto throttle_inactive;
3813 		}
3814 
3815 #if VM_PRESSURE_EVENTS
3816 #if CONFIG_JETSAM
3817 
3818 		/*
3819 		 * If Jetsam is enabled, then the sending
3820 		 * of memory pressure notifications is handled
3821 		 * from the same thread that takes care of high-water
3822 		 * and other jetsams i.e. the memorystatus_thread.
3823 		 */
3824 
3825 #else /* CONFIG_JETSAM */
3826 
3827 		vm_pressure_response();
3828 
3829 #endif /* CONFIG_JETSAM */
3830 #endif /* VM_PRESSURE_EVENTS */
3831 
3832 		if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
3833 			VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, 1);
3834 		}
3835 
3836 		if (object->internal) {
3837 			vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++;
3838 		} else {
3839 			vm_pageout_vminfo.vm_pageout_inactive_dirty_external++;
3840 		}
3841 
3842 		/*
3843 		 * internal pages will go to the compressor...
3844 		 * external pages will go to the appropriate pager to be cleaned
3845 		 * and upon completion will end up on 'vm_page_queue_cleaned' which
3846 		 * is a preferred queue to steal from
3847 		 */
3848 		vm_pageout_cluster(m);
3849 		inactive_burst_count = 0;
3850 
3851 		/*
3852 		 * back to top of pageout scan loop
3853 		 */
3854 	}
3855 }
3856 
3857 
3858 void
3859 vm_page_free_reserve(
3860 	int pages)
3861 {
3862 	int             free_after_reserve;
3863 
3864 	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3865 		if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) {
3866 			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3867 		} else {
3868 			vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3869 		}
3870 	} else {
3871 		if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) {
3872 			vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3873 		} else {
3874 			vm_page_free_reserved += pages;
3875 		}
3876 	}
3877 	free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved;
3878 
3879 	vm_page_free_min = vm_page_free_reserved +
3880 	    VM_PAGE_FREE_MIN(free_after_reserve);
3881 
3882 	if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) {
3883 		vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3884 	}
3885 
3886 	vm_page_free_target = vm_page_free_reserved +
3887 	    VM_PAGE_FREE_TARGET(free_after_reserve);
3888 
3889 	if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) {
3890 		vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3891 	}
3892 
3893 	if (vm_page_free_target < vm_page_free_min + 5) {
3894 		vm_page_free_target = vm_page_free_min + 5;
3895 	}
3896 
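	/* the throttle limit sits at half of the free target */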
3897 	vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3898 }
3899 
3900 /*
3901  *	vm_pageout is the high level pageout daemon.
3902  */
3903 
3904 void
3905 vm_pageout_continue(void)
3906 {
3907 	DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3908 	VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1);
3909 
3910 	vm_free_page_lock();
3911 	vm_pageout_running = TRUE;
3912 	vm_free_page_unlock();
3913 
3914 	vm_pageout_scan();
3915 	/*
3916 	 * we hold both the vm_page_queue_free_lock
3917 	 * and the vm_page_queues_lock at this point
3918 	 */
3919 	assert(vm_page_free_wanted == 0);
3920 	assert(vm_page_free_wanted_privileged == 0);
3921 	assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3922 
3923 	vm_pageout_running = FALSE;
3924 #if XNU_TARGET_OS_OSX
3925 	if (vm_pageout_waiter) {
3926 		vm_pageout_waiter = FALSE;
3927 		thread_wakeup((event_t)&vm_pageout_waiter);
3928 	}
3929 #endif /* XNU_TARGET_OS_OSX */
3930 
3931 	vm_free_page_unlock();
3932 	vm_page_unlock_queues();
3933 
3934 	thread_block((thread_continue_t)vm_pageout_continue);
3935 	/*NOTREACHED*/
3936 }
3937 
3938 #if XNU_TARGET_OS_OSX
3939 kern_return_t
3940 vm_pageout_wait(uint64_t deadline)
3941 {
3942 	kern_return_t kr;
3943 
3944 	vm_free_page_lock();
3945 	for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr);) {
3946 		vm_pageout_waiter = TRUE;
3947 		if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3948 			    &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3949 			    (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3950 			kr = KERN_OPERATION_TIMED_OUT;
3951 		}
3952 	}
3953 	vm_free_page_unlock();
3954 
3955 	return kr;
3956 }
3957 #endif /* XNU_TARGET_OS_OSX */
3958 
3959 OS_NORETURN
3960 static void
3961 vm_pageout_iothread_external_continue(struct pgo_iothread_state *ethr, __unused wait_result_t w)
3962 {
3963 	vm_page_t       m = NULL;
3964 	vm_object_t     object;
3965 	vm_object_offset_t offset;
3966 	memory_object_t pager;
3967 	struct vm_pageout_queue *q = ethr->q;
3968 
3969 	/* On systems with a compressor, the external IO thread clears its
3970 	 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
3971 	 * creation)
3972 	 */
3973 	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3974 		current_thread()->options &= ~TH_OPT_VMPRIV;
3975 	}
3976 
3977 	sched_cond_ack(&(ethr->pgo_wakeup));
3978 
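	/*
	 * outer loop: drain the external pageout queue... each laundered
	 * page is re-looked up under its object lock and then handed to
	 * its pager via memory_object_data_return()
	 */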
3979 	while (true) {
3980 		vm_page_lockspin_queues();
3981 
3982 		while (!vm_page_queue_empty(&q->pgo_pending)) {
3983 			q->pgo_busy = TRUE;
3984 			vm_page_queue_remove_first(&q->pgo_pending, m, vmp_pageq);
3985 
3986 			assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3987 			VM_PAGE_CHECK(m);
3988 			/*
3989 			 * grab a snapshot of the object and offset this
3990 			 * page is tabled in so that we can relookup this
3991 			 * page after we've taken the object lock - these
3992 			 * fields are stable while we hold the page queues lock
3993 			 * but as soon as we drop it, there is nothing to keep
3994 			 * this page in this object... we hold an activity_in_progress
3995 			 * on this object which will keep it from terminating
3996 			 */
3997 			object = VM_PAGE_OBJECT(m);
3998 			offset = m->vmp_offset;
3999 
4000 			m->vmp_q_state = VM_PAGE_NOT_ON_Q;
4001 			VM_PAGE_ZERO_PAGEQ_ENTRY(m);
4002 
4003 			vm_page_unlock_queues();
4004 
4005 			vm_object_lock(object);
4006 
4007 			m = vm_page_lookup(object, offset);
4008 
4009 			if (m == NULL || m->vmp_busy || m->vmp_cleaning ||
4010 			    !m->vmp_laundry || (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) {
4011 				/*
4012 				 * it's either the same page that someone else has
4013 				 * started cleaning (or it's finished cleaning or
4014 				 * been put back on the pageout queue), or
4015 				 * the page has been freed or we have found a
4016 				 * new page at this offset... in all of these cases
4017 				 * we merely need to release the activity_in_progress
4018 				 * we took when we put the page on the pageout queue
4019 				 */
4020 				vm_object_activity_end(object);
4021 				vm_object_unlock(object);
4022 
4023 				vm_page_lockspin_queues();
4024 				continue;
4025 			}
4026 			pager = object->pager;
4027 
4028 			if (pager == MEMORY_OBJECT_NULL) {
4029 				/*
4030 				 * This pager has been destroyed by either
4031 				 * memory_object_destroy or vm_object_destroy, and
4032 				 * so there is nowhere for the page to go.
4033 				 */
4034 				if (m->vmp_free_when_done) {
4035 					/*
4036 					 * Just free the page... VM_PAGE_FREE takes
4037 					 * care of cleaning up all the state...
4038 					 * including doing the vm_pageout_throttle_up
4039 					 */
4040 					VM_PAGE_FREE(m);
4041 				} else {
4042 					vm_page_lockspin_queues();
4043 
4044 					vm_pageout_throttle_up(m);
4045 					vm_page_activate(m);
4046 
4047 					vm_page_unlock_queues();
4048 
4049 					/*
4050 					 *	And we are done with it.
4051 					 */
4052 				}
4053 				vm_object_activity_end(object);
4054 				vm_object_unlock(object);
4055 
4056 				vm_page_lockspin_queues();
4057 				continue;
4058 			}
4059 	#if 0
4060 			/*
4061 			 * we don't hold the page queue lock
4062 			 * so this check isn't safe to make
4063 			 */
4064 			VM_PAGE_CHECK(m);
4065 	#endif
4066 			/*
4067 			 * give back the activity_in_progress reference we
4068 			 * took when we queued up this page and replace it
4069 			 * with a paging_in_progress reference that will
4070 			 * also hold the paging offset from changing and
4071 			 * prevent the object from terminating
4072 			 */
4073 			vm_object_activity_end(object);
4074 			vm_object_paging_begin(object);
4075 			vm_object_unlock(object);
4076 
4077 			/*
4078 			 * Send the data to the pager.
4079 			 * any pageout clustering happens there
4080 			 */
4081 			memory_object_data_return(pager,
4082 			    m->vmp_offset + object->paging_offset,
4083 			    PAGE_SIZE,
4084 			    NULL,
4085 			    NULL,
4086 			    FALSE,
4087 			    FALSE,
4088 			    0);
4089 
4090 			vm_object_lock(object);
4091 			vm_object_paging_end(object);
4092 			vm_object_unlock(object);
4093 
4094 			vm_pageout_io_throttle();
4095 
4096 			vm_page_lockspin_queues();
4097 		}
4098 		q->pgo_busy = FALSE;
4099 
4100 		vm_page_unlock_queues();
4101 		sched_cond_wait_parameter(&(ethr->pgo_wakeup), THREAD_UNINT, (thread_continue_t)vm_pageout_iothread_external_continue, ethr);
4102 	}
4103 	/*NOTREACHED*/
4104 }
4105 
4106 uint32_t vm_compressor_time_thread; /* Set via sysctl 'vm.compressor_timing_enabled' to record time accrued by this thread. */
4107 
4108 #if DEVELOPMENT || DEBUG
4109 static void
4110 vm_pageout_record_thread_time(int cqid, int ncomps)
4111 {
4112 	if (__improbable(vm_compressor_time_thread)) {
4113 		vmct_stats.vmct_runtimes[cqid] = thread_get_runtime_self();
4114 		vmct_stats.vmct_pages[cqid] += ncomps;
4115 		vmct_stats.vmct_iterations[cqid]++;
4116 		if (ncomps > vmct_stats.vmct_maxpages[cqid]) {
4117 			vmct_stats.vmct_maxpages[cqid] = ncomps;
4118 		}
4119 		if (ncomps < vmct_stats.vmct_minpages[cqid]) {
4120 			vmct_stats.vmct_minpages[cqid] = ncomps;
4121 		}
4122 	}
4123 }
4124 #endif
4125 
4126 static void *
4127 vm_pageout_select_filling_chead(struct pgo_iothread_state *cq, vm_page_t m)
4128 {
4129 	/*
4130 	 * Technically we need the pageq locks to manipulate the vmp_on_specialq field.
4131 	 * However, this page has been removed from all queues and is only
4132 	 * known to this compressor thread dealing with this local queue.
4133 	 *
4134 	 * TODO: Add a second localq that is the early localq and
4135 	 * put special pages like this one on that queue in the block above
4136 	 * under the pageq lock to avoid this 'works but not clean' logic.
4137 	 */
4138 	void *donate_queue_head;
4139 #if XNU_TARGET_OS_OSX /* tag:DONATE */
4140 	donate_queue_head = &cq->current_early_swapout_chead;
4141 #else /* XNU_TARGET_OS_OSX */
4142 	donate_queue_head = &cq->current_late_swapout_chead;
4143 #endif /* XNU_TARGET_OS_OSX */
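	/*
	 * donated pages are accumulated on their own swapout chead
	 * (early on macOS, late elsewhere)... everything else fills the
	 * regular chead
	 */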
4144 	if (m->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE) {
4145 		m->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
4146 		return donate_queue_head;
4147 	} else {
4148 		return &cq->current_regular_swapout_chead;
4149 	}
4150 }
4151 
4152 #define         MAX_FREE_BATCH          32
4153 
4154 OS_NORETURN
4155 static void
4156 vm_pageout_iothread_internal_continue(struct pgo_iothread_state *cq, __unused wait_result_t w)
4157 {
4158 	struct vm_pageout_queue *q;
4159 	vm_page_t       m = NULL;
4160 	boolean_t       pgo_draining;
4161 	vm_page_t   local_q;
4162 	int         local_cnt;
4163 	vm_page_t   local_freeq = NULL;
4164 	int         local_freed = 0;
4165 	int         local_batch_size;
4166 #if DEVELOPMENT || DEBUG
4167 	int       ncomps = 0;
4168 	boolean_t marked_active = FALSE;
4169 	int       num_pages_processed = 0;
4170 #endif
4171 	void *chead = NULL;
4172 
4173 	KDBG_FILTERED(0xe040000c | DBG_FUNC_END);
4174 
4175 	sched_cond_ack(&(cq->pgo_wakeup));
4176 
4177 	q = cq->q;
4178 
4179 	while (true) { /* this top loop lets the compressor_running_perf_test run at full speed without blocking */
4180 #if DEVELOPMENT || DEBUG
4181 		bool benchmark_accounting = false;
4182 		/* If we're running the compressor perf test, only process the benchmark pages.
4183 		 * We'll get back to our regular queue once the benchmark is done */
4184 		if (compressor_running_perf_test) {
4185 			q = cq->benchmark_q;
4186 			if (!vm_page_queue_empty(&q->pgo_pending)) {
4187 				benchmark_accounting = true;
4188 			} else {
4189 				q = cq->q;
4190 				benchmark_accounting = false;
4191 			}
4192 		}
4193 #endif /* DEVELOPMENT || DEBUG */
4194 
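		/*
		 * size the local batch we'll pull off the pending queue:
		 * normally pgo_maxlaundry split across twice the number of
		 * compressor threads, but 1/8 of pgo_maxlaundry (at least 16)
		 * when the compressor is E-core bound with multiple threads
		 */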
4195 #if __AMP__
4196 		if (vm_compressor_ebound && (vm_pageout_state.vm_compressor_thread_count > 1)) {
4197 			local_batch_size = (q->pgo_maxlaundry >> 3);
4198 			local_batch_size = MAX(local_batch_size, 16);
4199 		} else {
4200 			local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
4201 		}
4202 #else
4203 		local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
4204 #endif
4205 
4206 #if RECORD_THE_COMPRESSED_DATA
4207 		if (q->pgo_laundry) {
4208 			c_compressed_record_init();
4209 		}
4210 #endif
4211 		while (true) { /* this loop is for working through all the pages in the pending queue */
4212 			int     pages_left_on_q = 0;
4213 
4214 			local_cnt = 0;
4215 			local_q = NULL;
4216 
4217 			KDBG_FILTERED(0xe0400014 | DBG_FUNC_START);
4218 
4219 			vm_page_lock_queues();
4220 #if DEVELOPMENT || DEBUG
4221 			if (marked_active == FALSE) {
4222 				vmct_active++;
4223 				vmct_state[cq->id] = VMCT_ACTIVE;
4224 				marked_active = TRUE;
4225 				if (vmct_active == 1) {
4226 					vm_compressor_epoch_start = mach_absolute_time();
4227 				}
4228 			}
4229 #endif
4230 			KDBG_FILTERED(0xe0400014 | DBG_FUNC_END);
4231 
4232 			KDBG_FILTERED(0xe0400018 | DBG_FUNC_START, q->pgo_laundry);
4233 
4234 			/* empty the entire content of the thread input q to local_q, but not more than local_batch_size pages */
4235 			while (!vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
4236 				vm_page_queue_remove_first(&q->pgo_pending, m, vmp_pageq);
4237 				assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
4238 				VM_PAGE_CHECK(m);
4239 
4240 				m->vmp_q_state = VM_PAGE_NOT_ON_Q;
4241 				VM_PAGE_ZERO_PAGEQ_ENTRY(m);
4242 				m->vmp_laundry = FALSE;
4243 
4244 				m->vmp_snext = local_q;
4245 				local_q = m;
4246 				local_cnt++;
4247 			}
4248 			if (local_q == NULL) {
4249 				break;
4250 			}
4251 
4252 			q->pgo_busy = TRUE;
4253 
4254 			if ((pgo_draining = q->pgo_draining) == FALSE) {
4255 				vm_pageout_throttle_up_batch(q, local_cnt);
4256 				pages_left_on_q = q->pgo_laundry;
4257 			} else {
4258 				pages_left_on_q = q->pgo_laundry - local_cnt;
4259 			}
4260 
4261 			vm_page_unlock_queues();
4262 
4263 #if !RECORD_THE_COMPRESSED_DATA
4264 			/* if we have lots to compress, wake up the other thread to help.
4265 			 * disabled when recording data since the recorded data is not protected with a mutex so this may cause races */
4266 			if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - 1)) {
4267 				// wake up the next compressor thread
4268 				sched_cond_signal(&pgo_iothread_internal_state[cq->id + 1].pgo_wakeup,
4269 				    pgo_iothread_internal_state[cq->id + 1].pgo_iothread);
4270 			}
4271 #endif
4272 			KDBG_FILTERED(0xe0400018 | DBG_FUNC_END, q->pgo_laundry);
4273 
4274 			while (local_q) {
4275 				KDBG_FILTERED(0xe0400024 | DBG_FUNC_START, local_cnt);
4276 
4277 				m = local_q;
4278 				local_q = m->vmp_snext;
4279 				m->vmp_snext = NULL;
4280 
4281 
4282 				chead = vm_pageout_select_filling_chead(cq, m);
4283 
4284 				if (vm_pageout_compress_page(chead, cq->scratch_buf, m) == KERN_SUCCESS) {
4285 #if DEVELOPMENT || DEBUG
4286 					ncomps++;
4287 #endif
4288 					KDBG_FILTERED(0xe0400024 | DBG_FUNC_END, local_cnt);
4289 
4290 					m->vmp_snext = local_freeq;
4291 					local_freeq = m;
4292 					local_freed++;
4293 
4294 					/* if we gathered enough free pages, free them now */
4295 					if (local_freed >= MAX_FREE_BATCH) {
4296 						OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
4297 
4298 						vm_page_free_list(local_freeq, TRUE);
4299 
4300 						local_freeq = NULL;
4301 						local_freed = 0;
4302 					}
4303 				}
4304 #if DEVELOPMENT || DEBUG
4305 				num_pages_processed++;
4306 #endif /* DEVELOPMENT || DEBUG */
4307 #if !CONFIG_JETSAM /* Maybe: if there's no JETSAM, be more proactive in waking up anybody that needs free pages */
4308 				while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
4309 					kern_return_t   wait_result;
4310 					int             need_wakeup = 0;
4311 
4312 					if (local_freeq) {
4313 						OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
4314 
4315 						vm_page_free_list(local_freeq, TRUE);
4316 						local_freeq = NULL;
4317 						local_freed = 0;
4318 
4319 						continue;
4320 					}
4321 					vm_free_page_lock_spin();
4322 
4323 					if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
4324 						if (vm_page_free_wanted_privileged++ == 0) {
4325 							need_wakeup = 1;
4326 						}
4327 						wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
4328 
4329 						vm_free_page_unlock();
4330 
4331 						if (need_wakeup) {
4332 							thread_wakeup((event_t)&vm_page_free_wanted);
4333 						}
4334 
4335 						if (wait_result == THREAD_WAITING) {
4336 							thread_block(THREAD_CONTINUE_NULL);
4337 						}
4338 					} else {
4339 						vm_free_page_unlock();
4340 					}
4341 				}
4342 #endif
4343 			}  /* while (local_q) */
4344 			/* free any leftovers in the freeq */
4345 			if (local_freeq) {
4346 				OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
4347 
4348 				vm_page_free_list(local_freeq, TRUE);
4349 				local_freeq = NULL;
4350 				local_freed = 0;
4351 			}
4352 			if (pgo_draining == TRUE) {
4353 				vm_page_lockspin_queues();
4354 				vm_pageout_throttle_up_batch(q, local_cnt);
4355 				vm_page_unlock_queues();
4356 			}
4357 		}
4358 		KDBG_FILTERED(0xe040000c | DBG_FUNC_START);
4359 
4360 		/*
4361 		 * queue lock is held and our q is empty
4362 		 */
4363 		q->pgo_busy = FALSE;
4364 #if DEVELOPMENT || DEBUG
4365 		if (marked_active == TRUE) {
4366 			vmct_active--;
4367 			vmct_state[cq->id] = VMCT_IDLE;
4368 
4369 			if (vmct_active == 0) {
4370 				vm_compressor_epoch_stop = mach_absolute_time();
4371 				assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start,
4372 				    "Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
4373 				    vm_compressor_epoch_start, vm_compressor_epoch_stop);
4374 				/* This interval includes intervals where one or more
4375 				 * compressor threads were pre-empted
4376 				 */
4377 				vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start;
4378 			}
4379 		}
4380 		if (compressor_running_perf_test && benchmark_accounting) {
4381 			/*
4382 			 * We could turn ON compressor_running_perf_test while still processing
4383 			 * regular non-benchmark pages. We shouldn't count them here else we
4384 			 * could overshoot. We might also still be populating that benchmark Q
4385 			 * and be under pressure. So we will go back to the regular queues. And
4386 			 * benchmark accounting will be off for that case too.
4387 			 */
4388 			compressor_perf_test_pages_processed += num_pages_processed;
4389 			thread_wakeup(&compressor_perf_test_pages_processed);
4390 		}
4391 #endif
4392 		vm_page_unlock_queues();
4393 #if DEVELOPMENT || DEBUG
4394 		vm_pageout_record_thread_time(cq->id, ncomps);
4395 #endif
4396 
4397 		KDBG_FILTERED(0xe0400018 | DBG_FUNC_END);
4398 #if DEVELOPMENT || DEBUG
4399 		if (compressor_running_perf_test && benchmark_accounting) {
4400 			/*
4401 			 * We've been exclusively compressing pages from the benchmark queue,
4402 			 * do 1 pass over the internal queue before blocking.
4403 			 */
4404 			continue;
4405 		}
4406 #endif
4407 
4408 		sched_cond_wait_parameter(&(cq->pgo_wakeup), THREAD_UNINT, (thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
4409 	}
4410 	/*NOTREACHED*/
4411 }
4412 
4413 /* resolves the pager and maintains stats in the pager and in the vm_object */
4414 kern_return_t
4415 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m)
4416 {
4417 	vm_object_t     object;
4418 	memory_object_t pager;
4419 	int             compressed_count_delta;
4420 	kern_return_t   retval;
4421 
4422 	object = VM_PAGE_OBJECT(m);
4423 
4424 	assert(!m->vmp_free_when_done);
4425 	assert(!m->vmp_laundry);
4426 
4427 	pager = object->pager;
4428 
4429 	if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
4430 		KDBG_FILTERED(0xe0400010 | DBG_FUNC_START, object, pager);
4431 
4432 		vm_object_lock(object);
4433 
4434 		/*
4435 		 * If there is no memory object for the page, create
4436 		 * one and hand it to the compression pager.
4437 		 */
4438 
4439 		if (!object->pager_initialized) {
4440 			vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
4441 		}
4442 		if (!object->pager_initialized) {
4443 			vm_object_compressor_pager_create(object);
4444 		}
4445 
4446 		pager = object->pager;
4447 
4448 		if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
4449 			/*
4450 			 * Still no pager for the object,
4451 			 * or the pager has been destroyed.
4452 			 * Reactivate the page.
4453 			 *
4454 			 * Should only happen if there is no
4455 			 * compression pager
4456 			 */
4457 			vm_page_wakeup_done(object, m);
4458 
4459 			vm_page_lockspin_queues();
4460 			vm_page_activate(m);
4461 			VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, 1);
4462 			vm_page_unlock_queues();
4463 
4464 			/*
4465 			 *	And we are done with it.
4466 			 */
4467 			vm_object_activity_end(object);
4468 			vm_object_unlock(object);
4469 
4470 			return KERN_FAILURE;
4471 		}
4472 		vm_object_unlock(object);
4473 
4474 		KDBG_FILTERED(0xe0400010 | DBG_FUNC_END, object, pager);
4475 	}
4476 	assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
4477 	assert(object->activity_in_progress > 0);
4478 
4479 #if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
4480 	if (m->vmp_unmodified_ro == true) {
4481 		os_atomic_inc(&compressor_ro_uncompressed_total_returned, relaxed);
4482 	}
4483 #endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
4484 
4485 	vm_compressor_options_t flags = 0;
4486 
4487 #if CONFIG_TRACK_UNMODIFIED_ANON_PAGES
4488 	if (m->vmp_unmodified_ro) {
4489 		flags |= C_PAGE_UNMODIFIED;
4490 	}
4491 #endif /* CONFIG_TRACK_UNMODIFIED_ANON_PAGES */
4492 
4493 
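	/*
	 * hand the page to the compressor pager... compressed_count_delta
	 * reports how the object's compressed page count changed so the
	 * pager-level and owner-level accounting below can be updated
	 */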
4494 	retval = vm_compressor_pager_put(
4495 		pager,
4496 		m->vmp_offset + object->paging_offset,
4497 		VM_PAGE_GET_PHYS_PAGE(m),
4498 		current_chead,
4499 		scratch_buf,
4500 		&compressed_count_delta,
4501 		flags);
4502 
4503 	vm_object_lock(object);
4504 
4505 	assert(object->activity_in_progress > 0);
4506 	assert(VM_PAGE_OBJECT(m) == object);
4507 	assert( !VM_PAGE_WIRED(m));
4508 
4509 	vm_compressor_pager_count(pager,
4510 	    compressed_count_delta,
4511 	    FALSE,                       /* shared_lock */
4512 	    object);
4513 
4514 	if (retval == KERN_SUCCESS) {
4515 		/*
4516 		 * If the object is purgeable, its owner's
4517 		 * purgeable ledgers will be updated in
4518 		 * vm_page_remove() but the page still
4519 		 * contributes to the owner's memory footprint,
4520 		 * so account for it as such.
4521 		 */
4522 		if (m->vmp_tabled) {
4523 			vm_page_remove(m, TRUE);
4524 		}
4525 		if ((object->purgable != VM_PURGABLE_DENY ||
4526 		    object->vo_ledger_tag) &&
4527 		    object->vo_owner != NULL) {
4528 			/* one more compressed purgeable/tagged page */
4529 			vm_object_owner_compressed_update(object,
4530 			    compressed_count_delta);
4531 		}
4532 		counter_inc(&vm_statistics_compressions);
4533 	} else {
4534 		vm_page_wakeup_done(object, m);
4535 
4536 		vm_page_lockspin_queues();
4537 
4538 		vm_page_activate(m);
4539 		vm_pageout_vminfo.vm_compressor_failed++;
4540 
4541 		vm_page_unlock_queues();
4542 	}
4543 	vm_object_activity_end(object);
4544 	vm_object_unlock(object);
4545 
4546 	return retval;
4547 }
4548 
4549 
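/*
 * Adjust the I/O throttling policy of the external pageout thread.
 * When 'req_lowpriority' is TRUE its I/O is issued at the throttled
 * pageout tier; hibernate cleaning always forces it back to the
 * unthrottled tier.
 */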
4550 static void
4551 vm_pageout_adjust_eq_iothrottle(struct pgo_iothread_state *ethr, boolean_t req_lowpriority)
4552 {
4553 	uint32_t        policy;
4554 
4555 	if (hibernate_cleaning_in_progress == TRUE) {
4556 		req_lowpriority = FALSE;
4557 	}
4558 
4559 	if (ethr->q->pgo_inited == TRUE && ethr->q->pgo_lowpriority != req_lowpriority) {
4560 		vm_page_unlock_queues();
4561 
4562 		if (req_lowpriority == TRUE) {
4563 			policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
4564 			DTRACE_VM(laundrythrottle);
4565 		} else {
4566 			policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
4567 			DTRACE_VM(laundryunthrottle);
4568 		}
4569 		proc_set_thread_policy(ethr->pgo_iothread,
4570 		    TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
4571 
4572 		vm_page_lock_queues();
4573 		ethr->q->pgo_lowpriority = req_lowpriority;
4574 	}
4575 }
4576 
4577 OS_NORETURN
4578 static void
4579 vm_pageout_iothread_external(struct pgo_iothread_state *ethr, __unused wait_result_t w)
4580 {
4581 	thread_t        self = current_thread();
4582 
4583 	self->options |= TH_OPT_VMPRIV;
4584 
4585 	DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
4586 
4587 	proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
4588 	    TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
4589 
4590 	vm_page_lock_queues();
4591 
4592 	vm_pageout_queue_external.pgo_lowpriority = TRUE;
4593 	vm_pageout_queue_external.pgo_inited = TRUE;
4594 
4595 	vm_page_unlock_queues();
4596 
4597 #if CONFIG_THREAD_GROUPS
4598 	thread_group_vm_add();
4599 #endif /* CONFIG_THREAD_GROUPS */
4600 
4601 	vm_pageout_iothread_external_continue(ethr, 0);
4602 	/*NOTREACHED*/
4603 }
4604 
4605 
4606 OS_NORETURN
4607 static void
4608 vm_pageout_iothread_internal(struct pgo_iothread_state *cthr, __unused wait_result_t w)
4609 {
4610 	thread_t        self = current_thread();
4611 
4612 	self->options |= TH_OPT_VMPRIV;
4613 
4614 	vm_page_lock_queues();
4615 
4616 	vm_pageout_queue_internal.pgo_lowpriority = TRUE;
4617 	vm_pageout_queue_internal.pgo_inited = TRUE;
4618 
4619 #if DEVELOPMENT || DEBUG
4620 	vm_pageout_queue_benchmark.pgo_lowpriority = vm_pageout_queue_internal.pgo_lowpriority;
4621 	vm_pageout_queue_benchmark.pgo_inited = vm_pageout_queue_internal.pgo_inited;
4622 	vm_pageout_queue_benchmark.pgo_busy = FALSE;
4623 #endif /* DEVELOPMENT || DEBUG */
4624 
4625 	vm_page_unlock_queues();
4626 
4627 	if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
4628 		thread_vm_bind_group_add();
4629 	}
4630 
4631 #if CONFIG_THREAD_GROUPS
4632 	thread_group_vm_add();
4633 #endif /* CONFIG_THREAD_GROUPS */
4634 
4635 #if __AMP__
4636 	if (vm_compressor_ebound) {
4637 		/*
4638 		 * Use the soft bound option for vm_compressor to allow it to run on
4639 		 * P-cores if E-cluster is unavailable.
4640 		 */
4641 		thread_bind_cluster_type(self, 'E', true);
4642 	}
4643 #endif /* __AMP__ */
4644 
4645 	thread_set_thread_name(current_thread(), "VM_compressor");
4646 #if DEVELOPMENT || DEBUG
4647 	vmct_stats.vmct_minpages[cthr->id] = INT32_MAX;
4648 #endif
4649 	vm_pageout_iothread_internal_continue(cthr, 0);
4650 
4651 	/*NOTREACHED*/
4652 }
4653 
4654 kern_return_t
4655 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
4656 {
4657 	if (OSCompareAndSwapPtr(NULL, ptrauth_nop_cast(void *, func), (void * volatile *) &consider_buffer_cache_collect)) {
4658 		return KERN_SUCCESS;
4659 	} else {
4660 		return KERN_FAILURE; /* Already set */
4661 	}
4662 }
4663 
4664 extern boolean_t        memorystatus_manual_testing_on;
4665 extern unsigned int     memorystatus_level;
4666 
4667 
4668 #if VM_PRESSURE_EVENTS
4669 
4670 boolean_t vm_pressure_events_enabled = FALSE;
4671 
4672 extern uint64_t next_warning_notification_sent_at_ts;
4673 extern uint64_t next_critical_notification_sent_at_ts;
4674 
4675 #define PRESSURE_LEVEL_STUCK_THRESHOLD_MINS    (30)    /* 30 minutes. */
4676 
4677 /*
4678  * The last time there was change in pressure level OR we forced a check
4679  * because the system is stuck in a non-normal pressure level.
4680  */
4681 uint64_t  vm_pressure_last_level_transition_abs = 0;
4682 
4683 /*
4684  * This is how long the system waits 'stuck' in an unchanged non-normal pressure
4685  * level before re-sending notifications for that level again.
4686  */
4687 int  vm_pressure_level_transition_threshold = PRESSURE_LEVEL_STUCK_THRESHOLD_MINS;
4688 
4689 void
4690 vm_pressure_response(void)
4691 {
4692 	vm_pressure_level_t     old_level = kVMPressureNormal;
4693 	int                     new_level = -1;
4694 	unsigned int            total_pages;
4695 	uint64_t                available_memory = 0;
4696 	uint64_t                curr_ts, abs_time_since_level_transition, time_in_ns;
4697 	bool                    force_check = false;
4698 	int                     time_in_mins;
4699 
4700 
4701 	if (vm_pressure_events_enabled == FALSE) {
4702 		return;
4703 	}
4704 
4705 #if !XNU_TARGET_OS_OSX
4706 
4707 	available_memory = (uint64_t) memorystatus_available_pages;
4708 
4709 #else /* !XNU_TARGET_OS_OSX */
4710 
4711 	available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4712 	memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
4713 
4714 #endif /* !XNU_TARGET_OS_OSX */
4715 
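	/*
	 * recompute memorystatus_level as the percentage of (non-secluded)
	 * memory that is still available
	 */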
4716 	total_pages = (unsigned int) atop_64(max_mem);
4717 #if CONFIG_SECLUDED_MEMORY
4718 	total_pages -= vm_page_secluded_count;
4719 #endif /* CONFIG_SECLUDED_MEMORY */
4720 	memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
4721 
4722 	if (memorystatus_manual_testing_on) {
4723 		return;
4724 	}
4725 
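	/*
	 * figure out how long we've been sitting at the current pressure
	 * level... if we've been stuck at a non-normal level longer than
	 * vm_pressure_level_transition_threshold minutes, force the
	 * notifications for that level to be re-sent
	 */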
4726 	curr_ts = mach_absolute_time();
4727 	abs_time_since_level_transition = curr_ts - vm_pressure_last_level_transition_abs;
4728 
4729 	absolutetime_to_nanoseconds(abs_time_since_level_transition, &time_in_ns);
4730 	time_in_mins = (int) ((time_in_ns / NSEC_PER_SEC) / 60);
4731 	force_check = (time_in_mins >= vm_pressure_level_transition_threshold);
4732 
4733 	old_level = memorystatus_vm_pressure_level;
4734 
4735 	switch (memorystatus_vm_pressure_level) {
4736 	case kVMPressureNormal:
4737 	{
4738 		if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4739 			new_level = kVMPressureCritical;
4740 		} else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
4741 			new_level = kVMPressureWarning;
4742 		}
4743 		break;
4744 	}
4745 
4746 	case kVMPressureWarning:
4747 	case kVMPressureUrgent:
4748 	{
4749 		if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4750 			new_level = kVMPressureNormal;
4751 		} else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4752 			new_level = kVMPressureCritical;
4753 		} else if (force_check) {
4754 			new_level = kVMPressureWarning;
4755 			next_warning_notification_sent_at_ts = curr_ts;
4756 		}
4757 		break;
4758 	}
4759 
4760 	case kVMPressureCritical:
4761 	{
4762 		if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4763 			new_level = kVMPressureNormal;
4764 		} else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4765 			new_level = kVMPressureWarning;
4766 		} else if (force_check) {
4767 			new_level = kVMPressureCritical;
4768 			next_critical_notification_sent_at_ts = curr_ts;
4769 		}
4770 		break;
4771 	}
4772 
4773 	default:
4774 		return;
4775 	}
4776 
4777 	if (new_level != -1 || force_check) {
4778 		if (new_level != -1) {
4779 			memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4780 
4781 			if (new_level != (int) old_level) {
4782 				VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, DBG_VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
4783 				    new_level, old_level, 0, 0);
4784 			}
4785 		} else {
4786 			VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, DBG_VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
4787 			    new_level, old_level, force_check, 0);
4788 		}
4789 
4790 		if (hibernation_vmqueues_inspection || hibernate_cleaning_in_progress) {
4791 			/*
4792 			 * We don't want to schedule a wakeup while hibernation is in progress
4793 			 * because that could collide with checks for non-monotonicity in the scheduler.
4794 			 * We do however do all the updates to memorystatus_vm_pressure_level because
4795 			 * we _might_ want to use that for decisions regarding which pages or how
4796 			 * many pages we want to dump in hibernation.
4797 			 */
4798 			return;
4799 		}
4800 
4801 		if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != memorystatus_vm_pressure_level) || force_check) {
4802 			if (vm_pageout_state.vm_pressure_thread_running == FALSE) {
4803 				thread_wakeup(&vm_pressure_thread);
4804 			}
4805 
4806 			if (old_level != memorystatus_vm_pressure_level) {
4807 				thread_wakeup(&vm_pageout_state.vm_pressure_changed);
4808 			}
4809 			vm_pressure_last_level_transition_abs = curr_ts; /* renew the window of observation for a stuck pressure level */
4810 		}
4811 	}
4812 }
4813 #endif /* VM_PRESSURE_EVENTS */
4814 
4815 
4816 /**
4817  * Called by a kernel thread to ask if a number of pages may be wired.
4818  */
4819 kern_return_t
4820 mach_vm_wire_level_monitor(int64_t requested_pages)
4821 {
4822 	if (requested_pages <= 0) {
4823 		return KERN_INVALID_ARGUMENT;
4824 	}
4825 
4826 	const int64_t max_wire_pages = atop_64(vm_global_user_wire_limit);
4827 	/**
4828 	 * Available pages can be negative in the case where more system memory is
4829 	 * wired than the threshold, so we must use a signed integer.
4830 	 */
4831 	const int64_t available_pages = max_wire_pages - vm_page_wire_count;
4832 
4833 	if (requested_pages > available_pages) {
4834 		return KERN_RESOURCE_SHORTAGE;
4835 	}
4836 	return KERN_SUCCESS;
4837 }
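/*
 * A minimal usage sketch (illustrative only, not compiled; the caller below is
 * hypothetical): a kernel client that wants to wire `npages` pages can ask
 * first and back off gracefully instead of running into the global user wire
 * limit.
 */
#if 0
static kern_return_t
example_try_wire(int64_t npages)
{
	kern_return_t kr = mach_vm_wire_level_monitor(npages);

	if (kr == KERN_RESOURCE_SHORTAGE) {
		/* not enough headroom below vm_global_user_wire_limit */
		return kr;
	}
	if (kr != KERN_SUCCESS) {
		/* KERN_INVALID_ARGUMENT for npages <= 0 */
		return kr;
	}
	/* safe to go ahead and wire the memory now */
	return KERN_SUCCESS;
}
#endif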
4838 
4839 /*
4840  * Function called by a kernel thread to either get the current pressure level or
4841  * wait until memory pressure changes from a given level.
4842  */
4843 kern_return_t
4844 mach_vm_pressure_level_monitor(boolean_t wait_for_pressure, unsigned int *pressure_level)
4845 {
4846 #if !VM_PRESSURE_EVENTS
4847 	(void)wait_for_pressure;
4848 	(void)pressure_level;
4849 	return KERN_NOT_SUPPORTED;
4850 #else /* VM_PRESSURE_EVENTS */
4851 
4852 	uint32_t *waiters = NULL;
4853 	wait_result_t wr = 0;
4854 	vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4855 
4856 	if (pressure_level == NULL) {
4857 		return KERN_INVALID_ARGUMENT;
4858 	}
4859 	if (!wait_for_pressure && (*pressure_level == kVMPressureBackgroundJetsam ||
4860 	    *pressure_level == kVMPressureForegroundJetsam)) {
4861 		return KERN_INVALID_ARGUMENT;
4862 	}
4863 
4864 	if (wait_for_pressure) {
4865 		switch (*pressure_level) {
4866 		case kVMPressureForegroundJetsam:
4867 		case kVMPressureBackgroundJetsam:
4868 
4869 			if (*pressure_level == kVMPressureForegroundJetsam) {
4870 				waiters = &memorystatus_jetsam_fg_band_waiters;
4871 			} else {
4872 				/* kVMPressureBackgroundJetsam */
4873 				waiters = &memorystatus_jetsam_bg_band_waiters;
4874 			}
4875 
4876 			lck_mtx_lock(&memorystatus_jetsam_broadcast_lock);
4877 			wr = assert_wait((event_t)waiters, THREAD_INTERRUPTIBLE);
4878 			if (wr == THREAD_WAITING) {
4879 				*waiters += 1;
4880 				lck_mtx_unlock(&memorystatus_jetsam_broadcast_lock);
4881 				wr = thread_block(THREAD_CONTINUE_NULL);
4882 			} else {
4883 				lck_mtx_unlock(&memorystatus_jetsam_broadcast_lock);
4884 			}
4885 
4886 			if (wr != THREAD_AWAKENED) {
4887 				return KERN_ABORTED;
4888 			}
4889 
4890 			return KERN_SUCCESS;
4891 		case kVMPressureNormal:
4892 		case kVMPressureWarning:
4893 		case kVMPressureUrgent:
4894 		case kVMPressureCritical:
4895 			while (old_level == *pressure_level) {
4896 				wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
4897 				    THREAD_INTERRUPTIBLE);
4898 				if (wr == THREAD_WAITING) {
4899 					wr = thread_block(THREAD_CONTINUE_NULL);
4900 				}
4901 				if (wr == THREAD_INTERRUPTED) {
4902 					return KERN_ABORTED;
4903 				}
4904 
4905 				if (wr == THREAD_AWAKENED) {
4906 					old_level = memorystatus_vm_pressure_level;
4907 				}
4908 			}
4909 			break;
4910 		default:
4911 			return KERN_INVALID_ARGUMENT;
4912 		}
4913 	}
4914 
4915 	*pressure_level = old_level;
4916 	return KERN_SUCCESS;
4917 #endif /* VM_PRESSURE_EVENTS */
4918 }
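/*
 * A minimal usage sketch (illustrative only, not compiled): with
 * wait_for_pressure == FALSE the call simply reports the current level; with
 * wait_for_pressure == TRUE and one of the normal/warning/urgent/critical
 * levels it blocks until the level moves away from the level passed in.
 */
#if 0
static void
example_watch_pressure(void)
{
	unsigned int level = kVMPressureNormal;

	/* poll: *pressure_level is overwritten with the current level */
	if (mach_vm_pressure_level_monitor(FALSE, &level) != KERN_SUCCESS) {
		return;
	}
	/* block until the level changes away from what we just observed */
	if (mach_vm_pressure_level_monitor(TRUE, &level) == KERN_SUCCESS) {
		/* level now holds the new pressure level */
	}
}
#endif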
4919 
4920 #if VM_PRESSURE_EVENTS
4921 void
4922 vm_pressure_thread(void)
4923 {
4924 	static boolean_t thread_initialized = FALSE;
4925 
4926 	if (thread_initialized == TRUE) {
4927 		vm_pageout_state.vm_pressure_thread_running = TRUE;
4928 		consider_vm_pressure_events();
4929 		vm_pageout_state.vm_pressure_thread_running = FALSE;
4930 	}
4931 
4932 #if CONFIG_THREAD_GROUPS
4933 	thread_group_vm_add();
4934 #endif /* CONFIG_THREAD_GROUPS */
4935 
4936 	thread_set_thread_name(current_thread(), "VM_pressure");
4937 	thread_initialized = TRUE;
4938 	assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4939 	thread_block((thread_continue_t)vm_pressure_thread);
4940 }
4941 #endif /* VM_PRESSURE_EVENTS */
4942 
4943 
4944 /*
4945  * called once per second via "compute_averages"
4946  */
4947 void
4948 compute_pageout_gc_throttle(__unused void *arg)
4949 {
4950 	if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) {
4951 		vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page;
4952 
4953 		thread_wakeup(VM_PAGEOUT_GC_EVENT);
4954 	}
4955 }
4956 
4957 /*
4958  * vm_pageout_garbage_collect can also be called when the zone allocator needs
4959  * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4960  * jetsams. We need to check if the zone map size is above its jetsam limit to
4961  * decide if this was indeed the case.
4962  *
4963  * We need to do this on a different thread for the following reasons:
4964  *
4965  * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4966  * itself causing the system to hang. We perform synchronous jetsams if we're
4967  * itself, causing the system to hang. We perform synchronous jetsams if we're
4968  * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4969  * jetsam itself. We also need the vm_map lock on the process termination path,
4970  * which would now lead the dying process to deadlock against itself.
4971  *
4972  * 2. The jetsam path might need to allocate zone memory itself. We could try
4973  * using the non-blocking variant of zalloc for this path, but we can still
4974  * end up trying to do a kmem_alloc when the zone maps are almost full.
4975  */
4976 __dead2
4977 void
4978 vm_pageout_garbage_collect(void *step, wait_result_t wr __unused)
4979 {
4980 	assert(step == VM_PAGEOUT_GC_INIT || step == VM_PAGEOUT_GC_COLLECT);
4981 
4982 	if (step == VM_PAGEOUT_GC_INIT) {
4983 		/* the first call is for initialization, not GC */
4984 #if CONFIG_THREAD_GROUPS
4985 		thread_group_vm_add();
4986 #endif /* CONFIG_THREAD_GROUPS */
4987 	} else if (zone_map_nearing_exhaustion()) {
4988 		/*
4989 		 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4990 		 *
4991 		 * Bail out after calling zone_gc (which triggers the
4992 		 * zone-map-exhaustion jetsams). If we fall through, the subsequent
4993 		 * operations that clear out a bunch of caches might allocate zone
4994 		 * memory themselves (e.g. vm_map operations would need VM map
4995 		 * entries). Since the zone map is almost full at this point, we
4996 		 * could end up with a panic. We just need to quickly jetsam a
4997 		 * process and exit here.
4998 		 *
4999 		 * It could so happen that we were woken up to relieve memory
5000 		 * pressure and the zone map also happened to be near its limit at
5001 		 * the time, in which case we'll skip out early. But that should be
5002 		 * ok; if memory pressure persists, the thread will simply be woken
5003 		 * up again.
5004 		 */
5005 		zone_gc(ZONE_GC_JETSAM);
5006 	} else {
5007 		/* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
5008 		boolean_t buf_large_zfree = FALSE;
5009 		boolean_t first_try = TRUE;
5010 
5011 		stack_collect();
5012 
5013 		consider_machine_collect();
5014 #if CONFIG_MBUF_MCACHE
5015 		mbuf_drain(FALSE);
5016 #endif /* CONFIG_MBUF_MCACHE */
5017 
5018 		do {
5019 			if (consider_buffer_cache_collect != NULL) {
5020 				buf_large_zfree = (*consider_buffer_cache_collect)(0);
5021 			}
5022 			if (first_try == TRUE || buf_large_zfree == TRUE) {
5023 				/*
5024 				 * zone_gc should be last, because the other operations
5025 				 * might return memory to zones.
5026 				 */
5027 				zone_gc(ZONE_GC_TRIM);
5028 			}
5029 			first_try = FALSE;
5030 		} while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
5031 
5032 		consider_machine_adjust();
5033 	}
5034 
5035 	assert_wait(VM_PAGEOUT_GC_EVENT, THREAD_UNINT);
5036 
5037 	thread_block_parameter(vm_pageout_garbage_collect, VM_PAGEOUT_GC_COLLECT);
5038 	__builtin_unreachable();
5039 }
5040 
5041 
5042 #if VM_PAGE_BUCKETS_CHECK
5043 #if VM_PAGE_FAKE_BUCKETS
5044 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
5045 #endif /* VM_PAGE_FAKE_BUCKETS */
5046 #endif /* VM_PAGE_BUCKETS_CHECK */
5047 
5048 
5049 
5050 void
5051 vm_set_restrictions(unsigned int num_cpus)
5052 {
5053 	int vm_restricted_to_single_processor = 0;
5054 
5055 	if (PE_parse_boot_argn("vm_restricted_to_single_processor", &vm_restricted_to_single_processor, sizeof(vm_restricted_to_single_processor))) {
5056 		kprintf("Overriding vm_restricted_to_single_processor to %d\n", vm_restricted_to_single_processor);
5057 		vm_pageout_state.vm_restricted_to_single_processor = (vm_restricted_to_single_processor ? TRUE : FALSE);
5058 	} else {
5059 		assert(num_cpus > 0);
5060 
5061 		if (num_cpus <= 3) {
5062 			/*
5063 			 * on systems with a limited number of CPUs, bind the
5064 			 * 4 major threads that can free memory and that tend to use
5065 			 * a fair bit of CPU under pressured conditions to a single processor.
5066 			 * This ensures that these threads don't hog all of the available CPUs
5067 			 * (important for camera launch), while allowing them to run independently
5068 			 * with respect to locks... the 4 threads are
5069 			 * vm_pageout_scan,  vm_pageout_iothread_internal (compressor),
5070 			 * vm_compressor_swap_trigger_thread (minor and major compactions),
5071 			 * memorystatus_thread (jetsams).
5072 			 *
5073 			 * the first time the thread is run, it is responsible for checking the
5074 			 * state of vm_restricted_to_single_processor, and if TRUE it calls
5075 			 * thread_bind_master...  someday this should be replaced with a group
5076 			 * scheduling mechanism and KPI.
5077 			 */
5078 			vm_pageout_state.vm_restricted_to_single_processor = TRUE;
5079 		} else {
5080 			vm_pageout_state.vm_restricted_to_single_processor = FALSE;
5081 		}
5082 	}
5083 }
5084 
5085 /*
5086  * Set up vm_config based on the vm_compressor_mode.
5087  * Must run BEFORE the pageout thread starts up.
5088  */
5089 __startup_func
5090 void
5091 vm_config_init(void)
5092 {
5093 	bzero(&vm_config, sizeof(vm_config));
5094 
5095 	switch (vm_compressor_mode) {
5096 	case VM_PAGER_DEFAULT:
5097 		printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
5098 		OS_FALLTHROUGH;
5099 
5100 	case VM_PAGER_COMPRESSOR_WITH_SWAP:
5101 		vm_config.compressor_is_present = TRUE;
5102 		vm_config.swap_is_present = TRUE;
5103 		vm_config.compressor_is_active = TRUE;
5104 		vm_config.swap_is_active = TRUE;
5105 		break;
5106 
5107 	case VM_PAGER_COMPRESSOR_NO_SWAP:
5108 		vm_config.compressor_is_present = TRUE;
5109 		vm_config.swap_is_present = TRUE;
5110 		vm_config.compressor_is_active = TRUE;
5111 		break;
5112 
5113 	case VM_PAGER_FREEZER_DEFAULT:
5114 		printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
5115 		OS_FALLTHROUGH;
5116 
5117 	case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
5118 		vm_config.compressor_is_present = TRUE;
5119 		vm_config.swap_is_present = TRUE;
5120 		break;
5121 
5122 	case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
5123 		vm_config.compressor_is_present = TRUE;
5124 		vm_config.swap_is_present = TRUE;
5125 		vm_config.compressor_is_active = TRUE;
5126 		vm_config.freezer_swap_is_active = TRUE;
5127 		break;
5128 
5129 	case VM_PAGER_NOT_CONFIGURED:
5130 		break;
5131 
5132 	default:
5133 		printf("unknown compressor mode - %x\n", vm_compressor_mode);
5134 		break;
5135 	}
5136 }
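/*
 * Net effect of the switch above, per compressor mode:
 *
 *   mode                                     compressor  swap     compressor  swap    freezer swap
 *                                            present     present  active      active  active
 *   COMPRESSOR_WITH_SWAP (and DEFAULT)          yes        yes       yes        yes      no
 *   COMPRESSOR_NO_SWAP                          yes        yes       yes        no       no
 *   FREEZER_COMPRESSOR_NO_SWAP
 *     (and FREEZER_DEFAULT)                     yes        yes       no         no       no
 *   COMPRESSOR_NO_SWAP_PLUS_FREEZER_
 *     COMPRESSOR_WITH_SWAP                      yes        yes       yes        no       yes
 *   NOT_CONFIGURED                              no         no        no         no       no
 */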
5137 
5138 __startup_func
5139 static void
5140 vm_pageout_create_gc_thread(void)
5141 {
5142 	thread_t thread;
5143 
5144 	if (kernel_thread_create(vm_pageout_garbage_collect,
5145 	    VM_PAGEOUT_GC_INIT, BASEPRI_DEFAULT, &thread) != KERN_SUCCESS) {
5146 		panic("vm_pageout_garbage_collect: create failed");
5147 	}
5148 	thread_set_thread_name(thread, "VM_pageout_garbage_collect");
5149 	if (thread->reserved_stack == 0) {
5150 		assert(thread->kernel_stack);
5151 		thread->reserved_stack = thread->kernel_stack;
5152 	}
5153 
5154 	/* thread is started in vm_pageout() */
5155 	vm_pageout_gc_thread = thread;
5156 }
5157 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, vm_pageout_create_gc_thread);
5158 
5159 void
5160 vm_pageout(void)
5161 {
5162 	thread_t        self = current_thread();
5163 	thread_t        thread;
5164 	kern_return_t   result;
5165 	spl_t           s;
5166 
5167 	/*
5168 	 * Set thread privileges.
5169 	 */
5170 	s = splsched();
5171 
5172 #if CONFIG_VPS_DYNAMIC_PRIO
5173 	if (vps_dynamic_priority_enabled) {
5174 		sched_set_kernel_thread_priority(self, MAXPRI_THROTTLE);
5175 		thread_set_eager_preempt(self);
5176 	} else {
5177 		sched_set_kernel_thread_priority(self, BASEPRI_VM);
5178 	}
5179 #else /* CONFIG_VPS_DYNAMIC_PRIO */
5180 	sched_set_kernel_thread_priority(self, BASEPRI_VM);
5181 #endif /* CONFIG_VPS_DYNAMIC_PRIO */
5182 
5183 	thread_lock(self);
5184 	self->options |= TH_OPT_VMPRIV;
5185 	thread_unlock(self);
5186 
5187 	if (!self->reserved_stack) {
5188 		self->reserved_stack = self->kernel_stack;
5189 	}
5190 
5191 	if (vm_pageout_state.vm_restricted_to_single_processor == TRUE &&
5192 	    !vps_dynamic_priority_enabled) {
5193 		thread_vm_bind_group_add();
5194 	}
5195 
5196 
5197 #if CONFIG_THREAD_GROUPS
5198 	thread_group_vm_add();
5199 #endif /* CONFIG_THREAD_GROUPS */
5200 
5201 #if __AMP__
5202 	PE_parse_boot_argn("vmpgo_pcluster", &vm_pgo_pbound, sizeof(vm_pgo_pbound));
5203 	if (vm_pgo_pbound) {
5204 		/*
5205 		 * Use the soft bound option for vm pageout to allow it to run on
5206 		 * E-cores if P-cluster is unavailable.
5207 		 */
5208 		thread_bind_cluster_type(self, 'P', true);
5209 	}
5210 #endif /* __AMP__ */
5211 
5212 	PE_parse_boot_argn("vmpgo_protect_realtime",
5213 	    &vm_pageout_protect_realtime,
5214 	    sizeof(vm_pageout_protect_realtime));
5215 	splx(s);
5216 
5217 	thread_set_thread_name(current_thread(), "VM_pageout_scan");
5218 
5219 	/*
5220 	 *	Initialize some paging parameters.
5221 	 */
5222 
5223 	vm_pageout_state.vm_pressure_thread_running = FALSE;
5224 	vm_pageout_state.vm_pressure_changed = FALSE;
5225 	vm_pageout_state.memorystatus_purge_on_warning = 2;
5226 	vm_pageout_state.memorystatus_purge_on_urgent = 5;
5227 	vm_pageout_state.memorystatus_purge_on_critical = 8;
5228 	vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
5229 	vm_pageout_state.vm_page_speculative_percentage = 5;
5230 	vm_pageout_state.vm_page_speculative_target = 0;
5231 
5232 	vm_pageout_state.vm_pageout_swap_wait = 0;
5233 	vm_pageout_state.vm_pageout_idle_wait = 0;
5234 	vm_pageout_state.vm_pageout_empty_wait = 0;
5235 	vm_pageout_state.vm_pageout_burst_wait = 0;
5236 	vm_pageout_state.vm_pageout_deadlock_wait = 0;
5237 	vm_pageout_state.vm_pageout_deadlock_relief = 0;
5238 	vm_pageout_state.vm_pageout_burst_inactive_throttle = 0;
5239 
5240 	vm_pageout_state.vm_pageout_inactive = 0;
5241 	vm_pageout_state.vm_pageout_inactive_used = 0;
5242 	vm_pageout_state.vm_pageout_inactive_clean = 0;
5243 
5244 	vm_pageout_state.vm_memory_pressure = 0;
5245 	vm_pageout_state.vm_page_filecache_min = 0;
5246 #if CONFIG_JETSAM
5247 	vm_pageout_state.vm_page_filecache_min_divisor = 70;
5248 	vm_pageout_state.vm_page_xpmapped_min_divisor = 40;
5249 #else
5250 	vm_pageout_state.vm_page_filecache_min_divisor = 27;
5251 	vm_pageout_state.vm_page_xpmapped_min_divisor = 36;
5252 #endif
5253 	vm_pageout_state.vm_page_free_count_init = vm_page_free_count;
5254 
5255 	vm_pageout_state.vm_pageout_considered_page_last = 0;
5256 
5257 	if (vm_pageout_state.vm_pageout_swap_wait == 0) {
5258 		vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
5259 	}
5260 
5261 	if (vm_pageout_state.vm_pageout_idle_wait == 0) {
5262 		vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
5263 	}
5264 
5265 	if (vm_pageout_state.vm_pageout_burst_wait == 0) {
5266 		vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
5267 	}
5268 
5269 	if (vm_pageout_state.vm_pageout_empty_wait == 0) {
5270 		vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
5271 	}
5272 
5273 	if (vm_pageout_state.vm_pageout_deadlock_wait == 0) {
5274 		vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
5275 	}
5276 
5277 	if (vm_pageout_state.vm_pageout_deadlock_relief == 0) {
5278 		vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
5279 	}
5280 
5281 	if (vm_pageout_state.vm_pageout_burst_inactive_throttle == 0) {
5282 		vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
5283 	}
5284 	/*
5285 	 * even if we've already called vm_page_free_reserve,
5286 	 * call it again here to ensure that the targets are
5287 	 * accurately calculated (it uses vm_page_free_count_init);
5288 	 * calling it with an arg of 0 will not change the reserve
5289 	 * but will re-calculate free_min and free_target
5290 	 */
5291 	if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
5292 		vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
5293 	} else {
5294 		vm_page_free_reserve(0);
5295 	}
5296 
5297 	bzero(&vm_pageout_queue_external, sizeof(struct vm_pageout_queue));
5298 	bzero(&vm_pageout_queue_internal, sizeof(struct vm_pageout_queue));
5299 
5300 	vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
5301 	vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
5302 
5303 	vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
5304 
5305 #if DEVELOPMENT || DEBUG
5306 	bzero(&vm_pageout_queue_benchmark, sizeof(struct vm_pageout_queue));
5307 	vm_page_queue_init(&vm_pageout_queue_benchmark.pgo_pending);
5308 #endif /* DEVELOPMENT || DEBUG */
5309 
5310 
5311 	/* the internal pageout thread is started when the default pager is registered for the first time */
5312 	/* the external pageout and garbage collection threads are started here */
5313 	struct pgo_iothread_state *ethr = &pgo_iothread_external_state;
5314 	ethr->id = 0;
5315 	ethr->q = &vm_pageout_queue_external;
5316 	/* in external_state these cheads are never used; they are used only in internal_state for the compressor */
5317 	ethr->current_early_swapout_chead = NULL;
5318 	ethr->current_regular_swapout_chead = NULL;
5319 	ethr->current_late_swapout_chead = NULL;
5320 	ethr->scratch_buf = NULL;
5321 #if DEVELOPMENT || DEBUG
5322 	ethr->benchmark_q = NULL;
5323 #endif /* DEVELOPMENT || DEBUG */
5324 	sched_cond_init(&(ethr->pgo_wakeup));
5325 
5326 	result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external,
5327 	    (void *)ethr, BASEPRI_VM,
5328 	    &(ethr->pgo_iothread));
5329 	if (result != KERN_SUCCESS) {
5330 		panic("vm_pageout: Unable to create external thread (%d)\n", result);
5331 	}
5332 	thread_set_thread_name(ethr->pgo_iothread, "VM_pageout_external_iothread");
5333 
5334 	thread_mtx_lock(vm_pageout_gc_thread);
5335 	thread_start(vm_pageout_gc_thread);
5336 	thread_mtx_unlock(vm_pageout_gc_thread);
5337 
5338 #if VM_PRESSURE_EVENTS
5339 	result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
5340 	    BASEPRI_DEFAULT,
5341 	    &thread);
5342 
5343 	if (result != KERN_SUCCESS) {
5344 		panic("vm_pressure_thread: create failed");
5345 	}
5346 
5347 	thread_deallocate(thread);
5348 #endif
5349 
5350 	vm_object_reaper_init();
5351 
5352 
5353 	if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
5354 		vm_compressor_init();
5355 	}
5356 
5357 #if VM_PRESSURE_EVENTS
5358 	vm_pressure_events_enabled = TRUE;
5359 #endif /* VM_PRESSURE_EVENTS */
5360 
5361 #if CONFIG_PHANTOM_CACHE
5362 	vm_phantom_cache_init();
5363 #endif
5364 #if VM_PAGE_BUCKETS_CHECK
5365 #if VM_PAGE_FAKE_BUCKETS
5366 	printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
5367 	    (uint64_t) vm_page_fake_buckets_start,
5368 	    (uint64_t) vm_page_fake_buckets_end);
5369 	pmap_protect(kernel_pmap,
5370 	    vm_page_fake_buckets_start,
5371 	    vm_page_fake_buckets_end,
5372 	    VM_PROT_READ);
5373 //	*(char *) vm_page_fake_buckets_start = 'x';	/* panic! */
5374 #endif /* VM_PAGE_FAKE_BUCKETS */
5375 #endif /* VM_PAGE_BUCKETS_CHECK */
5376 
5377 #if VM_OBJECT_TRACKING
5378 	vm_object_tracking_init();
5379 #endif /* VM_OBJECT_TRACKING */
5380 
5381 #if __arm64__
5382 //	vm_tests();
5383 #endif /* __arm64__ */
5384 
5385 	vm_pageout_continue();
5386 
5387 	/*
5388 	 * Unreached code!
5389 	 *
5390 	 * The vm_pageout_continue() call above never returns, so the code below is never
5391 	 * executed.  We take advantage of this to declare several DTrace VM related probe
5392 	 * points that our kernel doesn't have an analog for.  These are probe points that
5393 	 * exist in Solaris and are in the DTrace documentation, so people may have written
5394 	 * scripts that use them.  Declaring the probe points here means their scripts will
5395 	 * compile and execute which we want for portability of the scripts, but since this
5396 	 * section of code is never reached, the probe points will simply never fire.  Yes,
5397 	 * this is basically a hack.  The problem is the DTrace probe points were chosen with
5398 	 * Solaris specific VM events in mind, not portability to different VM implementations.
5399 	 */
5400 
5401 	DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
5402 	DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
5403 	DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
5404 	DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
5405 	DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
5406 	DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
5407 	DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
5408 	/*NOTREACHED*/
5409 }
5410 
5411 
5412 
5413 kern_return_t
5414 vm_pageout_internal_start(void)
5415 {
5416 	kern_return_t   result = KERN_SUCCESS;
5417 	host_basic_info_data_t hinfo;
5418 	vm_offset_t     buf, bufsize;
5419 
5420 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
5421 
5422 	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
5423 #define BSD_HOST 1
5424 	host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
5425 
5426 	assert(hinfo.max_cpus > 0);
5427 
5428 #if !XNU_TARGET_OS_OSX
5429 	vm_pageout_state.vm_compressor_thread_count = 1;
5430 #else /* !XNU_TARGET_OS_OSX */
5431 	if (hinfo.max_cpus > 4) {
5432 		vm_pageout_state.vm_compressor_thread_count = 2;
5433 	} else {
5434 		vm_pageout_state.vm_compressor_thread_count = 1;
5435 	}
5436 #endif /* !XNU_TARGET_OS_OSX */
5437 #if     __AMP__
5438 	if (vm_compressor_ebound) {
5439 		vm_pageout_state.vm_compressor_thread_count = 2;
5440 	}
5441 #endif
5442 	PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
5443 	    sizeof(vm_pageout_state.vm_compressor_thread_count));
5444 
5445 	/* did we get an unreasonable number from the boot-args? */
5446 	if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus) {
5447 		vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
5448 	}
5449 	if (vm_pageout_state.vm_compressor_thread_count <= 0) {
5450 		vm_pageout_state.vm_compressor_thread_count = 1;
5451 	} else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT) {
5452 		vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
5453 	}
5454 
5455 	vm_pageout_queue_internal.pgo_maxlaundry =
5456 	    (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
5457 
5458 	PE_parse_boot_argn("vmpgoi_maxlaundry",
5459 	    &vm_pageout_queue_internal.pgo_maxlaundry,
5460 	    sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
5461 
5462 #if DEVELOPMENT || DEBUG
5463 	// Note: this will be modified at enqueue-time such that the benchmark queue is never throttled
5464 	vm_pageout_queue_benchmark.pgo_maxlaundry = vm_pageout_queue_internal.pgo_maxlaundry;
5465 #endif /* DEVELOPMENT || DEBUG */
5466 
5467 	bufsize = COMPRESSOR_SCRATCH_BUF_SIZE;
5468 
5469 	kmem_alloc(kernel_map, &buf,
5470 	    bufsize * vm_pageout_state.vm_compressor_thread_count,
5471 	    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT,
5472 	    VM_KERN_MEMORY_COMPRESSOR);
5473 
5474 	for (int i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
5475 		struct pgo_iothread_state *iq = &pgo_iothread_internal_state[i];
5476 		iq->id = i;
5477 		iq->q = &vm_pageout_queue_internal;
5478 		iq->current_early_swapout_chead = NULL;
5479 		iq->current_regular_swapout_chead = NULL;
5480 		iq->current_late_swapout_chead = NULL;
5481 		iq->scratch_buf = (char *)(buf + i * bufsize);
5482 #if DEVELOPMENT || DEBUG
5483 		iq->benchmark_q = &vm_pageout_queue_benchmark;
5484 #endif /* DEVELOPMENT || DEBUG */
5485 		sched_cond_init(&(iq->pgo_wakeup));
5486 		result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal,
5487 		    (void *)iq, BASEPRI_VM,
5488 		    &(iq->pgo_iothread));
5489 
5490 		if (result != KERN_SUCCESS) {
5491 			panic("vm_pageout: Unable to create compressor thread no. %d (%d)\n", i, result);
5492 		}
5493 	}
5494 	return result;
5495 }
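/*
 * For example (using the defaults above, no boot-args): a macOS machine with
 * more than 4 CPUs gets 2 compressor threads, so the internal queue allows
 * 2 * 4 * VM_PAGE_LAUNDRY_MAX laundry pages in flight; the "vmcomp_threads"
 * and "vmpgoi_maxlaundry" boot-args override the thread count and the limit
 * respectively.
 */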
5496 
5497 #if CONFIG_IOSCHED
5498 /*
5499  * To support I/O Expedite for compressed files we mark the upls with special flags.
5500  * The way decmpfs works is that we create a big upl which marks all the pages needed to
5501  * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
5502  * then issues smaller I/Os for the compressed data, decompresses them and puts the data into the pages
5503  * being held in the big original UPL. We mark each of these smaller UPLs with the flag
5504  * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
5505  * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
5506  * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
5507  * unless the real I/O upl is being destroyed).
5508  */
5509 
5510 
5511 static void
5512 upl_set_decmp_info(upl_t upl, upl_t src_upl)
5513 {
5514 	assert((src_upl->flags & UPL_DECMP_REQ) != 0);
5515 
5516 	upl_lock(src_upl);
5517 	if (src_upl->decmp_io_upl) {
5518 		/*
5519 		 * If there is already an alive real I/O UPL, ignore this new UPL.
5520 		 * This case should rarely happen and even if it does, it just means
5521 		 * that we might issue a spurious expedite which the driver is expected
5522 		 * to handle.
5523 		 */
5524 		upl_unlock(src_upl);
5525 		return;
5526 	}
5527 	src_upl->decmp_io_upl = (void *)upl;
5528 	src_upl->ref_count++;
5529 
5530 	upl->flags |= UPL_DECMP_REAL_IO;
5531 	upl->decmp_io_upl = (void *)src_upl;
5532 	upl_unlock(src_upl);
5533 }
5534 #endif /* CONFIG_IOSCHED */
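/*
 * Sketch of the linkage described above (derived from the routines in this
 * file): decmpfs marks its big request UPL via upl_mark_decmp(), which sets
 * UPL_DECMP_REQ and stashes the UPL in current_thread()->decmp_upl.  When a
 * smaller "real I/O" UPL with expedite support is later created on that same
 * thread, upl_create() notices curthread->decmp_upl and calls
 * upl_set_decmp_info() to link the two and take a reference on the request
 * UPL.  upl_unmark_decmp() clears the thread's pointer when no more sub-I/Os
 * will be issued, and upl_destroy() of the real I/O UPL breaks the link and
 * drops the extra reference via upl_deallocate().
 */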
5535 
5536 #if UPL_DEBUG
5537 int     upl_debug_enabled = 1;
5538 #else
5539 int     upl_debug_enabled = 0;
5540 #endif
5541 
5542 static upl_t
5543 upl_create(int type, int flags, upl_size_t size)
5544 {
5545 	uint32_t pages = (uint32_t)atop(round_page_32(size));
5546 	upl_t    upl;
5547 
5548 	assert(page_aligned(size));
5549 
5550 	/*
5551 	 * FIXME: this code assumes the allocation always succeeds,
5552 	 *        however `pages` can be up to MAX_UPL_SIZE.
5553 	 *
5554 	 *        The allocation size is above 32k (resp. 128k)
5555 	 *        on 16k pages (resp. 4k), which kalloc might fail
5556 	 *        to allocate.
5557 	 */
5558 	upl = kalloc_type(struct upl, struct upl_page_info,
5559 	    (type & UPL_CREATE_INTERNAL) ? pages : 0, Z_WAITOK | Z_ZERO);
5560 	if (type & UPL_CREATE_INTERNAL) {
5561 		flags |= UPL_INTERNAL;
5562 	}
5563 
5564 	if (type & UPL_CREATE_LITE) {
5565 		flags |= UPL_LITE;
5566 		if (pages) {
5567 			upl->lite_list = bitmap_alloc(pages);
5568 		}
5569 	}
5570 
5571 	upl->flags = flags;
5572 	upl->ref_count = 1;
5573 	upl_lock_init(upl);
5574 #if CONFIG_IOSCHED
5575 	if (type & UPL_CREATE_IO_TRACKING) {
5576 		upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
5577 	}
5578 
5579 	if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
5580 		/* Only support expedite on internal UPLs */
5581 		thread_t        curthread = current_thread();
5582 		upl->upl_reprio_info = kalloc_data(sizeof(uint64_t) * pages,
5583 		    Z_WAITOK | Z_ZERO);
5584 		upl->flags |= UPL_EXPEDITE_SUPPORTED;
5585 		if (curthread->decmp_upl != NULL) {
5586 			upl_set_decmp_info(upl, curthread->decmp_upl);
5587 		}
5588 	}
5589 #endif
5590 #if CONFIG_IOSCHED || UPL_DEBUG
5591 	if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
5592 		upl->upl_creator = current_thread();
5593 		upl->flags |= UPL_TRACKED_BY_OBJECT;
5594 	}
5595 #endif
5596 
5597 #if UPL_DEBUG
5598 	upl->uple_create_btref = btref_get(__builtin_frame_address(0), 0);
5599 #endif /* UPL_DEBUG */
5600 
5601 	return upl;
5602 }
5603 
5604 static void
5605 upl_destroy(upl_t upl)
5606 {
5607 	uint32_t pages;
5608 
5609 //	DEBUG4K_UPL("upl %p (u_offset 0x%llx u_size 0x%llx) object %p\n", upl, (uint64_t)upl->u_offset, (uint64_t)upl->u_size, upl->map_object);
5610 
5611 	if (upl->ext_ref_count) {
5612 		panic("upl(%p) ext_ref_count", upl);
5613 	}
5614 
5615 #if CONFIG_IOSCHED
5616 	if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
5617 		upl_t src_upl;
5618 		src_upl = upl->decmp_io_upl;
5619 		assert((src_upl->flags & UPL_DECMP_REQ) != 0);
5620 		upl_lock(src_upl);
5621 		src_upl->decmp_io_upl = NULL;
5622 		upl_unlock(src_upl);
5623 		upl_deallocate(src_upl);
5624 	}
5625 #endif /* CONFIG_IOSCHED */
5626 
5627 #if CONFIG_IOSCHED || UPL_DEBUG
5628 	if (((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) &&
5629 	    !(upl->flags & UPL_VECTOR)) {
5630 		vm_object_t     object;
5631 
5632 		if (upl->flags & UPL_SHADOWED) {
5633 			object = upl->map_object->shadow;
5634 		} else {
5635 			object = upl->map_object;
5636 		}
5637 
5638 		vm_object_lock(object);
5639 		queue_remove(&object->uplq, upl, upl_t, uplq);
5640 		vm_object_activity_end(object);
5641 		vm_object_collapse(object, 0, TRUE);
5642 		vm_object_unlock(object);
5643 	}
5644 #endif
5645 	/*
5646 	 * drop a reference on the map_object whether or
5647 	 * not a pageout object is inserted
5648 	 */
5649 	if (upl->flags & UPL_SHADOWED) {
5650 		vm_object_deallocate(upl->map_object);
5651 	}
5652 
5653 	if (upl->flags & UPL_DEVICE_MEMORY) {
5654 		pages = 1;
5655 	} else {
5656 		pages = (uint32_t)atop(upl_adjusted_size(upl, PAGE_MASK));
5657 	}
5658 
5659 	upl_lock_destroy(upl);
5660 
5661 #if CONFIG_IOSCHED
5662 	if (upl->flags & UPL_EXPEDITE_SUPPORTED) {
5663 		kfree_data(upl->upl_reprio_info, sizeof(uint64_t) * pages);
5664 	}
5665 #endif
5666 
5667 #if UPL_DEBUG
5668 	for (int i = 0; i < upl->upl_commit_index; i++) {
5669 		btref_put(upl->upl_commit_records[i].c_btref);
5670 	}
5671 	btref_put(upl->uple_create_btref);
5672 #endif /* UPL_DEBUG */
5673 
5674 	if ((upl->flags & UPL_LITE) && pages) {
5675 		bitmap_free(upl->lite_list, pages);
5676 	}
5677 	kfree_type(struct upl, struct upl_page_info,
5678 	    (upl->flags & UPL_INTERNAL) ? pages : 0, upl);
5679 }
5680 
5681 void
5682 upl_deallocate(upl_t upl)
5683 {
5684 	upl_lock(upl);
5685 
5686 	if (--upl->ref_count == 0) {
5687 		if (vector_upl_is_valid(upl)) {
5688 			vector_upl_deallocate(upl);
5689 		}
5690 		upl_unlock(upl);
5691 
5692 		if (upl->upl_iodone) {
5693 			upl_callout_iodone(upl);
5694 		}
5695 
5696 		upl_destroy(upl);
5697 	} else {
5698 		upl_unlock(upl);
5699 	}
5700 }
5701 
5702 #if CONFIG_IOSCHED
5703 void
5704 upl_mark_decmp(upl_t upl)
5705 {
5706 	if (upl->flags & UPL_TRACKED_BY_OBJECT) {
5707 		upl->flags |= UPL_DECMP_REQ;
5708 		upl->upl_creator->decmp_upl = (void *)upl;
5709 	}
5710 }
5711 
5712 void
5713 upl_unmark_decmp(upl_t upl)
5714 {
5715 	if (upl && (upl->flags & UPL_DECMP_REQ)) {
5716 		upl->upl_creator->decmp_upl = NULL;
5717 	}
5718 }
5719 
5720 #endif /* CONFIG_IOSCHED */
5721 
5722 #define VM_PAGE_Q_BACKING_UP(q)         \
5723 	((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
5724 
5725 boolean_t must_throttle_writes(void);
5726 
5727 boolean_t
5728 must_throttle_writes()
5729 {
5730 	if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
5731 	    vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10) {
5732 		return TRUE;
5733 	}
5734 
5735 	return FALSE;
5736 }
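/*
 * Worked example (hypothetical numbers): if pgo_maxlaundry were 128, the
 * external queue would be considered to be backing up once pgo_laundry
 * reaches 102 (8/10 of the max); writes are then throttled only if pageable
 * external memory also exceeds 6/10 of AVAILABLE_NON_COMPRESSED_MEMORY.
 */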
5737 
5738 int vm_page_delayed_work_ctx_needed = 0;
5739 KALLOC_TYPE_DEFINE(dw_ctx_zone, struct vm_page_delayed_work_ctx, KT_PRIV_ACCT);
5740 
5741 __startup_func
5742 static void
5743 vm_page_delayed_work_init_ctx(void)
5744 {
5745 	uint16_t min_delayed_work_ctx_allocated = 16;
5746 
5747 	/*
5748 	 * try really hard to always keep NCPU elements around in the zone
5749 	 * in order for the UPL code to almost always get an element.
5750 	 */
5751 	if (min_delayed_work_ctx_allocated < zpercpu_count()) {
5752 		min_delayed_work_ctx_allocated = (uint16_t)zpercpu_count();
5753 	}
5754 
5755 	zone_raise_reserve(dw_ctx_zone, min_delayed_work_ctx_allocated);
5756 }
5757 STARTUP(ZALLOC, STARTUP_RANK_LAST, vm_page_delayed_work_init_ctx);
5758 
5759 struct vm_page_delayed_work*
5760 vm_page_delayed_work_get_ctx(void)
5761 {
5762 	struct vm_page_delayed_work_ctx * dw_ctx = NULL;
5763 
5764 	dw_ctx = zalloc_flags(dw_ctx_zone, Z_ZERO | Z_NOWAIT);
5765 
5766 	if (__probable(dw_ctx)) {
5767 		dw_ctx->delayed_owner = current_thread();
5768 	} else {
5769 		vm_page_delayed_work_ctx_needed++;
5770 	}
5771 	return dw_ctx ? dw_ctx->dwp : NULL;
5772 }
5773 
5774 void
5775 vm_page_delayed_work_finish_ctx(struct vm_page_delayed_work* dwp)
5776 {
5777 	struct  vm_page_delayed_work_ctx *ldw_ctx;
5778 
5779 	ldw_ctx = (struct vm_page_delayed_work_ctx *)dwp;
5780 	ldw_ctx->delayed_owner = NULL;
5781 
5782 	zfree(dw_ctx_zone, ldw_ctx);
5783 }
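/*
 * Typical usage pattern (sketch of what vm_object_upl_request() below does):
 * try to get a delayed-work context from the zone; if the Z_NOWAIT allocation
 * fails, fall back to a single on-stack element with a work limit of 1.
 */
#if 0
	struct vm_page_delayed_work     dw_array;
	struct vm_page_delayed_work     *dwp_start;
	int                             dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
	boolean_t                       dwp_finish_ctx = TRUE;

	dwp_start = vm_page_delayed_work_get_ctx();
	if (dwp_start == NULL) {
		dwp_start = &dw_array;
		dw_limit = 1;
		dwp_finish_ctx = FALSE;
	}
	/* ... batch page operations through dwp_start[0 .. dw_limit-1] ... */
	if (dwp_finish_ctx) {
		vm_page_delayed_work_finish_ctx(dwp_start);
	}
#endif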
5784 
5785 /*
5786  *	Routine:	vm_object_upl_request
5787  *	Purpose:
5788  *		Cause the population of a portion of a vm_object.
5789  *		Depending on the nature of the request, the pages
5790  *		returned may contain valid data or be uninitialized.
5791  *		A page list structure, listing the physical pages,
5792  *		will be returned upon request.
5793  *		This function is called by the file system or any other
5794  *		supplier of backing store to a pager.
5795  *		IMPORTANT NOTE: The caller must still respect the relationship
5796  *		between the vm_object and its backing memory object.  The
5797  *		caller MUST NOT substitute changes in the backing file
5798  *		without first doing a memory_object_lock_request on the
5799  *		target range unless it is known that the pages are not
5800  *		shared with another entity at the pager level.
5801  *		Copy_in_to:
5802  *			if a page list structure is present
5803  *			return the mapped physical pages, where a
5804  *			page is not present, return a non-initialized
5805  *			one.  If the no_sync bit is turned on, don't
5806  *			call the pager unlock to synchronize with other
5807  *			possible copies of the page. Leave pages busy
5808  *			in the original object, if a page list structure
5809  *			was specified.  When a commit of the page list
5810  *			pages is done, the dirty bit will be set for each one.
5811  *		Copy_out_from:
5812  *			If a page list structure is present, return
5813  *			all mapped pages.  Where a page does not exist
5814  *			map a zero filled one. Leave pages busy in
5815  *			the original object.  If a page list structure
5816  *			is not specified, this call is a no-op.
5817  *
5818  *		Note:  access to default pager objects has a rather interesting
5819  *		twist.  The caller of this routine, presumably the file system
5820  *		page cache handling code, will never actually make a request
5821  *		against a default pager backed object.  Only the default
5822  *		pager will make requests on backing store related vm_objects
5823  *		In this way the default pager can maintain the relationship
5824  *		between backing store files (abstract memory objects) and
5825  *		the vm_objects (cache objects) they support.
5826  *
5827  */
5828 
5829 __private_extern__ kern_return_t
5830 vm_object_upl_request(
5831 	vm_object_t             object,
5832 	vm_object_offset_t      offset,
5833 	upl_size_t              size,
5834 	upl_t                   *upl_ptr,
5835 	upl_page_info_array_t   user_page_list,
5836 	unsigned int            *page_list_count,
5837 	upl_control_flags_t     cntrl_flags,
5838 	vm_tag_t                tag)
5839 {
5840 	vm_page_t               dst_page = VM_PAGE_NULL;
5841 	vm_object_offset_t      dst_offset;
5842 	upl_size_t              xfer_size;
5843 	unsigned int            size_in_pages;
5844 	boolean_t               dirty;
5845 	boolean_t               hw_dirty;
5846 	upl_t                   upl = NULL;
5847 	unsigned int            entry;
5848 	vm_page_t               alias_page = NULL;
5849 	int                     refmod_state = 0;
5850 	vm_object_t             last_copy_object;
5851 	uint32_t                last_copy_version;
5852 	struct  vm_page_delayed_work    dw_array;
5853 	struct  vm_page_delayed_work    *dwp, *dwp_start;
5854 	bool                    dwp_finish_ctx = TRUE;
5855 	int                     dw_count;
5856 	int                     dw_limit;
5857 	int                     io_tracking_flag = 0;
5858 	int                     grab_options;
5859 	int                     page_grab_count = 0;
5860 	ppnum_t                 phys_page;
5861 	pmap_flush_context      pmap_flush_context_storage;
5862 	boolean_t               pmap_flushes_delayed = FALSE;
5863 #if DEVELOPMENT || DEBUG
5864 	task_t                  task = current_task();
5865 #endif /* DEVELOPMENT || DEBUG */
5866 
5867 	dwp_start = dwp = NULL;
5868 
5869 	if (cntrl_flags & ~UPL_VALID_FLAGS) {
5870 		/*
5871 		 * For forward compatibility's sake,
5872 		 * reject any unknown flag.
5873 		 */
5874 		return KERN_INVALID_VALUE;
5875 	}
5876 	if ((!object->internal) && (object->paging_offset != 0)) {
5877 		panic("vm_object_upl_request: external object with non-zero paging offset");
5878 	}
5879 	if (object->phys_contiguous) {
5880 		panic("vm_object_upl_request: contiguous object specified");
5881 	}
5882 
5883 	assertf(page_aligned(offset) && page_aligned(size),
5884 	    "offset 0x%llx size 0x%x",
5885 	    offset, size);
5886 
5887 	VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, DBG_VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0);
5888 
5889 	dw_count = 0;
5890 	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5891 	dwp_start = vm_page_delayed_work_get_ctx();
5892 	if (dwp_start == NULL) {
5893 		dwp_start = &dw_array;
5894 		dw_limit = 1;
5895 		dwp_finish_ctx = FALSE;
5896 	}
5897 
5898 	dwp = dwp_start;
5899 
5900 	if (size > MAX_UPL_SIZE_BYTES) {
5901 		size = MAX_UPL_SIZE_BYTES;
5902 	}
5903 
5904 	if ((cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) {
5905 		*page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
5906 	}
5907 
5908 #if CONFIG_IOSCHED || UPL_DEBUG
5909 	if (object->io_tracking || upl_debug_enabled) {
5910 		io_tracking_flag |= UPL_CREATE_IO_TRACKING;
5911 	}
5912 #endif
5913 #if CONFIG_IOSCHED
5914 	if (object->io_tracking) {
5915 		io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
5916 	}
5917 #endif
5918 
5919 	if (cntrl_flags & UPL_SET_INTERNAL) {
5920 		if (cntrl_flags & UPL_SET_LITE) {
5921 			upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5922 		} else {
5923 			upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
5924 		}
5925 		user_page_list = size ? upl->page_list : NULL;
5926 	} else {
5927 		if (cntrl_flags & UPL_SET_LITE) {
5928 			upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
5929 		} else {
5930 			upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
5931 		}
5932 	}
5933 	*upl_ptr = upl;
5934 
5935 	if (user_page_list) {
5936 		user_page_list[0].device = FALSE;
5937 	}
5938 
5939 	if (cntrl_flags & UPL_SET_LITE) {
5940 		upl->map_object = object;
5941 	} else {
5942 		upl->map_object = vm_object_allocate(size);
5943 		vm_object_lock(upl->map_object);
5944 		/*
5945 		 * No need to lock the new object: nobody else knows
5946 		 * about it yet, so it's all ours so far.
5947 		 */
5948 		upl->map_object->shadow = object;
5949 		VM_OBJECT_SET_PAGEOUT(upl->map_object, TRUE);
5950 		VM_OBJECT_SET_CAN_PERSIST(upl->map_object, FALSE);
5951 		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5952 		upl->map_object->vo_shadow_offset = offset;
5953 		upl->map_object->wimg_bits = object->wimg_bits;
5954 		assertf(page_aligned(upl->map_object->vo_shadow_offset),
5955 		    "object %p shadow_offset 0x%llx",
5956 		    upl->map_object, upl->map_object->vo_shadow_offset);
5957 		vm_object_unlock(upl->map_object);
5958 
5959 		alias_page = vm_page_grab_fictitious(TRUE);
5960 
5961 		upl->flags |= UPL_SHADOWED;
5962 	}
5963 	if (cntrl_flags & UPL_FOR_PAGEOUT) {
5964 		upl->flags |= UPL_PAGEOUT;
5965 	}
5966 
5967 	vm_object_lock(object);
5968 	vm_object_activity_begin(object);
5969 
5970 	grab_options = 0;
5971 #if CONFIG_SECLUDED_MEMORY
5972 	if (object->can_grab_secluded) {
5973 		grab_options |= VM_PAGE_GRAB_SECLUDED;
5974 	}
5975 #endif /* CONFIG_SECLUDED_MEMORY */
5976 
5977 	/*
5978 	 * we can lock in the paging_offset once paging_in_progress is set
5979 	 */
5980 	upl->u_size = size;
5981 	upl->u_offset = offset + object->paging_offset;
5982 
5983 #if CONFIG_IOSCHED || UPL_DEBUG
5984 	if (object->io_tracking || upl_debug_enabled) {
5985 		vm_object_activity_begin(object);
5986 		queue_enter(&object->uplq, upl, upl_t, uplq);
5987 	}
5988 #endif
5989 	if ((cntrl_flags & UPL_WILL_MODIFY) && object->vo_copy != VM_OBJECT_NULL) {
5990 		/*
5991 		 * Honor copy-on-write obligations
5992 		 *
5993 		 * The caller is gathering these pages and
5994 		 * might modify their contents.  We need to
5995 		 * make sure that the copy object has its own
5996 		 * private copies of these pages before we let
5997 		 * the caller modify them.
5998 		 */
5999 		vm_object_update(object,
6000 		    offset,
6001 		    size,
6002 		    NULL,
6003 		    NULL,
6004 		    FALSE,              /* should_return */
6005 		    MEMORY_OBJECT_COPY_SYNC,
6006 		    VM_PROT_NO_CHANGE);
6007 
6008 		VM_PAGEOUT_DEBUG(upl_cow, 1);
6009 		VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT));
6010 	}
6011 	/*
6012 	 * remember which copy object we synchronized with
6013 	 */
6014 	last_copy_object = object->vo_copy;
6015 	last_copy_version = object->vo_copy_version;
6016 	entry = 0;
6017 
6018 	xfer_size = size;
6019 	dst_offset = offset;
6020 	size_in_pages = size / PAGE_SIZE;
6021 
6022 	if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
6023 	    object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) {
6024 		object->scan_collisions = 0;
6025 	}
6026 
6027 	if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
6028 		boolean_t       isSSD = FALSE;
6029 
6030 #if !XNU_TARGET_OS_OSX
6031 		isSSD = TRUE;
6032 #else /* !XNU_TARGET_OS_OSX */
6033 		vnode_pager_get_isSSD(object->pager, &isSSD);
6034 #endif /* !XNU_TARGET_OS_OSX */
6035 		vm_object_unlock(object);
6036 
6037 		OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
6038 
6039 		if (isSSD == TRUE) {
6040 			delay(1000 * size_in_pages);
6041 		} else {
6042 			delay(5000 * size_in_pages);
6043 		}
6044 		OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
6045 
6046 		vm_object_lock(object);
6047 	}
6048 
6049 	while (xfer_size) {
6050 		dwp->dw_mask = 0;
6051 
6052 		if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
6053 			vm_object_unlock(object);
6054 			alias_page = vm_page_grab_fictitious(TRUE);
6055 			vm_object_lock(object);
6056 		}
6057 		if (cntrl_flags & UPL_COPYOUT_FROM) {
6058 			upl->flags |= UPL_PAGE_SYNC_DONE;
6059 
6060 			if (((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
6061 			    dst_page->vmp_fictitious ||
6062 			    dst_page->vmp_absent ||
6063 			    VMP_ERROR_GET(dst_page) ||
6064 			    dst_page->vmp_cleaning ||
6065 			    (VM_PAGE_WIRED(dst_page))) {
6066 				if (user_page_list) {
6067 					user_page_list[entry].phys_addr = 0;
6068 				}
6069 
6070 				goto try_next_page;
6071 			}
6072 			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
6073 
6074 			/*
6075 			 * grab this up front...
6076 			 * a high percentage of the time we're going to
6077 			 * need the hardware modification state a bit later
6078 			 * anyway... so we can eliminate an extra call into
6079 			 * the pmap layer by grabbing it here and recording it
6080 			 */
6081 			if (dst_page->vmp_pmapped) {
6082 				refmod_state = pmap_get_refmod(phys_page);
6083 			} else {
6084 				refmod_state = 0;
6085 			}
6086 
6087 			if ((refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
6088 				/*
6089 				 * page is on inactive list and referenced...
6090 				 * reactivate it now... this gets it out of the
6091 				 * way of vm_pageout_scan which would have to
6092 				 * reactivate it upon tripping over it
6093 				 */
6094 				dwp->dw_mask |= DW_vm_page_activate;
6095 			}
6096 			if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
6097 				/*
6098 				 * we're only asking for DIRTY pages to be returned
6099 				 */
6100 				if (dst_page->vmp_laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
6101 					/*
6102 					 * if we were the page stolen by vm_pageout_scan to be
6103 					 * cleaned (as opposed to a buddy being clustered in),
6104 					 * or this request is not being driven by a PAGEOUT cluster,
6105 					 * then we only need to check for the page being dirty or
6106 					 * precious to decide whether to return it
6107 					 */
6108 					if (dst_page->vmp_dirty || dst_page->vmp_precious || (refmod_state & VM_MEM_MODIFIED)) {
6109 						goto check_busy;
6110 					}
6111 					goto dont_return;
6112 				}
6113 				/*
6114 				 * this is a request for a PAGEOUT cluster and this page
6115 				 * is merely along for the ride as a 'buddy'... not only
6116 				 * does it have to be dirty to be returned, but it also
6117 				 * can't have been referenced recently...
6118 				 */
6119 				if ((hibernate_cleaning_in_progress == TRUE ||
6120 				    (!((refmod_state & VM_MEM_REFERENCED) || dst_page->vmp_reference) ||
6121 				    (dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
6122 				    ((refmod_state & VM_MEM_MODIFIED) || dst_page->vmp_dirty || dst_page->vmp_precious)) {
6123 					goto check_busy;
6124 				}
6125 dont_return:
6126 				/*
6127 				 * if we reach here, we're not to return
6128 				 * the page... go on to the next one
6129 				 */
6130 				if (dst_page->vmp_laundry == TRUE) {
6131 					/*
6132 					 * if we get here, the page is not 'cleaning' (filtered out above).
6133 					 * since it has been referenced, remove it from the laundry
6134 					 * so we don't pay the cost of an I/O to clean a page
6135 					 * we're just going to take back
6136 					 */
6137 					vm_page_lockspin_queues();
6138 
6139 					vm_pageout_steal_laundry(dst_page, TRUE);
6140 					vm_page_activate(dst_page);
6141 
6142 					vm_page_unlock_queues();
6143 				}
6144 				if (user_page_list) {
6145 					user_page_list[entry].phys_addr = 0;
6146 				}
6147 
6148 				goto try_next_page;
6149 			}
6150 check_busy:
6151 			if (dst_page->vmp_busy) {
6152 				if (cntrl_flags & UPL_NOBLOCK) {
6153 					if (user_page_list) {
6154 						user_page_list[entry].phys_addr = 0;
6155 					}
6156 					dwp->dw_mask = 0;
6157 
6158 					goto try_next_page;
6159 				}
6160 				/*
6161 				 * someone else is playing with the
6162 				 * page.  We will have to wait.
6163 				 */
6164 				vm_page_sleep(object, dst_page, THREAD_UNINT, LCK_SLEEP_EXCLUSIVE);
6165 
6166 				continue;
6167 			}
6168 			if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
6169 				vm_page_lockspin_queues();
6170 
6171 				if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
6172 					/*
6173 					 * we've buddied up a page for a clustered pageout
6174 					 * that has already been moved to the pageout
6175 					 * queue by pageout_scan... we need to remove
6176 					 * it from the queue and drop the laundry count
6177 					 * on that queue
6178 					 */
6179 					vm_pageout_throttle_up(dst_page);
6180 				}
6181 				vm_page_unlock_queues();
6182 			}
6183 			hw_dirty = refmod_state & VM_MEM_MODIFIED;
6184 			dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
6185 
6186 			if (phys_page > upl->highest_page) {
6187 				upl->highest_page = phys_page;
6188 			}
6189 
6190 			assert(!pmap_is_noencrypt(phys_page));
6191 
6192 			if (cntrl_flags & UPL_SET_LITE) {
6193 				unsigned int    pg_num;
6194 
6195 				pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE);
6196 				assert(pg_num == (dst_offset - offset) / PAGE_SIZE);
6197 				bitmap_set(upl->lite_list, pg_num);
6198 
6199 				if (hw_dirty) {
6200 					if (pmap_flushes_delayed == FALSE) {
6201 						pmap_flush_context_init(&pmap_flush_context_storage);
6202 						pmap_flushes_delayed = TRUE;
6203 					}
6204 					pmap_clear_refmod_options(phys_page,
6205 					    VM_MEM_MODIFIED,
6206 					    PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_CLEAR_WRITE,
6207 					    &pmap_flush_context_storage);
6208 				}
6209 
6210 				/*
6211 				 * Mark original page as cleaning
6212 				 * in place.
6213 				 */
6214 				dst_page->vmp_cleaning = TRUE;
6215 				dst_page->vmp_precious = FALSE;
6216 			} else {
6217 				/*
6218 				 * use pageclean setup, it is more
6219 				 * convenient even for the pageout
6220 				 * cases here
6221 				 */
6222 				vm_object_lock(upl->map_object);
6223 				vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
6224 				vm_object_unlock(upl->map_object);
6225 
6226 				alias_page->vmp_absent = FALSE;
6227 				alias_page = NULL;
6228 			}
6229 			if (dirty) {
6230 				SET_PAGE_DIRTY(dst_page, FALSE);
6231 			} else {
6232 				dst_page->vmp_dirty = FALSE;
6233 			}
6234 
6235 			if (!dirty) {
6236 				dst_page->vmp_precious = TRUE;
6237 			}
6238 
6239 			if (!(cntrl_flags & UPL_CLEAN_IN_PLACE)) {
6240 				if (!VM_PAGE_WIRED(dst_page)) {
6241 					dst_page->vmp_free_when_done = TRUE;
6242 				}
6243 			}
6244 		} else {
6245 			if ((cntrl_flags & UPL_WILL_MODIFY) &&
6246 			    (object->vo_copy != last_copy_object ||
6247 			    object->vo_copy_version != last_copy_version)) {
6248 				/*
6249 				 * Honor copy-on-write obligations
6250 				 *
6251 				 * The copy object has changed since we
6252 				 * last synchronized for copy-on-write.
6253 				 * Another copy object might have been
6254 				 * inserted while we released the object's
6255 				 * lock.  Since someone could have seen the
6256 				 * original contents of the remaining pages
6257 				 * through that new object, we have to
6258 				 * synchronize with it again for the remaining
6259 				 * pages only.  The previous pages are "busy"
6260 				 * so they can not be seen through the new
6261 				 * mapping.  The new mapping will see our
6262 				 * upcoming changes for those previous pages,
6263 				 * but that's OK since they couldn't see what
6264 				 * was there before.  It's just a race anyway
6265 				 * and there's no guarantee of consistency or
6266 				 * atomicity.  We just don't want new mappings
6267 				 * to see both the *before* and *after* pages.
6268 				 */
6269 				if (object->vo_copy != VM_OBJECT_NULL) {
6270 					vm_object_update(
6271 						object,
6272 						dst_offset,/* current offset */
6273 						xfer_size, /* remaining size */
6274 						NULL,
6275 						NULL,
6276 						FALSE,     /* should_return */
6277 						MEMORY_OBJECT_COPY_SYNC,
6278 						VM_PROT_NO_CHANGE);
6279 
6280 					VM_PAGEOUT_DEBUG(upl_cow_again, 1);
6281 					VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT));
6282 				}
6283 				/*
6284 				 * remember the copy object we synced with
6285 				 */
6286 				last_copy_object = object->vo_copy;
6287 				last_copy_version = object->vo_copy_version;
6288 			}
6289 			dst_page = vm_page_lookup(object, dst_offset);
6290 
6291 			if (dst_page != VM_PAGE_NULL) {
6292 				if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
6293 					/*
6294 					 * skip over pages already present in the cache
6295 					 */
6296 					if (user_page_list) {
6297 						user_page_list[entry].phys_addr = 0;
6298 					}
6299 
6300 					goto try_next_page;
6301 				}
6302 				if (dst_page->vmp_fictitious) {
6303 					panic("need corner case for fictitious page");
6304 				}
6305 
6306 				if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
6307 					/*
6308 					 * someone else is playing with the
6309 					 * page.  We will have to wait.
6310 					 */
6311 					vm_page_sleep(object, dst_page, THREAD_UNINT, LCK_SLEEP_EXCLUSIVE);
6312 
6313 					continue;
6314 				}
6315 				if (dst_page->vmp_laundry) {
6316 					vm_pageout_steal_laundry(dst_page, FALSE);
6317 				}
6318 			} else {
6319 				if (object->private) {
6320 					/*
6321 					 * This is a nasty wrinkle for users
6322 					 * This is a nasty wrinkle for users
6323 					 * of UPLs who encounter device or
6324 					 * private memory; however, it is
6325 					 * unavoidable: only a fault can
6326 					 * resolve the actual backing
6327 					 * physical page by asking the
6328 					 * backing device.
6329 					if (user_page_list) {
6330 						user_page_list[entry].phys_addr = 0;
6331 					}
6332 
6333 					goto try_next_page;
6334 				}
6335 				if (object->scan_collisions) {
6336 					/*
6337 					 * the pageout_scan thread is trying to steal
6338 					 * pages from this object, but has run into our
6339 					 * lock... grab 2 pages from the head of the object...
6340 					 * the first is freed on behalf of pageout_scan, the
6341 					 * 2nd is for our own use... we use vm_object_page_grab
6342 					 * in both cases to avoid taking pages from the free
6343 					 * list since we are under memory pressure and our
6344 					 * lock on this object is getting in the way of
6345 					 * relieving it
6346 					 */
6347 					dst_page = vm_object_page_grab(object);
6348 
6349 					if (dst_page != VM_PAGE_NULL) {
6350 						vm_page_release(dst_page,
6351 						    FALSE);
6352 					}
6353 
6354 					dst_page = vm_object_page_grab(object);
6355 				}
6356 				if (dst_page == VM_PAGE_NULL) {
6357 					/*
6358 					 * need to allocate a page
6359 					 */
6360 					dst_page = vm_page_grab_options(grab_options);
6361 					if (dst_page != VM_PAGE_NULL) {
6362 						page_grab_count++;
6363 					}
6364 				}
6365 				if (dst_page == VM_PAGE_NULL) {
6366 					if ((cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
6367 						/*
6368 						 * we don't want to stall waiting for pages to come onto the free list
6369 						 * while we're already holding absent pages in this UPL...
6370 						 * the caller will deal with the empty slots
6371 						 */
6372 						if (user_page_list) {
6373 							user_page_list[entry].phys_addr = 0;
6374 						}
6375 
6376 						goto try_next_page;
6377 					}
6378 					/*
6379 					 * no pages available... wait
6380 					 * then try again for the same
6381 					 * offset...
6382 					 */
6383 					vm_object_unlock(object);
6384 
6385 					OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
6386 
6387 					VM_DEBUG_EVENT(vm_upl_page_wait, DBG_VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
6388 
6389 					VM_PAGE_WAIT();
6390 					OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
6391 
6392 					VM_DEBUG_EVENT(vm_upl_page_wait, DBG_VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
6393 
6394 					vm_object_lock(object);
6395 
6396 					continue;
6397 				}
6398 				vm_page_insert(dst_page, object, dst_offset);
6399 
6400 				dst_page->vmp_absent = TRUE;
6401 				dst_page->vmp_busy = FALSE;
6402 
6403 				if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
6404 					/*
6405 					 * if UPL_RET_ONLY_ABSENT was specified,
6406 					 * then we're definitely setting up a
6407 					 * upl for a clustered read/pagein
6408 					 * operation... mark the pages as clustered
6409 					 * so upl_commit_range can put them on the
6410 					 * speculative list
6411 					 */
6412 					dst_page->vmp_clustered = TRUE;
6413 
6414 					if (!(cntrl_flags & UPL_FILE_IO)) {
6415 						counter_inc(&vm_statistics_pageins);
6416 					}
6417 				}
6418 			}
6419 			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
6420 
6421 			dst_page->vmp_overwriting = TRUE;
6422 
6423 			if (dst_page->vmp_pmapped) {
6424 				if (!(cntrl_flags & UPL_FILE_IO)) {
6425 					/*
6426 					 * eliminate all mappings from the
6427 					 * original object and its progeny
6428 					 */
6429 					refmod_state = pmap_disconnect(phys_page);
6430 				} else {
6431 					refmod_state = pmap_get_refmod(phys_page);
6432 				}
6433 			} else {
6434 				refmod_state = 0;
6435 			}
6436 
6437 			hw_dirty = refmod_state & VM_MEM_MODIFIED;
6438 			dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
6439 
6440 			if (cntrl_flags & UPL_SET_LITE) {
6441 				unsigned int    pg_num;
6442 
6443 				pg_num = (unsigned int) ((dst_offset - offset) / PAGE_SIZE);
6444 				assert(pg_num == (dst_offset - offset) / PAGE_SIZE);
6445 				bitmap_set(upl->lite_list, pg_num);
6446 
6447 				if (hw_dirty) {
6448 					pmap_clear_modify(phys_page);
6449 				}
6450 
6451 				/*
6452 				 * Mark original page as cleaning
6453 				 * in place.
6454 				 */
6455 				dst_page->vmp_cleaning = TRUE;
6456 				dst_page->vmp_precious = FALSE;
6457 			} else {
6458 				/*
6459 				 * use pageclean setup, it is more
6460 				 * convenient even for the pageout
6461 				 * cases here
6462 				 */
6463 				vm_object_lock(upl->map_object);
6464 				vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
6465 				vm_object_unlock(upl->map_object);
6466 
6467 				alias_page->vmp_absent = FALSE;
6468 				alias_page = NULL;
6469 			}
6470 
6471 			if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
6472 				upl->flags &= ~UPL_CLEAR_DIRTY;
6473 				upl->flags |= UPL_SET_DIRTY;
6474 				dirty = TRUE;
6475 				/*
6476 				 * Page belonging to a code-signed object is about to
6477 				 * be written. Mark it tainted and disconnect it from
6478 				 * all pmaps so processes have to fault it back in and
6479 				 * deal with the tainted bit.
6480 				 */
6481 				if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
6482 					dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE;
6483 					vm_page_upl_tainted++;
6484 					if (dst_page->vmp_pmapped) {
6485 						refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
6486 						if (refmod_state & VM_MEM_REFERENCED) {
6487 							dst_page->vmp_reference = TRUE;
6488 						}
6489 					}
6490 				}
6491 			} else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
6492 				/*
6493 				 * clean in place for read implies
6494 				 * that a write will be done on all
6495 				 * the pages that are dirty before
6496 				 * a upl commit is done.  The caller
6497 			 * a UPL commit is done.  The caller
6498 				 * contents of all pages marked dirty
6499 				 */
6500 				upl->flags |= UPL_CLEAR_DIRTY;
6501 			}
6502 			dst_page->vmp_dirty = dirty;
6503 
6504 			if (!dirty) {
6505 				dst_page->vmp_precious = TRUE;
6506 			}
6507 
6508 			if (!VM_PAGE_WIRED(dst_page)) {
6509 				/*
6510 				 * deny access to the target page while
6511 				 * it is being worked on
6512 				 */
6513 				dst_page->vmp_busy = TRUE;
6514 			} else {
6515 				dwp->dw_mask |= DW_vm_page_wire;
6516 			}
6517 
6518 			/*
6519 			 * We might be about to satisfy a fault which has been
6520 			 * requested. So no need for the "restart" bit.
6521 			 */
6522 			dst_page->vmp_restart = FALSE;
6523 			if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
6524 				/*
6525 				 * expect the page to be used
6526 				 */
6527 				dwp->dw_mask |= DW_set_reference;
6528 			}
6529 			if (cntrl_flags & UPL_PRECIOUS) {
6530 				if (object->internal) {
6531 					SET_PAGE_DIRTY(dst_page, FALSE);
6532 					dst_page->vmp_precious = FALSE;
6533 				} else {
6534 					dst_page->vmp_precious = TRUE;
6535 				}
6536 			} else {
6537 				dst_page->vmp_precious = FALSE;
6538 			}
6539 		}
6540 		if (dst_page->vmp_busy) {
6541 			upl->flags |= UPL_HAS_BUSY;
6542 		}
6543 
6544 		if (phys_page > upl->highest_page) {
6545 			upl->highest_page = phys_page;
6546 		}
6547 		assert(!pmap_is_noencrypt(phys_page));
6548 		if (user_page_list) {
6549 			user_page_list[entry].phys_addr = phys_page;
6550 			user_page_list[entry].free_when_done    = dst_page->vmp_free_when_done;
6551 			user_page_list[entry].absent    = dst_page->vmp_absent;
6552 			user_page_list[entry].dirty     = dst_page->vmp_dirty;
6553 			user_page_list[entry].precious  = dst_page->vmp_precious;
6554 			user_page_list[entry].device    = FALSE;
6555 			user_page_list[entry].needed    = FALSE;
6556 			if (dst_page->vmp_clustered == TRUE) {
6557 				user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
6558 			} else {
6559 				user_page_list[entry].speculative = FALSE;
6560 			}
6561 			user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
6562 			user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
6563 			user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
6564 			user_page_list[entry].mark      = FALSE;
6565 		}
6566 		/*
6567 		 * if UPL_RET_ONLY_ABSENT is set, then
6568 		 * we are working with a fresh page and we've
6569 		 * just set the clustered flag on it to
6570 		 * indicate that it was dragged in as part of a
6571 		 * speculative cluster... so leave it alone
6572 		 */
6573 		if (!(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
6574 			/*
6575 			 * someone is explicitly grabbing this page...
6576 			 * update clustered and speculative state
6577 			 *
6578 			 */
6579 			if (dst_page->vmp_clustered) {
6580 				VM_PAGE_CONSUME_CLUSTERED(dst_page);
6581 			}
6582 		}
6583 try_next_page:
6584 		if (dwp->dw_mask) {
6585 			if (dwp->dw_mask & DW_vm_page_activate) {
6586 				counter_inc(&vm_statistics_reactivations);
6587 			}
6588 
6589 			VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
6590 
6591 			if (dw_count >= dw_limit) {
6592 				vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
6593 
6594 				dwp = dwp_start;
6595 				dw_count = 0;
6596 			}
6597 		}
6598 		entry++;
6599 		dst_offset += PAGE_SIZE_64;
6600 		xfer_size -= PAGE_SIZE;
6601 	}
6602 	if (dw_count) {
6603 		vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
6604 		dwp = dwp_start;
6605 		dw_count = 0;
6606 	}
6607 
6608 	if (alias_page != NULL) {
6609 		VM_PAGE_FREE(alias_page);
6610 	}
6611 	if (pmap_flushes_delayed == TRUE) {
6612 		pmap_flush(&pmap_flush_context_storage);
6613 	}
6614 
6615 	if (page_list_count != NULL) {
6616 		if (upl->flags & UPL_INTERNAL) {
6617 			*page_list_count = 0;
6618 		} else if (*page_list_count > entry) {
6619 			*page_list_count = entry;
6620 		}
6621 	}
6622 #if UPL_DEBUG
6623 	upl->upl_state = 1;
6624 #endif
6625 	vm_object_unlock(object);
6626 
6627 	VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, DBG_VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
6628 #if DEVELOPMENT || DEBUG
6629 	if (task != NULL) {
6630 		ledger_credit(task->ledger, task_ledgers.pages_grabbed_upl, page_grab_count);
6631 	}
6632 #endif /* DEVELOPMENT || DEBUG */
6633 
6634 	if (dwp_start && dwp_finish_ctx) {
6635 		vm_page_delayed_work_finish_ctx(dwp_start);
6636 		dwp_start = dwp = NULL;
6637 	}
6638 
6639 	return KERN_SUCCESS;
6640 }
6641 
6642 /*
6643  *	Routine:	vm_object_super_upl_request
6644  *	Purpose:
6645  *		Cause the population of a portion of a vm_object
6646  *		in much the same way as memory_object_upl_request.
6647  *		Depending on the nature of the request, the pages
6648  *		returned may contain valid data or be uninitialized.
6649  *		However, the region may be expanded up to the super
6650  *		cluster size provided.
6651  */
6652 
6653 __private_extern__ kern_return_t
6654 vm_object_super_upl_request(
6655 	vm_object_t object,
6656 	vm_object_offset_t      offset,
6657 	upl_size_t              size,
6658 	upl_size_t              super_cluster,
6659 	upl_t                   *upl,
6660 	upl_page_info_t         *user_page_list,
6661 	unsigned int            *page_list_count,
6662 	upl_control_flags_t     cntrl_flags,
6663 	vm_tag_t                tag)
6664 {
6665 	if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR) == UPL_VECTOR)) {
6666 		return KERN_FAILURE;
6667 	}
6668 
6669 	assert(object->paging_in_progress);
6670 	offset = offset - object->paging_offset;
6671 
6672 	if (super_cluster > size) {
6673 		vm_object_offset_t      base_offset;
6674 		upl_size_t              super_size;
6675 		vm_object_size_t        super_size_64;
6676 
6677 		base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
6678 		super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster << 1 : super_cluster;
6679 		super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
6680 		super_size = (upl_size_t) super_size_64;
6681 		assert(super_size == super_size_64);
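		/*
		 * Illustrative example (not from the original source), assuming a
		 * 0x20000-byte super cluster: offset 0x31000, size 0x2000 yields
		 * base_offset 0x20000; since offset + size (0x33000) stays within
		 * base_offset + super_cluster (0x40000), super_size remains one
		 * super cluster, clipped above to the object size if needed.
		 */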
6682 
6683 		if (offset > (base_offset + super_size)) {
6684 			panic("vm_object_super_upl_request: Missed target pageout"
6685 			    " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
6686 			    offset, base_offset, super_size, super_cluster,
6687 			    size, object->paging_offset);
6688 		}
6689 		/*
6690 		 * apparently there is a case where the vm requests a
6691 		 * page to be written out whose offset is beyond the
6692 		 * object size
6693 		 */
6694 		if ((offset + size) > (base_offset + super_size)) {
6695 			super_size_64 = (offset + size) - base_offset;
6696 			super_size = (upl_size_t) super_size_64;
6697 			assert(super_size == super_size_64);
6698 		}
6699 
6700 		offset = base_offset;
6701 		size = super_size;
6702 	}
6703 	return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
6704 }
6705 
6706 int cs_executable_create_upl = 0;
6707 extern int proc_selfpid(void);
6708 extern char *proc_name_address(void *p);
6709 
6710 kern_return_t
6711 vm_map_create_upl(
6712 	vm_map_t                map,
6713 	vm_map_address_t        offset,
6714 	upl_size_t              *upl_size,
6715 	upl_t                   *upl,
6716 	upl_page_info_array_t   page_list,
6717 	unsigned int            *count,
6718 	upl_control_flags_t     *flags,
6719 	vm_tag_t                tag)
6720 {
6721 	vm_map_entry_t          entry;
6722 	upl_control_flags_t     caller_flags;
6723 	int                     force_data_sync;
6724 	int                     sync_cow_data;
6725 	vm_object_t             local_object;
6726 	vm_map_offset_t         local_offset;
6727 	vm_map_offset_t         local_start;
6728 	kern_return_t           ret;
6729 	vm_map_address_t        original_offset;
6730 	vm_map_size_t           original_size, adjusted_size;
6731 	vm_map_offset_t         local_entry_start;
6732 	vm_object_offset_t      local_entry_offset;
6733 	vm_object_offset_t      offset_in_mapped_page;
6734 	boolean_t               release_map = FALSE;
6735 
6736 
6737 start_with_map:
6738 
6739 	original_offset = offset;
6740 	original_size = *upl_size;
6741 	adjusted_size = original_size;
6742 
6743 	caller_flags = *flags;
6744 
6745 	if (caller_flags & ~UPL_VALID_FLAGS) {
6746 		/*
6747 		 * For forward compatibility's sake,
6748 		 * reject any unknown flag.
6749 		 */
6750 		ret = KERN_INVALID_VALUE;
6751 		goto done;
6752 	}
6753 	force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
6754 	sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
6755 
6756 	if (upl == NULL) {
6757 		ret = KERN_INVALID_ARGUMENT;
6758 		goto done;
6759 	}
6760 
6761 REDISCOVER_ENTRY:
6762 	vm_map_lock_read(map);
6763 
6764 	if (!vm_map_lookup_entry(map, offset, &entry)) {
6765 		vm_map_unlock_read(map);
6766 		ret = KERN_FAILURE;
6767 		goto done;
6768 	}
6769 
6770 	local_entry_start = entry->vme_start;
6771 	local_entry_offset = VME_OFFSET(entry);
6772 
6773 	if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
6774 		DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%x flags 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)offset, *upl_size, *flags);
6775 	}
6776 
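	/*
	 * Clip the request so it doesn't extend past the end of this map
	 * entry; the (possibly reduced) size is handed back in *upl_size.
	 */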
6777 	if (entry->vme_end - original_offset < adjusted_size) {
6778 		adjusted_size = entry->vme_end - original_offset;
6779 		assert(adjusted_size > 0);
6780 		*upl_size = (upl_size_t) adjusted_size;
6781 		assert(*upl_size == adjusted_size);
6782 	}
6783 
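	/*
	 * UPL_QUERY_OBJECT_TYPE: the caller only wants to know whether the
	 * backing object is private (device) memory and/or physically
	 * contiguous; report that in *flags and return without creating a UPL.
	 */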
6784 	if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
6785 		*flags = 0;
6786 
6787 		if (!entry->is_sub_map &&
6788 		    VME_OBJECT(entry) != VM_OBJECT_NULL) {
6789 			if (VME_OBJECT(entry)->private) {
6790 				*flags = UPL_DEV_MEMORY;
6791 			}
6792 
6793 			if (VME_OBJECT(entry)->phys_contiguous) {
6794 				*flags |= UPL_PHYS_CONTIG;
6795 			}
6796 		}
6797 		vm_map_unlock_read(map);
6798 		ret = KERN_SUCCESS;
6799 		goto done;
6800 	}
6801 
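	/*
	 * If the map uses a smaller page size than the kernel (the DEBUG4K
	 * case), align the request to the map's page boundaries and remember
	 * how far into that first mapped page the original offset landed.
	 */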
6802 	offset_in_mapped_page = 0;
6803 	if (VM_MAP_PAGE_SIZE(map) < PAGE_SIZE) {
6804 		offset = vm_map_trunc_page(original_offset, VM_MAP_PAGE_MASK(map));
6805 		*upl_size = (upl_size_t)
6806 		    (vm_map_round_page(original_offset + adjusted_size,
6807 		    VM_MAP_PAGE_MASK(map))
6808 		    - offset);
6809 
6810 		offset_in_mapped_page = original_offset - offset;
6811 		assert(offset_in_mapped_page < VM_MAP_PAGE_SIZE(map));
6812 
6813 		DEBUG4K_UPL("map %p (%d) offset 0x%llx size 0x%llx flags 0x%llx -> offset 0x%llx adjusted_size 0x%llx *upl_size 0x%x offset_in_mapped_page 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)original_offset, (uint64_t)original_size, *flags, (uint64_t)offset, (uint64_t)adjusted_size, *upl_size, offset_in_mapped_page);
6814 	}
6815 
6816 	if (!entry->is_sub_map) {
6817 		if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
6818 		    !VME_OBJECT(entry)->phys_contiguous) {
6819 			if (*upl_size > MAX_UPL_SIZE_BYTES) {
6820 				*upl_size = MAX_UPL_SIZE_BYTES;
6821 			}
6822 		}
6823 
6824 		/*
6825 		 *      Create an object if necessary.
6826 		 */
6827 		if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6828 			if (vm_map_lock_read_to_write(map)) {
6829 				goto REDISCOVER_ENTRY;
6830 			}
6831 
6832 			VME_OBJECT_SET(entry,
6833 			    vm_object_allocate((vm_size_t)
6834 			    vm_object_round_page((entry->vme_end - entry->vme_start))),
6835 			    false, 0);
6836 			VME_OFFSET_SET(entry, 0);
6837 			assert(entry->use_pmap);
6838 
6839 			vm_map_lock_write_to_read(map);
6840 		}
6841 
6842 		if (!(caller_flags & UPL_COPYOUT_FROM) &&
6843 		    !(entry->protection & VM_PROT_WRITE)) {
6844 			vm_map_unlock_read(map);
6845 			ret = KERN_PROTECTION_FAILURE;
6846 			goto done;
6847 		}
6848 	}
6849 
6850 #if !XNU_TARGET_OS_OSX
6851 	if (map->pmap != kernel_pmap &&
6852 	    (caller_flags & UPL_COPYOUT_FROM) &&
6853 	    (entry->protection & VM_PROT_EXECUTE) &&
6854 	    !(entry->protection & VM_PROT_WRITE)) {
6855 		vm_offset_t     kaddr;
6856 		vm_size_t       ksize;
6857 
6858 		/*
6859 		 * We're about to create a read-only UPL backed by
6860 		 * memory from an executable mapping.
6861 		 * Wiring the pages would result in the pages being copied
6862 		 * (due to the "MAP_PRIVATE" mapping) and no longer
6863 		 * code-signed, so no longer eligible for execution.
6864 		 * Instead, let's copy the data into a kernel buffer and
6865 		 * create the UPL from this kernel buffer.
6866 		 * The kernel buffer is then freed, leaving the UPL holding
6867 		 * the last reference on the VM object, so the memory will
6868 		 * be released when the UPL is committed.
6869 		 */
6870 
6871 		vm_map_unlock_read(map);
6872 		entry = VM_MAP_ENTRY_NULL;
6873 		/* allocate kernel buffer */
6874 		ksize = round_page(*upl_size);
6875 		kaddr = 0;
6876 		ret = kmem_alloc(kernel_map, &kaddr, ksize,
6877 		    KMA_PAGEABLE | KMA_DATA, tag);
6878 		if (ret == KERN_SUCCESS) {
6879 			/* copyin the user data */
6880 			ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
6881 		}
6882 		if (ret == KERN_SUCCESS) {
6883 			if (ksize > *upl_size) {
6884 				/* zero out the extra space in kernel buffer */
6885 				memset((void *)(kaddr + *upl_size),
6886 				    0,
6887 				    ksize - *upl_size);
6888 			}
6889 			/* create the UPL from the kernel buffer */
6890 			vm_object_offset_t      offset_in_object;
6891 			vm_object_offset_t      offset_in_object_page;
6892 
6893 			offset_in_object = offset - local_entry_start + local_entry_offset;
6894 			offset_in_object_page = offset_in_object - vm_object_trunc_page(offset_in_object);
6895 			assert(offset_in_object_page < PAGE_SIZE);
6896 			assert(offset_in_object_page + offset_in_mapped_page < PAGE_SIZE);
6897 			*upl_size -= offset_in_object_page + offset_in_mapped_page;
6898 			ret = vm_map_create_upl(kernel_map,
6899 			    (vm_map_address_t)(kaddr + offset_in_object_page + offset_in_mapped_page),
6900 			    upl_size, upl, page_list, count, flags, tag);
6901 		}
6902 		if (kaddr != 0) {
6903 			/* free the kernel buffer */
6904 			kmem_free(kernel_map, kaddr, ksize);
6905 			kaddr = 0;
6906 			ksize = 0;
6907 		}
6908 #if DEVELOPMENT || DEBUG
6909 		DTRACE_VM4(create_upl_from_executable,
6910 		    vm_map_t, map,
6911 		    vm_map_address_t, offset,
6912 		    upl_size_t, *upl_size,
6913 		    kern_return_t, ret);
6914 #endif /* DEVELOPMENT || DEBUG */
6915 		goto done;
6916 	}
6917 #endif /* !XNU_TARGET_OS_OSX */
6918 
6919 	if (!entry->is_sub_map) {
6920 		local_object = VME_OBJECT(entry);
6921 		assert(local_object != VM_OBJECT_NULL);
6922 	}
6923 
6924 	if (!entry->is_sub_map &&
6925 	    !entry->needs_copy &&
6926 	    *upl_size != 0 &&
6927 	    local_object->vo_size > *upl_size && /* partial UPL */
6928 	    entry->wired_count == 0 && /* No COW for entries that are wired */
6929 	    (map->pmap != kernel_pmap) && /* alias checks */
6930 	    (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
6931 	    ||
6932 	    ( /* case 2 */
6933 		    local_object->internal &&
6934 		    (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
6935 		    local_object->ref_count > 1))) {
6936 		vm_prot_t       prot;
6937 
6938 		/*
6939 		 * Case 1:
6940 		 * Set up the targeted range for copy-on-write to avoid
6941 		 * applying true_share/copy_delay to the entire object.
6942 		 *
6943 		 * Case 2:
6944 		 * This map entry covers only part of an internal
6945 		 * object.  There could be other map entries covering
6946 		 * other areas of this object and some of these map
6947 		 * entries could be marked as "needs_copy", which
6948 		 * assumes that the object is COPY_SYMMETRIC.
6949 		 * To avoid marking this object as COPY_DELAY and
6950 		 * "true_share", let's shadow it and mark the new
6951 		 * (smaller) object as "true_share" and COPY_DELAY.
6952 		 */
6953 
6954 		if (vm_map_lock_read_to_write(map)) {
6955 			goto REDISCOVER_ENTRY;
6956 		}
6957 		vm_map_lock_assert_exclusive(map);
6958 		assert(VME_OBJECT(entry) == local_object);
6959 
6960 		vm_map_clip_start(map,
6961 		    entry,
6962 		    vm_map_trunc_page(offset,
6963 		    VM_MAP_PAGE_MASK(map)));
6964 		vm_map_clip_end(map,
6965 		    entry,
6966 		    vm_map_round_page(offset + *upl_size,
6967 		    VM_MAP_PAGE_MASK(map)));
6968 		if ((entry->vme_end - offset) < *upl_size) {
6969 			*upl_size = (upl_size_t) (entry->vme_end - offset);
6970 			assert(*upl_size == entry->vme_end - offset);
6971 		}
6972 
6973 		prot = entry->protection & ~VM_PROT_WRITE;
6974 		if (override_nx(map, VME_ALIAS(entry)) && prot) {
6975 			prot |= VM_PROT_EXECUTE;
6976 		}
6977 		vm_object_pmap_protect(local_object,
6978 		    VME_OFFSET(entry),
6979 		    entry->vme_end - entry->vme_start,
6980 		    ((entry->is_shared ||
6981 		    map->mapped_in_other_pmaps)
6982 		    ? PMAP_NULL
6983 		    : map->pmap),
6984 		    VM_MAP_PAGE_SIZE(map),
6985 		    entry->vme_start,
6986 		    prot);
6987 
6988 		assert(entry->wired_count == 0);
6989 
6990 		/*
6991 		 * Lock the VM object and re-check its status: if it's mapped
6992 		 * in another address space, we could still be racing with
6993 		 * another thread holding that other VM map exclusively.
6994 		 */
6995 		vm_object_lock(local_object);
6996 		if (local_object->true_share) {
6997 			/* object is already in proper state: no COW needed */
6998 			assert(local_object->copy_strategy !=
6999 			    MEMORY_OBJECT_COPY_SYMMETRIC);
7000 		} else {
7001 			/* not true_share: ask for copy-on-write below */
7002 			assert(local_object->copy_strategy ==
7003 			    MEMORY_OBJECT_COPY_SYMMETRIC);
7004 			entry->needs_copy = TRUE;
7005 		}
7006 		vm_object_unlock(local_object);
7007 
7008 		vm_map_lock_write_to_read(map);
7009 	}
7010 
7011 	if (entry->needs_copy) {
7012 		/*
7013 		 * Honor copy-on-write for COPY_SYMMETRIC
7014 		 * strategy.
7015 		 */
7016 		vm_map_t                local_map;
7017 		vm_object_t             object;
7018 		vm_object_offset_t      new_offset;
7019 		vm_prot_t               prot;
7020 		boolean_t               wired;
7021 		vm_map_version_t        version;
7022 		vm_map_t                real_map;
7023 		vm_prot_t               fault_type;
7024 
7025 		local_map = map;
7026 
7027 		if (caller_flags & UPL_COPYOUT_FROM) {
7028 			fault_type = VM_PROT_READ | VM_PROT_COPY;
7029 			vm_counters.create_upl_extra_cow++;
7030 			vm_counters.create_upl_extra_cow_pages +=
7031 			    (entry->vme_end - entry->vme_start) / PAGE_SIZE;
7032 		} else {
7033 			fault_type = VM_PROT_WRITE;
7034 		}
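		/*
		 * Fault on this address with VM_PROT_COPY (or VM_PROT_WRITE)
		 * so the copy-on-write gets resolved now, then drop the locks
		 * and re-look up the (possibly changed) map entry.
		 */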
7035 		if (vm_map_lookup_and_lock_object(&local_map,
7036 		    offset, fault_type,
7037 		    OBJECT_LOCK_EXCLUSIVE,
7038 		    &version, &object,
7039 		    &new_offset, &prot, &wired,
7040 		    NULL,
7041 		    &real_map, NULL) != KERN_SUCCESS) {
7042 			if (fault_type == VM_PROT_WRITE) {
7043 				vm_counters.create_upl_lookup_failure_write++;
7044 			} else {
7045 				vm_counters.create_upl_lookup_failure_copy++;
7046 			}
7047 			vm_map_unlock_read(local_map);
7048 			ret = KERN_FAILURE;
7049 			goto done;
7050 		}
7051 		if (real_map != local_map) {
7052 			vm_map_unlock(real_map);
7053 		}
7054 		vm_map_unlock_read(local_map);
7055 
7056 		vm_object_unlock(object);
7057 
7058 		goto REDISCOVER_ENTRY;
7059 	}
7060 
7061 	if (entry->is_sub_map) {
7062 		vm_map_t        submap;
7063 
7064 		submap = VME_SUBMAP(entry);
7065 		local_start = entry->vme_start;
7066 		local_offset = (vm_map_offset_t)VME_OFFSET(entry);
7067 
7068 		vm_map_reference(submap);
7069 		vm_map_unlock_read(map);
7070 
7071 		DEBUG4K_UPL("map %p offset 0x%llx (0x%llx) size 0x%x (adjusted 0x%llx original 0x%llx) offset_in_mapped_page 0x%llx submap %p\n", map, (uint64_t)offset, (uint64_t)original_offset, *upl_size, (uint64_t)adjusted_size, (uint64_t)original_size, offset_in_mapped_page, submap);
7072 		offset += offset_in_mapped_page;
7073 		*upl_size -= offset_in_mapped_page;
7074 
7075 		if (release_map) {
7076 			vm_map_deallocate(map);
7077 		}
7078 		map = submap;
7079 		release_map = TRUE;
7080 		offset = local_offset + (offset - local_start);
7081 		goto start_with_map;
7082 	}
7083 
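	/*
	 * When the UPL will be written into (no UPL_COPYOUT_FROM) and the
	 * object sits in a copy-on-write chain, issue a
	 * MEMORY_OBJECT_DATA_SYNC against its shadow (when it has both a
	 * shadow and a copy object) before building the UPL, then go back
	 * and re-look up the map entry.
	 */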
7084 	if (sync_cow_data &&
7085 	    (VME_OBJECT(entry)->shadow ||
7086 	    VME_OBJECT(entry)->vo_copy)) {
7087 		local_object = VME_OBJECT(entry);
7088 		local_start = entry->vme_start;
7089 		local_offset = (vm_map_offset_t)VME_OFFSET(entry);
7090 
7091 		vm_object_reference(local_object);
7092 		vm_map_unlock_read(map);
7093 
7094 		if (local_object->shadow && local_object->vo_copy) {
7095 			vm_object_lock_request(local_object->shadow,
7096 			    ((vm_object_offset_t)
7097 			    ((offset - local_start) +
7098 			    local_offset) +
7099 			    local_object->vo_shadow_offset),
7100 			    *upl_size, FALSE,
7101 			    MEMORY_OBJECT_DATA_SYNC,
7102 			    VM_PROT_NO_CHANGE);
7103 		}
7104 		sync_cow_data = FALSE;
7105 		vm_object_deallocate(local_object);
7106 
7107 		goto REDISCOVER_ENTRY;
7108 	}
7109 	if (force_data_sync) {
7110 		local_object = VME_OBJECT(entry);
7111 		local_start = entry->vme_start;
7112 		local_offset = (vm_map_offset_t)VME_OFFSET(entry);
7113 
7114 		vm_object_reference(local_object);
7115 		vm_map_unlock_read(map);
7116 
7117 		vm_object_lock_request(local_object,
7118 		    ((vm_object_offset_t)
7119 		    ((offset - local_start) +
7120 		    local_offset)),
7121 		    (vm_object_size_t)*upl_size,
7122 		    FALSE,
7123 		    MEMORY_OBJECT_DATA_SYNC,
7124 		    VM_PROT_NO_CHANGE);
7125 
7126 		force_data_sync = FALSE;
7127 		vm_object_deallocate(local_object);
7128 
7129 		goto REDISCOVER_ENTRY;
7130 	}
7131 	if (VME_OBJECT(entry)->private) {
7132 		*flags = UPL_DEV_MEMORY;
7133 	} else {
7134 		*flags = 0;
7135 	}
7136 
7137 	if (VME_OBJECT(entry)->phys_contiguous) {
7138 		*flags |= UPL_PHYS_CONTIG;
7139 	}
7140 
7141 	local_object = VME_OBJECT(entry);
7142 	local_offset = (vm_map_offset_t)VME_OFFSET(entry);
7143 	local_start = entry->vme_start;
7144 
7145 	/*
7146 	 * Wiring will copy the pages to the shadow object.
7147 	 * The shadow object will not be code-signed so
7148 	 * attempting to execute code from these copied pages
7149 	 * would trigger a code-signing violation.
7150 	 */
7151 	if (entry->protection & VM_PROT_EXECUTE) {
7152 #if MACH_ASSERT
7153 		printf("pid %d[%s] create_upl out of executable range from "
7154 		    "0x%llx to 0x%llx: side effects may include "
7155 		    "code-signing violations later on\n",
7156 		    proc_selfpid(),
7157 		    (get_bsdtask_info(current_task())
7158 		    ? proc_name_address(get_bsdtask_info(current_task()))
7159 		    : "?"),
7160 		    (uint64_t) entry->vme_start,
7161 		    (uint64_t) entry->vme_end);
7162 #endif /* MACH_ASSERT */
7163 		DTRACE_VM2(cs_executable_create_upl,
7164 		    uint64_t, (uint64_t)entry->vme_start,
7165 		    uint64_t, (uint64_t)entry->vme_end);
7166 		cs_executable_create_upl++;
7167 	}
7168 
7169 	vm_object_lock(local_object);
7170 
7171 	/*
7172 	 * Ensure that this object is "true_share" and "copy_delay" now,
7173 	 * while we're still holding the VM map lock.  After we unlock the map,
7174 	 * anything could happen to that mapping, including some copy-on-write
7175 	 * activity.  We need to make sure that the IOPL will point at the
7176 	 * same memory as the mapping.
7177 	 */
7178 	if (local_object->true_share) {
7179 		assert(local_object->copy_strategy !=
7180 		    MEMORY_OBJECT_COPY_SYMMETRIC);
7181 	} else if (!is_kernel_object(local_object) &&
7182 	    local_object != compressor_object &&
7183 	    !local_object->phys_contiguous) {
7184 #if VM_OBJECT_TRACKING_OP_TRUESHARE
7185 		if (!local_object->true_share &&
7186 		    vm_object_tracking_btlog) {
7187 			btlog_record(vm_object_tracking_btlog, local_object,
7188 			    VM_OBJECT_TRACKING_OP_TRUESHARE,
7189 			    btref_get(__builtin_frame_address(0), 0));
7190 		}
7191 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
7192 		VM_OBJECT_SET_TRUE_SHARE(local_object, TRUE);
7193 		if (local_object->copy_strategy ==
7194 		    MEMORY_OBJECT_COPY_SYMMETRIC) {
7195 			local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7196 		}
7197 	}
7198 
7199 	vm_object_reference_locked(local_object);
7200 	vm_object_unlock(local_object);
7201 
7202 	vm_map_unlock_read(map);
7203 
7204 	offset += offset_in_mapped_page;
7205 	assert(*upl_size > offset_in_mapped_page);
7206 	*upl_size -= offset_in_mapped_page;
7207 
7208 	ret = vm_object_iopl_request(local_object,
7209 	    ((vm_object_offset_t)
7210 	    ((offset - local_start) + local_offset)),
7211 	    *upl_size,
7212 	    upl,
7213 	    page_list,
7214 	    count,
7215 	    caller_flags,
7216 	    tag);
7217 	vm_object_deallocate(local_object);
7218 
7219 done:
7220 	if (release_map) {
7221 		vm_map_deallocate(map);
7222 	}
7223 
7224 	return ret;
7225 }
7226 
7227 /*
7228  * Internal routine to enter a UPL into a VM map.
7229  *
7230  * JMM - This should just be doable through the standard
7231  * vm_map_enter() API.
7232  */
7233 kern_return_t
7234 vm_map_enter_upl_range(
7235 	vm_map_t                map,
7236 	upl_t                   upl,
7237 	vm_object_offset_t      offset_to_map,
7238 	vm_size_t               size_to_map,
7239 	vm_prot_t               prot_to_map,
7240 	vm_map_offset_t         *dst_addr)
7241 {
7242 	vm_map_size_t           size;
7243 	vm_object_offset_t      offset;
7244 	vm_map_offset_t         addr;
7245 	vm_page_t               m;
7246 	kern_return_t           kr;
7247 	int                     isVectorUPL = 0, curr_upl = 0;
7248 	upl_t                   vector_upl = NULL;
7249 	mach_vm_offset_t        vector_upl_dst_addr = 0;
7250 	vm_map_t                vector_upl_submap = NULL;
7251 	upl_offset_t            subupl_offset = 0;
7252 	upl_size_t              subupl_size = 0;
7253 
7254 	if (upl == UPL_NULL) {
7255 		return KERN_INVALID_ARGUMENT;
7256 	}
7257 
7258 	DEBUG4K_UPL("map %p upl %p flags 0x%x object %p offset 0x%llx (uploff: 0x%llx) size 0x%lx (uplsz: 0x%x) \n", map, upl, upl->flags, upl->map_object, offset_to_map, upl->u_offset, size_to_map, upl->u_size);
7259 	assert(map == kernel_map);
7260 
7261 	if ((isVectorUPL = vector_upl_is_valid(upl))) {
7262 		int mapped = 0, valid_upls = 0;
7263 		vector_upl = upl;
7264 
7265 		upl_lock(vector_upl);
7266 		for (curr_upl = 0; curr_upl < vector_upl_max_upls(vector_upl); curr_upl++) {
7267 			upl =  vector_upl_subupl_byindex(vector_upl, curr_upl );
7268 			if (upl == NULL) {
7269 				continue;
7270 			}
7271 			valid_upls++;
7272 			if (UPL_PAGE_LIST_MAPPED & upl->flags) {
7273 				mapped++;
7274 			}
7275 		}
7276 
7277 		if (mapped) {
7278 			if (mapped != valid_upls) {
7279 				panic("Only %d of the %d sub-upls within the Vector UPL are already mapped", mapped, valid_upls);
7280 			} else {
7281 				upl_unlock(vector_upl);
7282 				return KERN_FAILURE;
7283 			}
7284 		}
7285 
7286 		if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
7287 			panic("TODO4K: vector UPL not implemented");
7288 		}
7289 
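		/*
		 * Reserve a single kernel submap big enough for all sub-UPLs;
		 * each sub-UPL is then entered at its recorded offset within
		 * this submap on later passes through process_upl_to_enter.
		 */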
7290 		vector_upl_submap = kmem_suballoc(map, &vector_upl_dst_addr,
7291 		    vector_upl->u_size, VM_MAP_CREATE_DEFAULT,
7292 		    VM_FLAGS_ANYWHERE, KMS_NOFAIL | KMS_DATA,
7293 		    VM_KERN_MEMORY_NONE).kmr_submap;
7294 		map = vector_upl_submap;
7295 		vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
7296 		curr_upl = 0;
7297 	} else {
7298 		upl_lock(upl);
7299 	}
7300 
7301 process_upl_to_enter:
7302 	if (isVectorUPL) {
7303 		if (curr_upl == vector_upl_max_upls(vector_upl)) {
7304 			*dst_addr = vector_upl_dst_addr;
7305 			upl_unlock(vector_upl);
7306 			return KERN_SUCCESS;
7307 		}
7308 		upl =  vector_upl_subupl_byindex(vector_upl, curr_upl++ );
7309 		if (upl == NULL) {
7310 			goto process_upl_to_enter;
7311 		}
7312 
7313 		vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
7314 		*dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
7315 	} else {
7316 		/*
7317 		 * check to see if already mapped
7318 		 */
7319 		if (UPL_PAGE_LIST_MAPPED & upl->flags) {
7320 			upl_unlock(upl);
7321 			return KERN_FAILURE;
7322 		}
7323 	}
7324 
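	/*
	 * If the UPL has busy pages, or isn't backed by device memory /
	 * IO-wired / physically contiguous memory, replace upl->map_object
	 * (once) with a shadow object populated with wired "alias" pages
	 * that share the physical pages of the original object; the mapping
	 * below is then made through that shadow object.
	 */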
7325 	if ((!(upl->flags & UPL_SHADOWED)) &&
7326 	    ((upl->flags & UPL_HAS_BUSY) ||
7327 	    !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
7328 		vm_object_t             object;
7329 		vm_page_t               alias_page;
7330 		vm_object_offset_t      new_offset;
7331 		unsigned int            pg_num;
7332 
7333 		size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7334 		object = upl->map_object;
7335 		upl->map_object = vm_object_allocate(vm_object_round_page(size));
7336 
7337 		vm_object_lock(upl->map_object);
7338 
7339 		upl->map_object->shadow = object;
7340 		VM_OBJECT_SET_PAGEOUT(upl->map_object, TRUE);
7341 		VM_OBJECT_SET_CAN_PERSIST(upl->map_object, FALSE);
7342 		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
7343 		upl->map_object->vo_shadow_offset = upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset;
7344 		assertf(page_aligned(upl->map_object->vo_shadow_offset),
7345 		    "object %p shadow_offset 0x%llx",
7346 		    upl->map_object,
7347 		    (uint64_t)upl->map_object->vo_shadow_offset);
7348 		upl->map_object->wimg_bits = object->wimg_bits;
7349 		offset = upl->map_object->vo_shadow_offset;
7350 		new_offset = 0;
7351 
7352 		upl->flags |= UPL_SHADOWED;
7353 
7354 		while (size) {
7355 			pg_num = (unsigned int) (new_offset / PAGE_SIZE);
7356 			assert(pg_num == new_offset / PAGE_SIZE);
7357 
7358 			if (bitmap_test(upl->lite_list, pg_num)) {
7359 				alias_page = vm_page_grab_fictitious(TRUE);
7360 
7361 				vm_object_lock(object);
7362 
7363 				m = vm_page_lookup(object, offset);
7364 				if (m == VM_PAGE_NULL) {
7365 					panic("vm_upl_map: page missing");
7366 				}
7367 
7368 				/*
7369 				 * Convert the fictitious page to a private
7370 				 * shadow of the real page.
7371 				 */
7372 				assert(alias_page->vmp_fictitious);
7373 				alias_page->vmp_fictitious = FALSE;
7374 				alias_page->vmp_private = TRUE;
7375 				alias_page->vmp_free_when_done = TRUE;
7376 				/*
7377 				 * since m is a page in the upl it must
7378 				 * already be wired or BUSY, so it's
7379 				 * safe to assign the underlying physical
7380 				 * page to the alias
7381 				 */
7382 				VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
7383 
7384 				vm_object_unlock(object);
7385 
7386 				vm_page_lockspin_queues();
7387 				vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
7388 				vm_page_unlock_queues();
7389 
7390 				vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
7391 
7392 				assert(!alias_page->vmp_wanted);
7393 				alias_page->vmp_busy = FALSE;
7394 				alias_page->vmp_absent = FALSE;
7395 			}
7396 			size -= PAGE_SIZE;
7397 			offset += PAGE_SIZE_64;
7398 			new_offset += PAGE_SIZE_64;
7399 		}
7400 		vm_object_unlock(upl->map_object);
7401 	}
7402 	if (upl->flags & UPL_SHADOWED) {
7403 		if (isVectorUPL) {
7404 			offset = 0;
7405 		} else {
7406 			offset = offset_to_map;
7407 		}
7408 	} else {
7409 		offset = upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map)) - upl->map_object->paging_offset;
7410 		if (!isVectorUPL) {
7411 			offset += offset_to_map;
7412 		}
7413 	}
7414 
7415 	if (isVectorUPL) {
7416 		size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7417 	} else {
7418 		size = MIN(upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map)), size_to_map);
7419 	}
7420 
7421 	vm_object_reference(upl->map_object);
7422 
7423 	if (!isVectorUPL) {
7424 		*dst_addr = 0;
7425 		/*
7426 		 * NEED A UPL_MAP ALIAS
7427 		 */
7428 		kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
7429 		    VM_MAP_KERNEL_FLAGS_DATA_ANYWHERE(.vm_tag = VM_KERN_MEMORY_OSFMK),
7430 		    upl->map_object, offset, FALSE,
7431 		    prot_to_map, VM_PROT_ALL, VM_INHERIT_DEFAULT);
7432 
7433 		if (kr != KERN_SUCCESS) {
7434 			vm_object_deallocate(upl->map_object);
7435 			upl_unlock(upl);
7436 			return kr;
7437 		}
7438 	} else {
7439 		kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
7440 		    VM_MAP_KERNEL_FLAGS_FIXED(.vm_tag = VM_KERN_MEMORY_OSFMK),
7441 		    upl->map_object, offset, FALSE,
7442 		    prot_to_map, VM_PROT_ALL, VM_INHERIT_DEFAULT);
7443 		if (kr) {
7444 			panic("vm_map_enter failed for a Vector UPL");
7445 		}
7446 	}
7447 	upl->u_mapped_size = (upl_size_t) size; /* When we allow multiple submappings of the UPL */
7448 	                                        /* this will have to be an increment rather than */
7449 	                                        /* an assignment. */
7450 	vm_object_lock(upl->map_object);
7451 
7452 	for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
7453 		m = vm_page_lookup(upl->map_object, offset);
7454 
7455 		if (m) {
7456 			m->vmp_pmapped = TRUE;
7457 
7458 			/*
7459 			 * CODE SIGNING ENFORCEMENT: page has been wpmapped,
7460 			 * but only in kernel space. If this was on a user map,
7461 			 * we'd have to set the wpmapped bit.
7462 			 */
7463 			/* m->vmp_wpmapped = TRUE; */
7464 			assert(map->pmap == kernel_pmap);
7465 
7466 			kr = pmap_enter_check(map->pmap, addr, m, prot_to_map, VM_PROT_NONE, 0, TRUE);
7467 
7468 			assert(kr == KERN_SUCCESS);
7469 #if KASAN
7470 			kasan_notify_address(addr, PAGE_SIZE_64);
7471 #endif
7472 		}
7473 		offset += PAGE_SIZE_64;
7474 	}
7475 	vm_object_unlock(upl->map_object);
7476 
7477 	/*
7478 	 * hold a reference for the mapping
7479 	 */
7480 	upl->ref_count++;
7481 	upl->flags |= UPL_PAGE_LIST_MAPPED;
7482 	upl->kaddr = (vm_offset_t) *dst_addr;
7483 	assert(upl->kaddr == *dst_addr);
7484 
7485 	if (isVectorUPL) {
7486 		goto process_upl_to_enter;
7487 	}
7488 
7489 	if (!isVectorUPL) {
7490 		vm_map_offset_t addr_adjustment;
7491 
7492 		addr_adjustment = (vm_map_offset_t)(upl->u_offset - upl_adjusted_offset(upl, VM_MAP_PAGE_MASK(map)));
7493 		if (addr_adjustment) {
7494 			assert(VM_MAP_PAGE_MASK(map) != PAGE_MASK);
7495 			DEBUG4K_UPL("dst_addr 0x%llx (+ 0x%llx) -> 0x%llx\n", (uint64_t)*dst_addr, (uint64_t)addr_adjustment, (uint64_t)(*dst_addr + addr_adjustment));
7496 			*dst_addr += addr_adjustment;
7497 		}
7498 	}
7499 
7500 	upl_unlock(upl);
7501 
7502 	return KERN_SUCCESS;
7503 }
7504 
7505 kern_return_t
7506 vm_map_enter_upl(
7507 	vm_map_t                map,
7508 	upl_t                   upl,
7509 	vm_map_offset_t         *dst_addr)
7510 {
7511 	upl_size_t upl_size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7512 	return vm_map_enter_upl_range(map, upl, 0, upl_size, VM_PROT_DEFAULT, dst_addr);
7513 }
7514 
7515 /*
7516  * Internal routine to remove a UPL mapping from a VM map.
7517  *
7518  * XXX - This should just be doable through a standard
7519  * vm_map_remove() operation.  Otherwise, implicit clean-up
7520  * of the target map won't be able to correctly remove
7521  * these (and release the reference on the UPL).  Having
7522  * to do this means we can't map these into user-space
7523  * maps yet.
7524  */
7525 kern_return_t
7526 vm_map_remove_upl_range(
7527 	vm_map_t        map,
7528 	upl_t           upl,
7529 	__unused vm_object_offset_t    offset_to_unmap,
7530 	__unused vm_size_t             size_to_unmap)
7531 {
7532 	vm_address_t    addr;
7533 	upl_size_t      size;
7534 	int             isVectorUPL = 0, curr_upl = 0;
7535 	upl_t           vector_upl = NULL;
7536 
7537 	if (upl == UPL_NULL) {
7538 		return KERN_INVALID_ARGUMENT;
7539 	}
7540 
7541 	if ((isVectorUPL = vector_upl_is_valid(upl))) {
7542 		int     unmapped = 0, valid_upls = 0;
7543 		vector_upl = upl;
7544 		upl_lock(vector_upl);
7545 		for (curr_upl = 0; curr_upl < vector_upl_max_upls(vector_upl); curr_upl++) {
7546 			upl =  vector_upl_subupl_byindex(vector_upl, curr_upl );
7547 			if (upl == NULL) {
7548 				continue;
7549 			}
7550 			valid_upls++;
7551 			if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) {
7552 				unmapped++;
7553 			}
7554 		}
7555 
7556 		if (unmapped) {
7557 			if (unmapped != valid_upls) {
7558 				panic("%d of the %d sub-upls within the Vector UPL are not mapped", unmapped, valid_upls);
7559 			} else {
7560 				upl_unlock(vector_upl);
7561 				return KERN_FAILURE;
7562 			}
7563 		}
7564 		curr_upl = 0;
7565 	} else {
7566 		upl_lock(upl);
7567 	}
7568 
7569 process_upl_to_remove:
7570 	if (isVectorUPL) {
7571 		if (curr_upl == vector_upl_max_upls(vector_upl)) {
7572 			vm_map_t v_upl_submap;
7573 			vm_offset_t v_upl_submap_dst_addr;
7574 			vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
7575 
7576 			kmem_free_guard(map, v_upl_submap_dst_addr,
7577 			    vector_upl->u_size, KMF_NONE, KMEM_GUARD_SUBMAP);
7578 			vm_map_deallocate(v_upl_submap);
7579 			upl_unlock(vector_upl);
7580 			return KERN_SUCCESS;
7581 		}
7582 
7583 		upl =  vector_upl_subupl_byindex(vector_upl, curr_upl++ );
7584 		if (upl == NULL) {
7585 			goto process_upl_to_remove;
7586 		}
7587 	}
7588 
7589 	if (upl->flags & UPL_PAGE_LIST_MAPPED) {
7590 		addr = upl->kaddr;
7591 		size = upl->u_mapped_size;
7592 
7593 		assert(upl->ref_count > 1);
7594 		upl->ref_count--;               /* removing mapping ref */
7595 
7596 		upl->flags &= ~UPL_PAGE_LIST_MAPPED;
7597 		upl->kaddr = (vm_offset_t) 0;
7598 		upl->u_mapped_size = 0;
7599 
7600 		if (isVectorUPL) {
7601 			/*
7602 			 * If it's a Vectored UPL, we'll be removing the entire
7603 			 * submap anyway, so no need to remove individual UPL
7604 			 * element mappings from within the submap
7605 			 */
7606 			goto process_upl_to_remove;
7607 		}
7608 
7609 		upl_unlock(upl);
7610 
7611 		vm_map_remove(map,
7612 		    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
7613 		    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)));
7614 		return KERN_SUCCESS;
7615 	}
7616 	upl_unlock(upl);
7617 
7618 	return KERN_FAILURE;
7619 }
7620 
7621 kern_return_t
7622 vm_map_remove_upl(
7623 	vm_map_t        map,
7624 	upl_t           upl)
7625 {
7626 	upl_size_t upl_size = upl_adjusted_size(upl, VM_MAP_PAGE_MASK(map));
7627 	return vm_map_remove_upl_range(map, upl, 0, upl_size);
7628 }
7629 
7630 void
7631 iopl_valid_data(
7632 	upl_t    upl,
7633 	vm_tag_t tag)
7634 {
7635 	vm_object_t     object;
7636 	vm_offset_t     offset;
7637 	vm_page_t       m, nxt_page = VM_PAGE_NULL;
7638 	upl_size_t      size;
7639 	int             wired_count = 0;
7640 
7641 	if (upl == NULL) {
7642 		panic("iopl_valid_data: NULL upl");
7643 	}
7644 	if (vector_upl_is_valid(upl)) {
7645 		panic("iopl_valid_data: vector upl");
7646 	}
7647 	if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_SHADOWED | UPL_ACCESS_BLOCKED | UPL_IO_WIRE | UPL_INTERNAL)) != UPL_IO_WIRE) {
7648 		panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7649 	}
7650 
7651 	object = upl->map_object;
7652 
7653 	if (is_kernel_object(object) || object == compressor_object) {
7654 		panic("iopl_valid_data: object == kernel or compressor");
7655 	}
7656 
7657 	if (object->purgable == VM_PURGABLE_VOLATILE ||
7658 	    object->purgable == VM_PURGABLE_EMPTY) {
7659 		panic("iopl_valid_data: object %p purgable %d",
7660 		    object, object->purgable);
7661 	}
7662 
7663 	size = upl_adjusted_size(upl, PAGE_MASK);
7664 
7665 	vm_object_lock(object);
7666 	VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7667 
7668 	bool whole_object;
7669 
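	/*
	 * If the UPL spans the entire object and every page is resident, walk
	 * the object's page queue directly instead of doing a vm_page_lookup()
	 * per offset.
	 */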
7670 	if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) {
7671 		nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7672 		whole_object = true;
7673 	} else {
7674 		offset = (vm_offset_t)(upl_adjusted_offset(upl, PAGE_MASK) - object->paging_offset);
7675 		whole_object = false;
7676 	}
7677 
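	/*
	 * Any page still busy here must be an absent placeholder created for
	 * this IOPL: mark it valid (and dirty), wire it in place and wake up
	 * any waiters.
	 */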
7678 	while (size) {
7679 		if (whole_object) {
7680 			if (nxt_page != VM_PAGE_NULL) {
7681 				m = nxt_page;
7682 				nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
7683 			}
7684 		} else {
7685 			m = vm_page_lookup(object, offset);
7686 			offset += PAGE_SIZE;
7687 
7688 			if (m == VM_PAGE_NULL) {
7689 				panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7690 			}
7691 		}
7692 		if (m->vmp_busy) {
7693 			if (!m->vmp_absent) {
7694 				panic("iopl_valid_data: busy page w/o absent");
7695 			}
7696 
7697 			if (m->vmp_pageq.next || m->vmp_pageq.prev) {
7698 				panic("iopl_valid_data: busy+absent page on page queue");
7699 			}
7700 			if (m->vmp_reusable) {
7701 				panic("iopl_valid_data: %p is reusable", m);
7702 			}
7703 
7704 			m->vmp_absent = FALSE;
7705 			m->vmp_dirty = TRUE;
7706 			assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
7707 			assert(m->vmp_wire_count == 0);
7708 			m->vmp_wire_count++;
7709 			assert(m->vmp_wire_count);
7710 			if (m->vmp_wire_count == 1) {
7711 				m->vmp_q_state = VM_PAGE_IS_WIRED;
7712 				wired_count++;
7713 			} else {
7714 				panic("iopl_valid_data: %p already wired", m);
7715 			}
7716 
7717 			vm_page_wakeup_done(object, m);
7718 		}
7719 		size -= PAGE_SIZE;
7720 	}
7721 	if (wired_count) {
7722 		VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7723 		assert(object->resident_page_count >= object->wired_page_count);
7724 
7725 		/* no need to adjust purgeable accounting for this object: */
7726 		assert(object->purgable != VM_PURGABLE_VOLATILE);
7727 		assert(object->purgable != VM_PURGABLE_EMPTY);
7728 
7729 		vm_page_lockspin_queues();
7730 		vm_page_wire_count += wired_count;
7731 		vm_page_unlock_queues();
7732 	}
7733 	VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7734 	vm_object_unlock(object);
7735 }
7736 
7737 
7738 void
7739 vm_object_set_pmap_cache_attr(
7740 	vm_object_t             object,
7741 	upl_page_info_array_t   user_page_list,
7742 	unsigned int            num_pages,
7743 	boolean_t               batch_pmap_op)
7744 {
7745 	unsigned int    cache_attr = 0;
7746 
7747 	cache_attr = object->wimg_bits & VM_WIMG_MASK;
7748 	assert(user_page_list);
7749 	if (cache_attr != VM_WIMG_USE_DEFAULT) {
7750 		PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7751 	}
7752 }
7753 
7754 
7755 static bool
7756 vm_object_iopl_wire_full(
7757 	vm_object_t             object,
7758 	upl_t                   upl,
7759 	upl_page_info_array_t   user_page_list,
7760 	upl_control_flags_t     cntrl_flags,
7761 	vm_tag_t                tag)
7762 {
7763 	vm_page_t       dst_page;
7764 	unsigned int    entry;
7765 	int             page_count;
7766 	int             delayed_unlock = 0;
7767 	boolean_t       retval = TRUE;
7768 	ppnum_t         phys_page;
7769 
7770 	vm_object_lock_assert_exclusive(object);
7771 	assert(object->purgable != VM_PURGABLE_VOLATILE);
7772 	assert(object->purgable != VM_PURGABLE_EMPTY);
7773 	assert(object->pager == NULL);
7774 	assert(object->vo_copy == NULL);
7775 	assert(object->shadow == NULL);
7776 
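	/*
	 * Fast path for a fully resident object: walk the resident page queue
	 * and wire every page in place, giving up (retval FALSE) if any page
	 * is busy, absent, cleaning or otherwise unusable.
	 */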
7777 	page_count = object->resident_page_count;
7778 	dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7779 
7780 	vm_page_lock_queues();
7781 
7782 	while (page_count--) {
7783 		if (dst_page->vmp_busy ||
7784 		    dst_page->vmp_fictitious ||
7785 		    dst_page->vmp_absent ||
7786 		    VMP_ERROR_GET(dst_page) ||
7787 		    dst_page->vmp_cleaning ||
7788 		    dst_page->vmp_restart ||
7789 		    dst_page->vmp_laundry) {
7790 			retval = FALSE;
7791 			goto done;
7792 		}
7793 		if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
7794 			retval = FALSE;
7795 			goto done;
7796 		}
7797 		dst_page->vmp_reference = TRUE;
7798 
7799 		vm_page_wire(dst_page, tag, FALSE);
7800 
7801 		if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7802 			SET_PAGE_DIRTY(dst_page, FALSE);
7803 		}
7804 		entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
7805 		assert(entry >= 0 && entry < object->resident_page_count);
7806 		bitmap_set(upl->lite_list, entry);
7807 
7808 		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7809 
7810 		if (phys_page > upl->highest_page) {
7811 			upl->highest_page = phys_page;
7812 		}
7813 
7814 		if (user_page_list) {
7815 			user_page_list[entry].phys_addr = phys_page;
7816 			user_page_list[entry].absent    = dst_page->vmp_absent;
7817 			user_page_list[entry].dirty     = dst_page->vmp_dirty;
7818 			user_page_list[entry].free_when_done   = dst_page->vmp_free_when_done;
7819 			user_page_list[entry].precious  = dst_page->vmp_precious;
7820 			user_page_list[entry].device    = FALSE;
7821 			user_page_list[entry].speculative = FALSE;
7822 			user_page_list[entry].cs_validated = FALSE;
7823 			user_page_list[entry].cs_tainted = FALSE;
7824 			user_page_list[entry].cs_nx     = FALSE;
7825 			user_page_list[entry].needed    = FALSE;
7826 			user_page_list[entry].mark      = FALSE;
7827 		}
7828 		if (delayed_unlock++ > 256) {
7829 			delayed_unlock = 0;
7830 			lck_mtx_yield(&vm_page_queue_lock);
7831 
7832 			VM_CHECK_MEMORYSTATUS;
7833 		}
7834 		dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq);
7835 	}
7836 done:
7837 	vm_page_unlock_queues();
7838 
7839 	VM_CHECK_MEMORYSTATUS;
7840 
7841 	return retval;
7842 }
7843 
7844 
7845 static kern_return_t
7846 vm_object_iopl_wire_empty(
7847 	vm_object_t             object,
7848 	upl_t                   upl,
7849 	upl_page_info_array_t   user_page_list,
7850 	upl_control_flags_t     cntrl_flags,
7851 	vm_tag_t                tag,
7852 	vm_object_offset_t     *dst_offset,
7853 	int                     page_count,
7854 	int                    *page_grab_count)
7855 {
7856 	vm_page_t       dst_page;
7857 	boolean_t       no_zero_fill = FALSE;
7858 	int             interruptible;
7859 	int             pages_wired = 0;
7860 	int             pages_inserted = 0;
7861 	int             entry = 0;
7862 	uint64_t        delayed_ledger_update = 0;
7863 	kern_return_t   ret = KERN_SUCCESS;
7864 	int             grab_options;
7865 	ppnum_t         phys_page;
7866 
7867 	vm_object_lock_assert_exclusive(object);
7868 	assert(object->purgable != VM_PURGABLE_VOLATILE);
7869 	assert(object->purgable != VM_PURGABLE_EMPTY);
7870 	assert(object->pager == NULL);
7871 	assert(object->vo_copy == NULL);
7872 	assert(object->shadow == NULL);
7873 
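	/*
	 * Fast path for an object with no resident pages: grab fresh pages
	 * (zero-filled unless UPL_NOZEROFILL* is set), insert them at
	 * consecutive offsets (wiring the ones that were zero-filled), and
	 * settle the owner's ledgers once at the end via delayed_ledger_update.
	 */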
7874 	if (cntrl_flags & UPL_SET_INTERRUPTIBLE) {
7875 		interruptible = THREAD_ABORTSAFE;
7876 	} else {
7877 		interruptible = THREAD_UNINT;
7878 	}
7879 
7880 	if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) {
7881 		no_zero_fill = TRUE;
7882 	}
7883 
7884 	grab_options = 0;
7885 #if CONFIG_SECLUDED_MEMORY
7886 	if (object->can_grab_secluded) {
7887 		grab_options |= VM_PAGE_GRAB_SECLUDED;
7888 	}
7889 #endif /* CONFIG_SECLUDED_MEMORY */
7890 
7891 	while (page_count--) {
7892 		while ((dst_page = vm_page_grab_options(grab_options))
7893 		    == VM_PAGE_NULL) {
7894 			OSAddAtomic(page_count, &vm_upl_wait_for_pages);
7895 
7896 			VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7897 
7898 			if (vm_page_wait(interruptible) == FALSE) {
7899 				/*
7900 				 * interrupted case
7901 				 */
7902 				OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7903 
7904 				VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7905 
7906 				ret = MACH_SEND_INTERRUPTED;
7907 				goto done;
7908 			}
7909 			OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7910 
7911 			VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
7912 		}
7913 		if (no_zero_fill == FALSE) {
7914 			vm_page_zero_fill(dst_page);
7915 		} else {
7916 			dst_page->vmp_absent = TRUE;
7917 		}
7918 
7919 		dst_page->vmp_reference = TRUE;
7920 
7921 		if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7922 			SET_PAGE_DIRTY(dst_page, FALSE);
7923 		}
7924 		if (dst_page->vmp_absent == FALSE) {
7925 			assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q);
7926 			assert(dst_page->vmp_wire_count == 0);
7927 			dst_page->vmp_wire_count++;
7928 			dst_page->vmp_q_state = VM_PAGE_IS_WIRED;
7929 			assert(dst_page->vmp_wire_count);
7930 			pages_wired++;
7931 			vm_page_wakeup_done(object, dst_page);
7932 		}
7933 		pages_inserted++;
7934 
7935 		vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
7936 
7937 		bitmap_set(upl->lite_list, entry);
7938 
7939 		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7940 
7941 		if (phys_page > upl->highest_page) {
7942 			upl->highest_page = phys_page;
7943 		}
7944 
7945 		if (user_page_list) {
7946 			user_page_list[entry].phys_addr = phys_page;
7947 			user_page_list[entry].absent    = dst_page->vmp_absent;
7948 			user_page_list[entry].dirty     = dst_page->vmp_dirty;
7949 			user_page_list[entry].free_when_done    = FALSE;
7950 			user_page_list[entry].precious  = FALSE;
7951 			user_page_list[entry].device    = FALSE;
7952 			user_page_list[entry].speculative = FALSE;
7953 			user_page_list[entry].cs_validated = FALSE;
7954 			user_page_list[entry].cs_tainted = FALSE;
7955 			user_page_list[entry].cs_nx     = FALSE;
7956 			user_page_list[entry].needed    = FALSE;
7957 			user_page_list[entry].mark      = FALSE;
7958 		}
7959 		entry++;
7960 		*dst_offset += PAGE_SIZE_64;
7961 	}
7962 done:
7963 	if (pages_wired) {
7964 		vm_page_lockspin_queues();
7965 		vm_page_wire_count += pages_wired;
7966 		vm_page_unlock_queues();
7967 	}
7968 	if (pages_inserted) {
7969 		if (object->internal) {
7970 			OSAddAtomic(pages_inserted, &vm_page_internal_count);
7971 		} else {
7972 			OSAddAtomic(pages_inserted, &vm_page_external_count);
7973 		}
7974 	}
7975 	if (delayed_ledger_update) {
7976 		task_t          owner;
7977 		int             ledger_idx_volatile;
7978 		int             ledger_idx_nonvolatile;
7979 		int             ledger_idx_volatile_compressed;
7980 		int             ledger_idx_nonvolatile_compressed;
7981 		int             ledger_idx_composite;
7982 		int             ledger_idx_external_wired;
7983 		boolean_t       do_footprint;
7984 
7985 		owner = VM_OBJECT_OWNER(object);
7986 		assert(owner);
7987 
7988 		vm_object_ledger_tag_ledgers(object,
7989 		    &ledger_idx_volatile,
7990 		    &ledger_idx_nonvolatile,
7991 		    &ledger_idx_volatile_compressed,
7992 		    &ledger_idx_nonvolatile_compressed,
7993 		    &ledger_idx_composite,
7994 		    &ledger_idx_external_wired,
7995 		    &do_footprint);
7996 
7997 		if (object->internal) {
7998 			/* more non-volatile bytes */
7999 			ledger_credit(owner->ledger,
8000 			    ledger_idx_nonvolatile,
8001 			    delayed_ledger_update);
8002 			if (do_footprint) {
8003 				/* more footprint */
8004 				ledger_credit(owner->ledger,
8005 				    task_ledgers.phys_footprint,
8006 				    delayed_ledger_update);
8007 			} else if (ledger_idx_composite != -1) {
8008 				ledger_credit(owner->ledger,
8009 				    ledger_idx_composite,
8010 				    delayed_ledger_update);
8011 			}
8012 		} else {
8013 			/* more external wired bytes */
8014 			ledger_credit(owner->ledger,
8015 			    ledger_idx_external_wired,
8016 			    delayed_ledger_update);
8017 			if (do_footprint) {
8018 				/* more footprint */
8019 				ledger_credit(owner->ledger,
8020 				    task_ledgers.phys_footprint,
8021 				    delayed_ledger_update);
8022 			} else if (ledger_idx_composite != -1) {
8023 				ledger_credit(owner->ledger,
8024 				    ledger_idx_composite,
8025 				    delayed_ledger_update);
8026 			}
8027 		}
8028 	}
8029 
8030 	assert(page_grab_count);
8031 	*page_grab_count = pages_inserted;
8032 
8033 	return ret;
8034 }
8035 
8036 
8037 
8038 kern_return_t
8039 vm_object_iopl_request(
8040 	vm_object_t             object,
8041 	vm_object_offset_t      offset,
8042 	upl_size_t              size,
8043 	upl_t                   *upl_ptr,
8044 	upl_page_info_array_t   user_page_list,
8045 	unsigned int            *page_list_count,
8046 	upl_control_flags_t     cntrl_flags,
8047 	vm_tag_t                tag)
8048 {
8049 	vm_page_t               dst_page;
8050 	vm_object_offset_t      dst_offset;
8051 	upl_size_t              xfer_size;
8052 	upl_t                   upl = NULL;
8053 	unsigned int            entry;
8054 	int                     no_zero_fill = FALSE;
8055 	unsigned int            size_in_pages;
8056 	int                     page_grab_count = 0;
8057 	u_int32_t               psize;
8058 	kern_return_t           ret;
8059 	vm_prot_t               prot;
8060 	struct vm_object_fault_info fault_info = {};
8061 	struct  vm_page_delayed_work    dw_array;
8062 	struct  vm_page_delayed_work    *dwp, *dwp_start;
8063 	bool                    dwp_finish_ctx = TRUE;
8064 	int                     dw_count;
8065 	int                     dw_limit;
8066 	int                     dw_index;
8067 	boolean_t               caller_lookup;
8068 	int                     io_tracking_flag = 0;
8069 	int                     interruptible;
8070 	ppnum_t                 phys_page;
8071 
8072 	boolean_t               set_cache_attr_needed = FALSE;
8073 	boolean_t               free_wired_pages = FALSE;
8074 	boolean_t               fast_path_empty_req = FALSE;
8075 	boolean_t               fast_path_full_req = FALSE;
8076 
8077 #if DEVELOPMENT || DEBUG
8078 	task_t                  task = current_task();
8079 #endif /* DEVELOPMENT || DEBUG */
8080 
8081 	dwp_start = dwp = NULL;
8082 
8083 	vm_object_offset_t original_offset = offset;
8084 	upl_size_t original_size = size;
8085 
8086 //	DEBUG4K_UPL("object %p offset 0x%llx size 0x%llx cntrl_flags 0x%llx\n", object, (uint64_t)offset, (uint64_t)size, cntrl_flags);
8087 
8088 	size = (upl_size_t)(vm_object_round_page(offset + size) - vm_object_trunc_page(offset));
8089 	offset = vm_object_trunc_page(offset);
8090 	if (size != original_size || offset != original_offset) {
8091 		DEBUG4K_IOKIT("flags 0x%llx object %p offset 0x%llx size 0x%x -> offset 0x%llx size 0x%x\n", cntrl_flags, object, original_offset, original_size, offset, size);
8092 	}
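	/*
	 * Illustrative example (not from the original source), assuming 4K
	 * pages: offset 0x1800, size 0x1000 becomes offset 0x1000, size
	 * 0x2000, so the UPL always covers whole pages.
	 */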
8093 
8094 	if (cntrl_flags & ~UPL_VALID_FLAGS) {
8095 		/*
8096 		 * For forward compatibility's sake,
8097 		 * reject any unknown flag.
8098 		 */
8099 		return KERN_INVALID_VALUE;
8100 	}
8101 	if (vm_lopage_needed == FALSE) {
8102 		cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8103 	}
8104 
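	/*
	 * UPL_NEED_32BIT_ADDR is only honored for wired "lite" UPLs; for a
	 * physically contiguous object, reject the request up front if any
	 * part of the range lies at or above max_valid_dma_address.
	 */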
8105 	if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8106 		if ((cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) {
8107 			return KERN_INVALID_VALUE;
8108 		}
8109 
8110 		if (object->phys_contiguous) {
8111 			if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) {
8112 				return KERN_INVALID_ADDRESS;
8113 			}
8114 
8115 			if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) {
8116 				return KERN_INVALID_ADDRESS;
8117 			}
8118 		}
8119 	}
8120 	if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) {
8121 		no_zero_fill = TRUE;
8122 	}
8123 
8124 	if (cntrl_flags & UPL_COPYOUT_FROM) {
8125 		prot = VM_PROT_READ;
8126 	} else {
8127 		prot = VM_PROT_READ | VM_PROT_WRITE;
8128 	}
8129 
8130 	if ((!object->internal) && (object->paging_offset != 0)) {
8131 		panic("vm_object_iopl_request: external object with non-zero paging offset");
8132 	}
8133 
8134 
8135 	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0);
8136 
8137 #if CONFIG_IOSCHED || UPL_DEBUG
8138 	if ((object->io_tracking && !is_kernel_object(object)) || upl_debug_enabled) {
8139 		io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8140 	}
8141 #endif
8142 
8143 #if CONFIG_IOSCHED
8144 	if (object->io_tracking) {
8145 		/* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8146 		if (!is_kernel_object(object)) {
8147 			io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8148 		}
8149 	}
8150 #endif
8151 
8152 	if (object->phys_contiguous) {
8153 		psize = PAGE_SIZE;
8154 	} else {
8155 		psize = size;
8156 
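		/*
		 * Set up a delayed-work context so per-page queue operations
		 * (wiring, reference bits) can be batched under a single
		 * page-queues lock acquisition; fall back to a one-entry
		 * on-stack array if no context can be allocated.
		 */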
8157 		dw_count = 0;
8158 		dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8159 		dwp_start = vm_page_delayed_work_get_ctx();
8160 		if (dwp_start == NULL) {
8161 			dwp_start = &dw_array;
8162 			dw_limit = 1;
8163 			dwp_finish_ctx = FALSE;
8164 		}
8165 
8166 		dwp = dwp_start;
8167 	}
8168 
8169 	if (cntrl_flags & UPL_SET_INTERNAL) {
8170 		upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8171 		user_page_list = size ? upl->page_list : NULL;
8172 	} else {
8173 		upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8174 	}
8175 	if (user_page_list) {
8176 		user_page_list[0].device = FALSE;
8177 	}
8178 	*upl_ptr = upl;
8179 
8180 	if (cntrl_flags & UPL_NOZEROFILLIO) {
8181 		DTRACE_VM4(upl_nozerofillio,
8182 		    vm_object_t, object,
8183 		    vm_object_offset_t, offset,
8184 		    upl_size_t, size,
8185 		    upl_t, upl);
8186 	}
8187 
8188 	upl->map_object = object;
8189 	upl->u_offset = original_offset;
8190 	upl->u_size = original_size;
8191 
8192 	size_in_pages = size / PAGE_SIZE;
8193 
8194 	if (is_kernel_object(object) &&
8195 	    !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8196 		upl->flags |= UPL_KERNEL_OBJECT;
8197 #if UPL_DEBUG
8198 		vm_object_lock(object);
8199 #else
8200 		vm_object_lock_shared(object);
8201 #endif
8202 	} else {
8203 		vm_object_lock(object);
8204 		vm_object_activity_begin(object);
8205 	}
8206 	/*
8207 	 * paging in progress also protects the paging_offset
8208 	 */
8209 	upl->u_offset = original_offset + object->paging_offset;
8210 
8211 	if (cntrl_flags & UPL_BLOCK_ACCESS) {
8212 		/*
8213 		 * The user requested that access to the pages in this UPL
8214 		 * be blocked until the UPL is committed or aborted.
8215 		 */
8216 		upl->flags |= UPL_ACCESS_BLOCKED;
8217 	}
8218 
8219 #if CONFIG_IOSCHED || UPL_DEBUG
8220 	if ((upl->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8221 		vm_object_activity_begin(object);
8222 		queue_enter(&object->uplq, upl, upl_t, uplq);
8223 	}
8224 #endif
8225 
8226 	if (object->phys_contiguous) {
8227 		if (upl->flags & UPL_ACCESS_BLOCKED) {
8228 			assert(!object->blocked_access);
8229 			object->blocked_access = TRUE;
8230 		}
8231 
8232 		vm_object_unlock(object);
8233 
8234 		/*
8235 		 * don't need any shadow mappings for this one
8236 		 * since it is already I/O memory
8237 		 */
8238 		upl->flags |= UPL_DEVICE_MEMORY;
8239 
8240 		upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1) >> PAGE_SHIFT);
8241 
8242 		if (user_page_list) {
8243 			user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset) >> PAGE_SHIFT);
8244 			user_page_list[0].device = TRUE;
8245 		}
8246 		if (page_list_count != NULL) {
8247 			if (upl->flags & UPL_INTERNAL) {
8248 				*page_list_count = 0;
8249 			} else {
8250 				*page_list_count = 1;
8251 			}
8252 		}
8253 
8254 		VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8255 #if DEVELOPMENT || DEBUG
8256 		if (task != NULL) {
8257 			ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
8258 		}
8259 #endif /* DEVELOPMENT || DEBUG */
8260 		return KERN_SUCCESS;
8261 	}
8262 	if (!is_kernel_object(object) && object != compressor_object) {
8263 		/*
8264 		 * Protect user space from future COW operations
8265 		 */
8266 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8267 		if (!object->true_share &&
8268 		    vm_object_tracking_btlog) {
8269 			btlog_record(vm_object_tracking_btlog, object,
8270 			    VM_OBJECT_TRACKING_OP_TRUESHARE,
8271 			    btref_get(__builtin_frame_address(0), 0));
8272 		}
8273 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8274 
8275 		vm_object_lock_assert_exclusive(object);
8276 		VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
8277 
8278 		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8279 			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8280 		}
8281 	}
8282 
8283 	if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8284 	    object->vo_copy != VM_OBJECT_NULL) {
8285 		/*
8286 		 * Honor copy-on-write obligations
8287 		 *
8288 		 * The caller is gathering these pages and
8289 		 * might modify their contents.  We need to
8290 		 * make sure that the copy object has its own
8291 		 * private copies of these pages before we let
8292 		 * the caller modify them.
8293 		 *
8294 		 * NOTE: someone else could map the original object
8295 		 * after we've done this copy-on-write here, and they
8296 		 * could then see an inconsistent picture of the memory
8297 		 * while it's being modified via the UPL.  To prevent this,
8298 		 * we would have to block access to these pages until the
8299 		 * UPL is released.  We could use the UPL_BLOCK_ACCESS
8300 		 * code path for that...
8301 		 */
8302 		vm_object_update(object,
8303 		    offset,
8304 		    size,
8305 		    NULL,
8306 		    NULL,
8307 		    FALSE,              /* should_return */
8308 		    MEMORY_OBJECT_COPY_SYNC,
8309 		    VM_PROT_NO_CHANGE);
8310 		VM_PAGEOUT_DEBUG(iopl_cow, 1);
8311 		VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT));
8312 	}
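	/*
	 * Fast paths: a request that spans an entire object with no copy,
	 * shadow or pager can be satisfied without vm_fault_page() -- either
	 * by wiring every already-resident page (full) or by grabbing fresh
	 * pages for a completely empty object (empty).
	 */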
8313 	if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8314 	    object->purgable != VM_PURGABLE_VOLATILE &&
8315 	    object->purgable != VM_PURGABLE_EMPTY &&
8316 	    object->vo_copy == NULL &&
8317 	    size == object->vo_size &&
8318 	    offset == 0 &&
8319 	    object->shadow == NULL &&
8320 	    object->pager == NULL) {
8321 		if (object->resident_page_count == size_in_pages) {
8322 			assert(object != compressor_object);
8323 			assert(!is_kernel_object(object));
8324 			fast_path_full_req = TRUE;
8325 		} else if (object->resident_page_count == 0) {
8326 			assert(object != compressor_object);
8327 			assert(!is_kernel_object(object));
8328 			fast_path_empty_req = TRUE;
8329 			set_cache_attr_needed = TRUE;
8330 		}
8331 	}
8332 
8333 	if (cntrl_flags & UPL_SET_INTERRUPTIBLE) {
8334 		interruptible = THREAD_ABORTSAFE;
8335 	} else {
8336 		interruptible = THREAD_UNINT;
8337 	}
8338 
8339 	entry = 0;
8340 
8341 	xfer_size = size;
8342 	dst_offset = offset;
8343 
8344 	if (fast_path_full_req) {
8345 		if (vm_object_iopl_wire_full(object, upl, user_page_list, cntrl_flags, tag) == TRUE) {
8346 			goto finish;
8347 		}
8348 		/*
8349 		 * we couldn't complete the processing of this request on the fast path
8350 		 * so fall through to the slow path and finish up
8351 		 */
8352 	} else if (fast_path_empty_req) {
8353 		if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8354 			ret = KERN_MEMORY_ERROR;
8355 			goto return_err;
8356 		}
8357 		ret = vm_object_iopl_wire_empty(object, upl, user_page_list,
8358 		    cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
8359 
8360 		if (ret) {
8361 			free_wired_pages = TRUE;
8362 			goto return_err;
8363 		}
8364 		goto finish;
8365 	}
8366 
8367 	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8368 	fault_info.lo_offset = offset;
8369 	fault_info.hi_offset = offset + xfer_size;
8370 	fault_info.mark_zf_absent = TRUE;
8371 	fault_info.interruptible = interruptible;
8372 	fault_info.batch_pmap_op = TRUE;
8373 
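	/*
	 * Slow path: walk the range one page at a time, calling
	 * vm_fault_page() for anything that isn't resident, and queue the
	 * wiring / reference-bit updates as delayed work.
	 */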
8374 	while (xfer_size) {
8375 		vm_fault_return_t       result;
8376 
8377 		dwp->dw_mask = 0;
8378 
8379 		if (fast_path_full_req) {
8380 			/*
8381 			 * if we get here, it means that we ran into a page
8382 			 * state we couldn't handle in the fast path and
8383 			 * bailed out to the slow path... since the order
8384 			 * we look at pages is different between the 2 paths,
8385 			 * the following check is needed to determine whether
8386 			 * this page was already processed in the fast path
8387 			 */
8388 			if (bitmap_test(upl->lite_list, entry)) {
8389 				goto skip_page;
8390 			}
8391 		}
8392 		dst_page = vm_page_lookup(object, dst_offset);
8393 
8394 		if (dst_page == VM_PAGE_NULL ||
8395 		    dst_page->vmp_busy ||
8396 		    VMP_ERROR_GET(dst_page) ||
8397 		    dst_page->vmp_restart ||
8398 		    dst_page->vmp_absent ||
8399 		    dst_page->vmp_fictitious) {
8400 			if (is_kernel_object(object)) {
8401 				panic("vm_object_iopl_request: missing/bad page in kernel object");
8402 			}
8403 			if (object == compressor_object) {
8404 				panic("vm_object_iopl_request: missing/bad page in compressor object");
8405 			}
8406 
8407 			if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8408 				ret = KERN_MEMORY_ERROR;
8409 				goto return_err;
8410 			}
8411 			set_cache_attr_needed = TRUE;
8412 
8413 			/*
8414 			 * We just looked up the page and the result remains valid
8415 			 * until the object lock is released, so send it to
8416 			 * vm_fault_page() (as "dst_page"), to avoid having to
8417 			 * look it up again there.
8418 			 */
8419 			caller_lookup = TRUE;
8420 
8421 			do {
8422 				vm_page_t       top_page;
8423 				kern_return_t   error_code;
8424 
8425 				fault_info.cluster_size = xfer_size;
8426 
8427 				vm_object_paging_begin(object);
8428 
8429 				result = vm_fault_page(object, dst_offset,
8430 				    prot | VM_PROT_WRITE, FALSE,
8431 				    caller_lookup,
8432 				    &prot, &dst_page, &top_page,
8433 				    (int *)0,
8434 				    &error_code, no_zero_fill,
8435 				    &fault_info);
8436 
8437 				/* our lookup is no longer valid at this point */
8438 				caller_lookup = FALSE;
8439 
8440 				switch (result) {
8441 				case VM_FAULT_SUCCESS:
8442 					page_grab_count++;
8443 
8444 					if (!dst_page->vmp_absent) {
8445 						vm_page_wakeup_done(object, dst_page);
8446 					} else {
8447 						/*
8448 						 * we only get back an absent page if we
8449 						 * requested that it not be zero-filled
8450 						 * because we are about to fill it via I/O
8451 						 *
8452 						 * absent pages should be left BUSY
8453 						 * to prevent them from being faulted
8454 						 * into an address space before we've
8455 						 * had a chance to complete the I/O on
8456 						 * them since they may contain info that
8457 						 * shouldn't be seen by the faulting task
8458 						 */
8459 					}
8460 					/*
8461 					 *	Release paging references and
8462 					 *	top-level placeholder page, if any.
8463 					 */
8464 					if (top_page != VM_PAGE_NULL) {
8465 						vm_object_t local_object;
8466 
8467 						local_object = VM_PAGE_OBJECT(top_page);
8468 
8469 						/*
8470 						 * comparing 2 packed pointers
8471 						 */
8472 						if (top_page->vmp_object != dst_page->vmp_object) {
8473 							vm_object_lock(local_object);
8474 							VM_PAGE_FREE(top_page);
8475 							vm_object_paging_end(local_object);
8476 							vm_object_unlock(local_object);
8477 						} else {
8478 							VM_PAGE_FREE(top_page);
8479 							vm_object_paging_end(local_object);
8480 						}
8481 					}
8482 					vm_object_paging_end(object);
8483 					break;
8484 
8485 				case VM_FAULT_RETRY:
8486 					vm_object_lock(object);
8487 					break;
8488 
8489 				case VM_FAULT_MEMORY_SHORTAGE:
8490 					OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8491 
8492 					VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8493 
8494 					if (vm_page_wait(interruptible)) {
8495 						OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8496 
8497 						VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8498 						vm_object_lock(object);
8499 
8500 						break;
8501 					}
8502 					OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8503 
8504 					VM_DEBUG_EVENT(vm_iopl_page_wait, DBG_VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8505 					ktriage_record(thread_tid(current_thread()), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_VM_FAULT_OBJIOPLREQ_MEMORY_SHORTAGE), 0 /* arg */);
8506 					OS_FALLTHROUGH;
8507 
8508 				case VM_FAULT_INTERRUPTED:
8509 					error_code = MACH_SEND_INTERRUPTED;
8510 					OS_FALLTHROUGH;
8511 				case VM_FAULT_MEMORY_ERROR:
8512 memory_error:
8513 					ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8514 
8515 					vm_object_lock(object);
8516 					goto return_err;
8517 
8518 				case VM_FAULT_SUCCESS_NO_VM_PAGE:
8519 					/* success but no page: fail */
8520 					vm_object_paging_end(object);
8521 					vm_object_unlock(object);
8522 					goto memory_error;
8523 
8524 				default:
8525 					panic("vm_object_iopl_request: unexpected error"
8526 					    " 0x%x from vm_fault_page()\n", result);
8527 				}
8528 			} while (result != VM_FAULT_SUCCESS);
8529 		}
8530 		phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8531 
8532 		if (upl->flags & UPL_KERNEL_OBJECT) {
8533 			goto record_phys_addr;
8534 		}
8535 
8536 		if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8537 			dst_page->vmp_busy = TRUE;
8538 			goto record_phys_addr;
8539 		}
8540 
8541 		if (dst_page->vmp_cleaning) {
8542 			/*
8543 			 * Someone else is cleaning this page in place.
8544 			 * In theory, we could proceed and use this page, but the
8545 			 * cleaner will clear the "busy" bit in upl_commit_range()
8546 			 * even though it didn't set it, which would also clear our
8547 			 * "busy" bit and open us to race conditions.
8548 			 *
8549 			 * We'd better wait for the cleaning to complete and
8550 			 * then try again.
8551 			 */
8552 			VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, 1);
8553 			vm_page_sleep(object, dst_page, THREAD_UNINT, LCK_SLEEP_EXCLUSIVE);
8554 			continue;
8555 		}
8556 		if (dst_page->vmp_laundry) {
8557 			vm_pageout_steal_laundry(dst_page, FALSE);
8558 		}
8559 
8560 		if ((cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8561 		    phys_page >= (max_valid_dma_address >> PAGE_SHIFT)) {
8562 			vm_page_t       low_page;
8563 			int             refmod;
8564 
8565 			/*
8566 			 * Support devices that can't DMA above 32 bits by
8567 			 * substituting a page from the pool of low-address
8568 			 * memory for any page we find above the 4G mark.
8569 			 * We can't substitute if the page is already wired, because
8570 			 * we don't know whether that physical address has been
8571 			 * handed out to some other 64-bit capable DMA device to use.
8572 			 */
8573 			if (VM_PAGE_WIRED(dst_page)) {
8574 				ret = KERN_PROTECTION_FAILURE;
8575 				goto return_err;
8576 			}
8577 			low_page = vm_page_grablo();
8578 
8579 			if (low_page == VM_PAGE_NULL) {
8580 				ret = KERN_RESOURCE_SHORTAGE;
8581 				goto return_err;
8582 			}
8583 			/*
8584 			 * from here until the vm_page_replace completes
8585 			 * we mustn't drop the object lock... we don't
8586 			 * want anyone refaulting this page in and using
8587 			 * it after we disconnect it... we want the fault
8588 			 * to find the new page being substituted.
8589 			 */
8590 			if (dst_page->vmp_pmapped) {
8591 				refmod = pmap_disconnect(phys_page);
8592 			} else {
8593 				refmod = 0;
8594 			}
8595 
8596 			if (!dst_page->vmp_absent) {
8597 				vm_page_copy(dst_page, low_page);
8598 			}
8599 
8600 			low_page->vmp_reference = dst_page->vmp_reference;
8601 			low_page->vmp_dirty     = dst_page->vmp_dirty;
8602 			low_page->vmp_absent    = dst_page->vmp_absent;
8603 
8604 			if (refmod & VM_MEM_REFERENCED) {
8605 				low_page->vmp_reference = TRUE;
8606 			}
8607 			if (refmod & VM_MEM_MODIFIED) {
8608 				SET_PAGE_DIRTY(low_page, FALSE);
8609 			}
8610 
8611 			vm_page_replace(low_page, object, dst_offset);
8612 
8613 			dst_page = low_page;
8614 			/*
8615 			 * vm_page_grablo returned the page marked
8616 			 * BUSY... we don't need a PAGE_WAKEUP_DONE
8617 			 * here, because we've never dropped the object lock
8618 			 */
8619 			if (!dst_page->vmp_absent) {
8620 				dst_page->vmp_busy = FALSE;
8621 			}
8622 
8623 			phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8624 		}
8625 		if (!dst_page->vmp_busy) {
8626 			dwp->dw_mask |= DW_vm_page_wire;
8627 		}
8628 
8629 		if (cntrl_flags & UPL_BLOCK_ACCESS) {
8630 			/*
8631 			 * Mark the page "busy" to block any future page fault
8632 			 * on this page in addition to wiring it.
8633 			 * We'll also remove the mapping
8634 			 * of all these pages before leaving this routine.
8635 			 */
8636 			assert(!dst_page->vmp_fictitious);
8637 			dst_page->vmp_busy = TRUE;
8638 		}
8639 		/*
8640 		 * expect the page to be used
8641 		 * page queues lock must be held to set 'reference'
8642 		 */
8643 		dwp->dw_mask |= DW_set_reference;
8644 
8645 		if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8646 			SET_PAGE_DIRTY(dst_page, TRUE);
8647 			/*
8648 			 * Page belonging to a code-signed object is about to
8649 			 * be written. Mark it tainted and disconnect it from
8650 			 * all pmaps so processes have to fault it back in and
8651 			 * deal with the tainted bit.
8652 			 */
8653 			if (object->code_signed && dst_page->vmp_cs_tainted != VMP_CS_ALL_TRUE) {
8654 				dst_page->vmp_cs_tainted = VMP_CS_ALL_TRUE;
8655 				vm_page_iopl_tainted++;
8656 				if (dst_page->vmp_pmapped) {
8657 					int refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
8658 					if (refmod & VM_MEM_REFERENCED) {
8659 						dst_page->vmp_reference = TRUE;
8660 					}
8661 				}
8662 			}
8663 		}
8664 		if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
8665 			pmap_sync_page_attributes_phys(phys_page);
8666 			dst_page->vmp_written_by_kernel = FALSE;
8667 		}
8668 
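		/*
		 * Record this page in the UPL: mark its slot in the lite list,
		 * track the highest physical page seen, and fill in the
		 * caller's page-info entry if one was requested.
		 */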
8669 record_phys_addr:
8670 		if (dst_page->vmp_busy) {
8671 			upl->flags |= UPL_HAS_BUSY;
8672 		}
8673 
8674 		bitmap_set(upl->lite_list, entry);
8675 
8676 		if (phys_page > upl->highest_page) {
8677 			upl->highest_page = phys_page;
8678 		}
8679 
8680 		if (user_page_list) {
8681 			user_page_list[entry].phys_addr = phys_page;
8682 			user_page_list[entry].free_when_done    = dst_page->vmp_free_when_done;
8683 			user_page_list[entry].absent    = dst_page->vmp_absent;
8684 			user_page_list[entry].dirty     = dst_page->vmp_dirty;
8685 			user_page_list[entry].precious  = dst_page->vmp_precious;
8686 			user_page_list[entry].device    = FALSE;
8687 			user_page_list[entry].needed    = FALSE;
8688 			if (dst_page->vmp_clustered == TRUE) {
8689 				user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8690 			} else {
8691 				user_page_list[entry].speculative = FALSE;
8692 			}
8693 			user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
8694 			user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
8695 			user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
8696 			user_page_list[entry].mark      = FALSE;
8697 		}
8698 		if (!is_kernel_object(object) && object != compressor_object) {
8699 			/*
8700 			 * someone is explicitly grabbing this page...
8701 			 * update clustered and speculative state
8702 			 *
8703 			 */
8704 			if (dst_page->vmp_clustered) {
8705 				VM_PAGE_CONSUME_CLUSTERED(dst_page);
8706 			}
8707 		}
8708 skip_page:
8709 		entry++;
8710 		dst_offset += PAGE_SIZE_64;
8711 		xfer_size -= PAGE_SIZE;
8712 
8713 		if (dwp->dw_mask) {
8714 			VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8715 
8716 			if (dw_count >= dw_limit) {
8717 				vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
8718 
8719 				dwp = dwp_start;
8720 				dw_count = 0;
8721 			}
8722 		}
8723 	}
8724 	assert(entry == size_in_pages);
8725 
8726 	if (dw_count) {
8727 		vm_page_do_delayed_work(object, tag, dwp_start, dw_count);
8728 		dwp = dwp_start;
8729 		dw_count = 0;
8730 	}
8731 finish:
8732 	if (user_page_list && set_cache_attr_needed == TRUE) {
8733 		vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8734 	}
8735 
8736 	if (page_list_count != NULL) {
8737 		if (upl->flags & UPL_INTERNAL) {
8738 			*page_list_count = 0;
8739 		} else if (*page_list_count > size_in_pages) {
8740 			*page_list_count = size_in_pages;
8741 		}
8742 	}
8743 	vm_object_unlock(object);
8744 
8745 	if (cntrl_flags & UPL_BLOCK_ACCESS) {
8746 		/*
8747 		 * We've marked all the pages "busy" so that future
8748 		 * page faults will block.
8749 		 * Now remove the mapping for these pages, so that they
8750 		 * can't be accessed without causing a page fault.
8751 		 */
8752 		vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8753 		    PMAP_NULL,
8754 		    PAGE_SIZE,
8755 		    0, VM_PROT_NONE);
8756 		assert(!object->blocked_access);
8757 		object->blocked_access = TRUE;
8758 	}
8759 
8760 	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8761 #if DEVELOPMENT || DEBUG
8762 	if (task != NULL) {
8763 		ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
8764 	}
8765 #endif /* DEVELOPMENT || DEBUG */
8766 
8767 	if (dwp_start && dwp_finish_ctx) {
8768 		vm_page_delayed_work_finish_ctx(dwp_start);
8769 		dwp_start = dwp = NULL;
8770 	}
8771 
8772 	return KERN_SUCCESS;
8773 
8774 return_err:
8775 	dw_index = 0;
8776 
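	/*
	 * Unwind after a failure: revisit every page that was processed
	 * before the error, undoing the wiring (or freeing the page if it
	 * was absent or free_wired_pages was requested) and waking any
	 * waiters.
	 */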
8777 	for (; offset < dst_offset; offset += PAGE_SIZE) {
8778 		boolean_t need_unwire;
8779 
8780 		dst_page = vm_page_lookup(object, offset);
8781 
8782 		if (dst_page == VM_PAGE_NULL) {
8783 			panic("vm_object_iopl_request: Wired page missing.");
8784 		}
8785 
8786 		/*
8787 		 * if we've already processed this page in an earlier
8788 		 * dw_do_work, we need to undo the wiring... we will
8789 		 * leave the dirty and reference bits on if they
8790 		 * were set, since we don't have a good way of knowing
8791 		 * what the previous state was and we won't get here
8792 		 * under any normal circumstances...  we will always
8793 		 * clear BUSY and wakeup any waiters via vm_page_free
8794 		 * or PAGE_WAKEUP_DONE
8795 		 */
8796 		need_unwire = TRUE;
8797 
8798 		if (dw_count) {
8799 			if ((dwp_start)[dw_index].dw_m == dst_page) {
8800 				/*
8801 				 * still in the deferred work list
8802 				 * which means we haven't yet called
8803 				 * vm_page_wire on this page
8804 				 */
8805 				need_unwire = FALSE;
8806 
8807 				dw_index++;
8808 				dw_count--;
8809 			}
8810 		}
8811 		vm_page_lock_queues();
8812 
8813 		if (dst_page->vmp_absent || free_wired_pages == TRUE) {
8814 			vm_page_free(dst_page);
8815 
8816 			need_unwire = FALSE;
8817 		} else {
8818 			if (need_unwire == TRUE) {
8819 				vm_page_unwire(dst_page, TRUE);
8820 			}
8821 
8822 			vm_page_wakeup_done(object, dst_page);
8823 		}
8824 		vm_page_unlock_queues();
8825 
8826 		if (need_unwire == TRUE) {
8827 			counter_inc(&vm_statistics_reactivations);
8828 		}
8829 	}
8830 #if UPL_DEBUG
8831 	upl->upl_state = 2;
8832 #endif
8833 	if (!(upl->flags & UPL_KERNEL_OBJECT)) {
8834 		vm_object_activity_end(object);
8835 		vm_object_collapse(object, 0, TRUE);
8836 	}
8837 	vm_object_unlock(object);
8838 	upl_destroy(upl);
8839 
8840 	VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, DBG_VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, 0, 0);
8841 #if DEVELOPMENT || DEBUG
8842 	if (task != NULL) {
8843 		ledger_credit(task->ledger, task_ledgers.pages_grabbed_iopl, page_grab_count);
8844 	}
8845 #endif /* DEVELOPMENT || DEBUG */
8846 
8847 	if (dwp_start && dwp_finish_ctx) {
8848 		vm_page_delayed_work_finish_ctx(dwp_start);
8849 		dwp_start = dwp = NULL;
8850 	}
8851 	return ret;
8852 }
8853 
8854 kern_return_t
8855 upl_transpose(
8856 	upl_t           upl1,
8857 	upl_t           upl2)
8858 {
8859 	kern_return_t           retval;
8860 	boolean_t               upls_locked;
8861 	vm_object_t             object1, object2;
8862 
8863 	/* LD: Should mapped UPLs be eligible for a transpose? */
8864 	if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR) == UPL_VECTOR) || ((upl2->flags & UPL_VECTOR) == UPL_VECTOR)) {
8865 		return KERN_INVALID_ARGUMENT;
8866 	}
8867 
8868 	upls_locked = FALSE;
8869 
8870 	/*
8871 	 * Since we need to lock both UPLs at the same time,
8872 	 * avoid deadlocks by always taking locks in the same order.
8873 	 */
8874 	if (upl1 < upl2) {
8875 		upl_lock(upl1);
8876 		upl_lock(upl2);
8877 	} else {
8878 		upl_lock(upl2);
8879 		upl_lock(upl1);
8880 	}
8881 	upls_locked = TRUE;     /* the UPLs will need to be unlocked */
8882 
8883 	object1 = upl1->map_object;
8884 	object2 = upl2->map_object;
8885 
8886 	if (upl1->u_offset != 0 || upl2->u_offset != 0 ||
8887 	    upl1->u_size != upl2->u_size) {
8888 		/*
8889 		 * We deal only with full objects, not subsets.
8890 		 * That's because we exchange the entire backing store info
8891 		 * for the objects: pager, resident pages, etc...  We can't do
8892 		 * only part of it.
8893 		 */
8894 		retval = KERN_INVALID_VALUE;
8895 		goto done;
8896 	}
8897 
8898 	/*
8899 	 * Transpose the VM objects' backing store.
8900 	 */
8901 	retval = vm_object_transpose(object1, object2,
8902 	    upl_adjusted_size(upl1, PAGE_MASK));
8903 
8904 	if (retval == KERN_SUCCESS) {
8905 		/*
8906 		 * Make each UPL point to the correct VM object, i.e. the
8907 		 * object holding the pages that the UPL refers to...
8908 		 */
8909 #if CONFIG_IOSCHED || UPL_DEBUG
8910 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8911 			vm_object_lock(object1);
8912 			vm_object_lock(object2);
8913 		}
8914 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8915 			queue_remove(&object1->uplq, upl1, upl_t, uplq);
8916 		}
8917 		if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8918 			queue_remove(&object2->uplq, upl2, upl_t, uplq);
8919 		}
8920 #endif
8921 		upl1->map_object = object2;
8922 		upl2->map_object = object1;
8923 
8924 #if CONFIG_IOSCHED || UPL_DEBUG
8925 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8926 			queue_enter(&object2->uplq, upl1, upl_t, uplq);
8927 		}
8928 		if ((upl2->flags & UPL_TRACKED_BY_OBJECT) || upl_debug_enabled) {
8929 			queue_enter(&object1->uplq, upl2, upl_t, uplq);
8930 		}
8931 		if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8932 			vm_object_unlock(object2);
8933 			vm_object_unlock(object1);
8934 		}
8935 #endif
8936 	}
8937 
8938 done:
8939 	/*
8940 	 * Cleanup.
8941 	 */
8942 	if (upls_locked) {
8943 		upl_unlock(upl1);
8944 		upl_unlock(upl2);
8945 		upls_locked = FALSE;
8946 	}
8947 
8948 	return retval;
8949 }
8950 
8951 void
8952 upl_range_needed(
8953 	upl_t           upl,
8954 	int             index,
8955 	int             count)
8956 {
8957 	int             size_in_pages;
8958 
8959 	if (!(upl->flags & UPL_INTERNAL) || count <= 0) {
8960 		return;
8961 	}
8962 
8963 	size_in_pages = upl_adjusted_size(upl, PAGE_MASK) / PAGE_SIZE;
8964 
8965 	while (count-- && index < size_in_pages) {
8966 		upl->page_list[index++].needed = TRUE;
8967 	}
8968 }
8969 
8970 
8971 /*
8972  * Reserve of virtual addresses in the kernel address space.
8973  * We need to map the physical pages in the kernel, so that we
8974  * can call the code-signing or slide routines with a kernel
8975  * virtual address.  We keep this pool of pre-allocated kernel
8976  * virtual addresses so that we don't have to scan the kernel's
8977  * virtual address space each time we need to work with
8978  * a physical page.
8979  */
8980 SIMPLE_LOCK_DECLARE(vm_paging_lock, 0);
8981 #define VM_PAGING_NUM_PAGES     64
8982 SECURITY_READ_ONLY_LATE(vm_offset_t) vm_paging_base_address = 0;
8983 bool            vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8984 int             vm_paging_max_index = 0;
8985 int             vm_paging_page_waiter = 0;
8986 int             vm_paging_page_waiter_total = 0;
8987 
8988 unsigned long   vm_paging_no_kernel_page = 0;
8989 unsigned long   vm_paging_objects_mapped = 0;
8990 unsigned long   vm_paging_pages_mapped = 0;
8991 unsigned long   vm_paging_objects_mapped_slow = 0;
8992 unsigned long   vm_paging_pages_mapped_slow = 0;
8993 
8994 __startup_func
8995 static void
8996 vm_paging_map_init(void)
8997 {
8998 	kmem_alloc(kernel_map, &vm_paging_base_address,
8999 	    ptoa(VM_PAGING_NUM_PAGES),
9000 	    KMA_DATA | KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_PAGEABLE,
9001 	    VM_KERN_MEMORY_NONE);
9002 }
9003 STARTUP(ZALLOC, STARTUP_RANK_LAST, vm_paging_map_init);
9004 
9005 /*
9006  * vm_paging_map_object:
9007  *	Maps part of a VM object's pages in the kernel
9008  *      virtual address space, using the pre-allocated
9009  *	kernel virtual addresses, if possible.
9010  * Context:
9011  *      The VM object is locked.  This lock will get
9012  *      dropped and re-acquired though, so the caller
9013  *      must make sure the VM object is kept alive
9014  *	(by holding a VM map that has a reference
9015  *      on it, for example, or taking an extra reference).
9016  *      The page should also be kept busy to prevent
9017  *	it from being reclaimed.
9018  */
9019 kern_return_t
9020 vm_paging_map_object(
9021 	vm_page_t               page,
9022 	vm_object_t             object,
9023 	vm_object_offset_t      offset,
9024 	vm_prot_t               protection,
9025 	boolean_t               can_unlock_object,
9026 	vm_map_size_t           *size,          /* IN/OUT */
9027 	vm_map_offset_t         *address,       /* OUT */
9028 	boolean_t               *need_unmap)    /* OUT */
9029 {
9030 	kern_return_t           kr;
9031 	vm_map_offset_t         page_map_offset;
9032 	vm_map_size_t           map_size;
9033 	vm_object_offset_t      object_offset;
9034 	int                     i;
9035 
9036 	if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
9037 		/* use permanent 1-to-1 kernel mapping of physical memory ? */
9038 		*address = (vm_map_offset_t)
9039 		    phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
9040 		*need_unmap = FALSE;
9041 		return KERN_SUCCESS;
9042 
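		/*
		 * NOTE: with the unconditional physmap return above, the
		 * pre-allocated kernel VA pool path below is not reached;
		 * it appears to be kept for reference.
		 */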
9043 		assert(page->vmp_busy);
9044 		/*
9045 		 * Use one of the pre-allocated kernel virtual addresses
9046 		 * and just enter the VM page in the kernel address space
9047 		 * at that virtual address.
9048 		 */
9049 		simple_lock(&vm_paging_lock, &vm_pageout_lck_grp);
9050 
9051 		/*
9052 		 * Try and find an available kernel virtual address
9053 		 * from our pre-allocated pool.
9054 		 */
9055 		page_map_offset = 0;
9056 		for (;;) {
9057 			for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
9058 				if (vm_paging_page_inuse[i] == FALSE) {
9059 					page_map_offset =
9060 					    vm_paging_base_address +
9061 					    (i * PAGE_SIZE);
9062 					break;
9063 				}
9064 			}
9065 			if (page_map_offset != 0) {
9066 				/* found a space to map our page ! */
9067 				break;
9068 			}
9069 
9070 			if (can_unlock_object) {
9071 				/*
9072 				 * If we can afford to unlock the VM object,
9073 				 * let's take the slow path now...
9074 				 */
9075 				break;
9076 			}
9077 			/*
9078 			 * We can't afford to unlock the VM object, so
9079 			 * let's wait for a space to become available...
9080 			 */
9081 			vm_paging_page_waiter_total++;
9082 			vm_paging_page_waiter++;
9083 			kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
9084 			if (kr == THREAD_WAITING) {
9085 				simple_unlock(&vm_paging_lock);
9086 				kr = thread_block(THREAD_CONTINUE_NULL);
9087 				simple_lock(&vm_paging_lock, &vm_pageout_lck_grp);
9088 			}
9089 			vm_paging_page_waiter--;
9090 			/* ... and try again */
9091 		}
9092 
9093 		if (page_map_offset != 0) {
9094 			/*
9095 			 * We found a kernel virtual address;
9096 			 * map the physical page to that virtual address.
9097 			 */
9098 			if (i > vm_paging_max_index) {
9099 				vm_paging_max_index = i;
9100 			}
9101 			vm_paging_page_inuse[i] = TRUE;
9102 			simple_unlock(&vm_paging_lock);
9103 
9104 			page->vmp_pmapped = TRUE;
9105 
9106 			/*
9107 			 * Keep the VM object locked over the PMAP_ENTER
9108 			 * and the actual use of the page by the kernel,
9109 			 * or this pmap mapping might get undone by a
9110 			 * vm_object_pmap_protect() call...
9111 			 */
9112 			kr = pmap_enter_check(kernel_pmap,
9113 			    page_map_offset,
9114 			    page,
9115 			    protection,
9116 			    VM_PROT_NONE,
9117 			    0,
9118 			    TRUE);
9119 			assert(kr == KERN_SUCCESS);
9120 			vm_paging_objects_mapped++;
9121 			vm_paging_pages_mapped++;
9122 			*address = page_map_offset;
9123 			*need_unmap = TRUE;
9124 
9125 #if KASAN
9126 			kasan_notify_address(page_map_offset, PAGE_SIZE);
9127 #endif
9128 
9129 			/* all done and mapped, ready to use ! */
9130 			return KERN_SUCCESS;
9131 		}
9132 
9133 		/*
9134 		 * We ran out of pre-allocated kernel virtual
9135 		 * addresses.  Just map the page in the kernel
9136 		 * the slow and regular way.
9137 		 */
9138 		vm_paging_no_kernel_page++;
9139 		simple_unlock(&vm_paging_lock);
9140 	}
9141 
9142 	if (!can_unlock_object) {
9143 		*address = 0;
9144 		*size = 0;
9145 		*need_unmap = FALSE;
9146 		return KERN_NOT_SUPPORTED;
9147 	}
9148 
9149 	object_offset = vm_object_trunc_page(offset);
9150 	map_size = vm_map_round_page(*size,
9151 	    VM_MAP_PAGE_MASK(kernel_map));
9152 
9153 	/*
9154 	 * Try and map the required range of the object
9155 	 * in the kernel_map. Given that allocation is
9156 	 * for pageable memory, it shouldn't contain
9157 	 * pointers and is mapped into the data range.
9158 	 */
9159 
9160 	vm_object_reference_locked(object);     /* for the map entry */
9161 	vm_object_unlock(object);
9162 
9163 	kr = vm_map_enter(kernel_map,
9164 	    address,
9165 	    map_size,
9166 	    0,
9167 	    VM_MAP_KERNEL_FLAGS_DATA_ANYWHERE(),
9168 	    object,
9169 	    object_offset,
9170 	    FALSE,
9171 	    protection,
9172 	    VM_PROT_ALL,
9173 	    VM_INHERIT_NONE);
9174 	if (kr != KERN_SUCCESS) {
9175 		*address = 0;
9176 		*size = 0;
9177 		*need_unmap = FALSE;
9178 		vm_object_deallocate(object);   /* for the map entry */
9179 		vm_object_lock(object);
9180 		return kr;
9181 	}
9182 
9183 	*size = map_size;
9184 
9185 	/*
9186 	 * Enter the mapped pages in the page table now.
9187 	 */
9188 	vm_object_lock(object);
9189 	/*
9190 	 * VM object must be kept locked from before PMAP_ENTER()
9191 	 * until after the kernel is done accessing the page(s).
9192 	 * Otherwise, the pmap mappings in the kernel could be
9193 	 * undone by a call to vm_object_pmap_protect().
9194 	 */
9195 
9196 	for (page_map_offset = 0;
9197 	    map_size != 0;
9198 	    map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9199 		page = vm_page_lookup(object, offset + page_map_offset);
9200 		if (page == VM_PAGE_NULL) {
9201 			printf("vm_paging_map_object: no page !?");
9202 			vm_object_unlock(object);
9203 			vm_map_remove(kernel_map, *address, *size);
9204 			*address = 0;
9205 			*size = 0;
9206 			*need_unmap = FALSE;
9207 			vm_object_lock(object);
9208 			return KERN_MEMORY_ERROR;
9209 		}
9210 		page->vmp_pmapped = TRUE;
9211 
9212 		kr = pmap_enter_check(kernel_pmap,
9213 		    *address + page_map_offset,
9214 		    page,
9215 		    protection,
9216 		    VM_PROT_NONE,
9217 		    0,
9218 		    TRUE);
9219 		assert(kr == KERN_SUCCESS);
9220 #if KASAN
9221 		kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9222 #endif
9223 	}
9224 
9225 	vm_paging_objects_mapped_slow++;
9226 	vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9227 
9228 	*need_unmap = TRUE;
9229 
9230 	return KERN_SUCCESS;
9231 }
9232 
9233 /*
9234  * vm_paging_unmap_object:
9235  *	Unmaps part of a VM object's pages from the kernel
9236  *      virtual address space.
9237  * Context:
9238  *      The VM object is locked.  This lock will get
9239  *      dropped and re-acquired though.
9240  */
9241 void
9242 vm_paging_unmap_object(
9243 	vm_object_t     object,
9244 	vm_map_offset_t start,
9245 	vm_map_offset_t end)
9246 {
9247 	int             i;
9248 
9249 	if ((vm_paging_base_address == 0) ||
9250 	    (start < vm_paging_base_address) ||
9251 	    (end > (vm_paging_base_address
9252 	    + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9253 		/*
9254 		 * We didn't use our pre-allocated pool of
9255 		 * kernel virtual addresses.  Deallocate the
9256 		 * virtual memory.
9257 		 */
9258 		if (object != VM_OBJECT_NULL) {
9259 			vm_object_unlock(object);
9260 		}
9261 		vm_map_remove(kernel_map, start, end);
9262 		if (object != VM_OBJECT_NULL) {
9263 			vm_object_lock(object);
9264 		}
9265 	} else {
9266 		/*
9267 		 * We used a kernel virtual address from our
9268 		 * pre-allocated pool.  Put it back in the pool
9269 		 * for next time.
9270 		 */
9271 		assert(end - start == PAGE_SIZE);
9272 		i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9273 		assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9274 
9275 		/* undo the pmap mapping */
9276 		pmap_remove(kernel_pmap, start, end);
9277 
9278 		simple_lock(&vm_paging_lock, &vm_pageout_lck_grp);
9279 		vm_paging_page_inuse[i] = FALSE;
9280 		if (vm_paging_page_waiter) {
9281 			thread_wakeup(&vm_paging_page_waiter);
9282 		}
9283 		simple_unlock(&vm_paging_lock);
9284 	}
9285 }
9286 
9287 
9288 /*
9289  * page->vmp_object must be locked
9290  */
9291 void
9292 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9293 {
9294 	if (!queues_locked) {
9295 		vm_page_lockspin_queues();
9296 	}
9297 
9298 	page->vmp_free_when_done = FALSE;
9299 	/*
9300 	 * need to drop the laundry count...
9301 	 * we may also need to remove it
9302 	 * from the I/O paging queue...
9303 	 * vm_pageout_throttle_up handles both cases
9304 	 *
9305 	 * the laundry and pageout_queue flags are cleared...
9306 	 */
9307 	vm_pageout_throttle_up(page);
9308 
9309 	if (!queues_locked) {
9310 		vm_page_unlock_queues();
9311 	}
9312 }
9313 
9314 #define VECTOR_UPL_ELEMENTS_UPPER_LIMIT 64
9315 
9316 upl_t
9317 vector_upl_create(vm_offset_t upl_offset, uint32_t max_upls)
9318 {
9319 	int i = 0;
9320 	upl_t   upl;
9321 
9322 	assert(max_upls > 0);
9323 	if (max_upls == 0) {
9324 		return NULL;
9325 	}
9326 
9327 	if (max_upls > VECTOR_UPL_ELEMENTS_UPPER_LIMIT) {
9328 		max_upls = VECTOR_UPL_ELEMENTS_UPPER_LIMIT;
9329 	}
9330 	vector_upl_t vector_upl = kalloc_type(struct _vector_upl, typeof(vector_upl->upls[0]), max_upls, Z_WAITOK | Z_NOFAIL);
9331 
9332 	upl = upl_create(0, UPL_VECTOR, 0);
9333 	upl->vector_upl = vector_upl;
9334 	upl->u_offset = upl_offset;
9335 	vector_upl->size = 0;
9336 	vector_upl->offset = upl_offset;
9337 	vector_upl->invalid_upls = 0;
9338 	vector_upl->num_upls = 0;
9339 	vector_upl->pagelist = NULL;
9340 	vector_upl->max_upls = max_upls;
9341 
9342 	for (i = 0; i < max_upls; i++) {
9343 		vector_upl->upls[i].iostate.size = 0;
9344 		vector_upl->upls[i].iostate.offset = 0;
9345 	}
9346 	return upl;
9347 }
9348 
9349 upl_size_t
9350 vector_upl_get_size(const upl_t upl)
9351 {
9352 	if (!vector_upl_is_valid(upl)) {
9353 		return upl_get_size(upl);
9354 	} else {
9355 		return round_page_32(upl->vector_upl->size);
9356 	}
9357 }
9358 
9359 uint32_t
9360 vector_upl_max_upls(const upl_t upl)
9361 {
9362 	if (!vector_upl_is_valid(upl)) {
9363 		return 0;
9364 	}
9365 	return ((vector_upl_t)(upl->vector_upl))->max_upls;
9366 }
9367 
9368 void
9369 vector_upl_deallocate(upl_t upl)
9370 {
9371 	vector_upl_t vector_upl = upl->vector_upl;
9372 
9373 	assert(vector_upl_is_valid(upl));
9374 
9375 	if (vector_upl->invalid_upls != vector_upl->num_upls) {
9376 		panic("Deallocating non-empty Vectored UPL");
9377 	}
9378 	uint32_t max_upls = vector_upl->max_upls;
9379 	kfree_type(struct upl_page_info, atop(vector_upl->size), vector_upl->pagelist);
9380 	kfree_type(struct _vector_upl, typeof(vector_upl->upls[0]), max_upls, vector_upl);
9381 	upl->vector_upl = NULL;
9382 }
9383 
9384 boolean_t
9385 vector_upl_is_valid(upl_t upl)
9386 {
9387 	return upl && (upl->flags & UPL_VECTOR) && upl->vector_upl;
9388 }
9389 
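/*
 * vector_upl_set_subupl:
 *	With a non-zero io_size, append "subupl" to the vector UPL and grow
 *	its size; with io_size == 0, mark that sub-UPL slot invalid (it has
 *	been committed or aborted) and return TRUE once every sub-UPL has
 *	been invalidated.
 */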
9390 boolean_t
9391 vector_upl_set_subupl(upl_t upl, upl_t subupl, uint32_t io_size)
9392 {
9393 	if (vector_upl_is_valid(upl)) {
9394 		vector_upl_t vector_upl = upl->vector_upl;
9395 
9396 		if (vector_upl) {
9397 			if (subupl) {
9398 				if (io_size) {
9399 					if (io_size < PAGE_SIZE) {
9400 						io_size = PAGE_SIZE;
9401 					}
9402 					subupl->vector_upl = (void*)vector_upl;
9403 					vector_upl->upls[vector_upl->num_upls++].elem = subupl;
9404 					vector_upl->size += io_size;
9405 					upl->u_size += io_size;
9406 				} else {
9407 					uint32_t i = 0, invalid_upls = 0;
9408 					for (i = 0; i < vector_upl->num_upls; i++) {
9409 						if (vector_upl->upls[i].elem == subupl) {
9410 							break;
9411 						}
9412 					}
9413 					if (i == vector_upl->num_upls) {
9414 						panic("Trying to remove sub-upl when none exists");
9415 					}
9416 
9417 					vector_upl->upls[i].elem = NULL;
9418 					invalid_upls = os_atomic_inc(&(vector_upl)->invalid_upls,
9419 					    relaxed);
9420 					if (invalid_upls == vector_upl->num_upls) {
9421 						return TRUE;
9422 					} else {
9423 						return FALSE;
9424 					}
9425 				}
9426 			} else {
9427 				panic("vector_upl_set_subupl was passed a NULL upl element");
9428 			}
9429 		} else {
9430 			panic("vector_upl_set_subupl was passed a non-vectored upl");
9431 		}
9432 	} else {
9433 		panic("vector_upl_set_subupl was passed a NULL upl");
9434 	}
9435 
9436 	return FALSE;
9437 }
9438 
9439 void
9440 vector_upl_set_pagelist(upl_t upl)
9441 {
9442 	if (vector_upl_is_valid(upl)) {
9443 		uint32_t i = 0;
9444 		vector_upl_t vector_upl = upl->vector_upl;
9445 
9446 		if (vector_upl) {
9447 			vm_offset_t pagelist_size = 0, cur_upl_pagelist_size = 0;
9448 
9449 			vector_upl->pagelist = kalloc_type(struct upl_page_info,
9450 			    atop(vector_upl->size), Z_WAITOK);
9451 
9452 			for (i = 0; i < vector_upl->num_upls; i++) {
9453 				cur_upl_pagelist_size = sizeof(struct upl_page_info) * upl_adjusted_size(vector_upl->upls[i].elem, PAGE_MASK) / PAGE_SIZE;
9454 				bcopy(vector_upl->upls[i].elem->page_list, (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9455 				pagelist_size += cur_upl_pagelist_size;
9456 				if (vector_upl->upls[i].elem->highest_page > upl->highest_page) {
9457 					upl->highest_page = vector_upl->upls[i].elem->highest_page;
9458 				}
9459 			}
9460 			assert( pagelist_size == (sizeof(struct upl_page_info) * (vector_upl->size / PAGE_SIZE)));
9461 		} else {
9462 			panic("vector_upl_set_pagelist was passed a non-vectored upl");
9463 		}
9464 	} else {
9465 		panic("vector_upl_set_pagelist was passed a NULL upl");
9466 	}
9467 }
9468 
9469 upl_t
9470 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9471 {
9472 	if (vector_upl_is_valid(upl)) {
9473 		vector_upl_t vector_upl = upl->vector_upl;
9474 		if (vector_upl) {
9475 			if (index < vector_upl->num_upls) {
9476 				return vector_upl->upls[index].elem;
9477 			}
9478 		} else {
9479 			panic("vector_upl_subupl_byindex was passed a non-vectored upl");
9480 		}
9481 	}
9482 	return NULL;
9483 }
9484 
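/*
 * vector_upl_subupl_byoffset:
 *	Locate the sub-UPL whose iostate covers *upl_offset, clip *upl_size
 *	to the end of that sub-UPL's iostate and rebase *upl_offset relative
 *	to it; returns NULL if the matching sub-UPL has already been
 *	committed or aborted.
 */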
9485 upl_t
9486 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9487 {
9488 	if (vector_upl_is_valid(upl)) {
9489 		uint32_t i = 0;
9490 		vector_upl_t vector_upl = upl->vector_upl;
9491 
9492 		if (vector_upl) {
9493 			upl_t subupl = NULL;
9494 			vector_upl_iostates_t subupl_state;
9495 
9496 			for (i = 0; i < vector_upl->num_upls; i++) {
9497 				subupl = vector_upl->upls[i].elem;
9498 				subupl_state = vector_upl->upls[i].iostate;
9499 				if (*upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9500 					/* We could have been passed an offset/size pair that belongs
9501 					 * to a UPL element that has already been committed/aborted.
9502 					 * If so, return NULL.
9503 					 */
9504 					if (subupl == NULL) {
9505 						return NULL;
9506 					}
9507 					if ((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9508 						*upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9509 						if (*upl_size > subupl_state.size) {
9510 							*upl_size = subupl_state.size;
9511 						}
9512 					}
9513 					if (*upl_offset >= subupl_state.offset) {
9514 						*upl_offset -= subupl_state.offset;
9515 					} else if (i) {
9516 						panic("Vector UPL offset miscalculation");
9517 					}
9518 					return subupl;
9519 				}
9520 			}
9521 		} else {
9522 			panic("vector_upl_subupl_byoffset was passed a non-vectored UPL");
9523 		}
9524 	}
9525 	return NULL;
9526 }
9527 
9528 void
9529 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9530 {
9531 	*v_upl_submap = NULL;
9532 
9533 	if (vector_upl_is_valid(upl)) {
9534 		vector_upl_t vector_upl = upl->vector_upl;
9535 		if (vector_upl) {
9536 			*v_upl_submap = vector_upl->submap;
9537 			*submap_dst_addr = vector_upl->submap_dst_addr;
9538 		} else {
9539 			panic("vector_upl_get_submap was passed a non-vectored UPL");
9540 		}
9541 	} else {
9542 		panic("vector_upl_get_submap was passed a null UPL");
9543 	}
9544 }
9545 
9546 void
9547 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9548 {
9549 	if (vector_upl_is_valid(upl)) {
9550 		vector_upl_t vector_upl = upl->vector_upl;
9551 		if (vector_upl) {
9552 			vector_upl->submap = submap;
9553 			vector_upl->submap_dst_addr = submap_dst_addr;
9554 		} else {
9555 			panic("vector_upl_get_submap was passed a non-vectored UPL");
9556 		}
9557 	} else {
9558 		panic("vector_upl_get_submap was passed a NULL UPL");
9559 	}
9560 }
9561 
9562 void
9563 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9564 {
9565 	if (vector_upl_is_valid(upl)) {
9566 		uint32_t i = 0;
9567 		vector_upl_t vector_upl = upl->vector_upl;
9568 
9569 		if (vector_upl) {
9570 			for (i = 0; i < vector_upl->num_upls; i++) {
9571 				if (vector_upl->upls[i].elem == subupl) {
9572 					break;
9573 				}
9574 			}
9575 
9576 			if (i == vector_upl->num_upls) {
9577 				panic("setting sub-upl iostate when none exists");
9578 			}
9579 
9580 			vector_upl->upls[i].iostate.offset = offset;
9581 			if (size < PAGE_SIZE) {
9582 				size = PAGE_SIZE;
9583 			}
9584 			vector_upl->upls[i].iostate.size = size;
9585 		} else {
9586 			panic("vector_upl_set_iostate was passed a non-vectored UPL");
9587 		}
9588 	} else {
9589 		panic("vector_upl_set_iostate was passed a NULL UPL");
9590 	}
9591 }
9592 
9593 void
9594 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9595 {
9596 	if (vector_upl_is_valid(upl)) {
9597 		uint32_t i = 0;
9598 		vector_upl_t vector_upl = upl->vector_upl;
9599 
9600 		if (vector_upl) {
9601 			for (i = 0; i < vector_upl->num_upls; i++) {
9602 				if (vector_upl->upls[i].elem == subupl) {
9603 					break;
9604 				}
9605 			}
9606 
9607 			if (i == vector_upl->num_upls) {
9608 				panic("getting sub-upl iostate when none exists");
9609 			}
9610 
9611 			*offset = vector_upl->upls[i].iostate.offset;
9612 			*size = vector_upl->upls[i].iostate.size;
9613 		} else {
9614 			panic("vector_upl_get_iostate was passed a non-vectored UPL");
9615 		}
9616 	} else {
9617 		panic("vector_upl_get_iostate was passed a NULL UPL");
9618 	}
9619 }
9620 
9621 void
9622 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9623 {
9624 	if (vector_upl_is_valid(upl)) {
9625 		vector_upl_t vector_upl = upl->vector_upl;
9626 		if (vector_upl) {
9627 			if (index < vector_upl->num_upls) {
9628 				*offset = vector_upl->upls[index].iostate.offset;
9629 				*size = vector_upl->upls[index].iostate.size;
9630 			} else {
9631 				*offset = *size = 0;
9632 			}
9633 		} else {
9634 			panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL");
9635 		}
9636 	} else {
9637 		panic("vector_upl_get_iostate_byindex was passed a NULL UPL");
9638 	}
9639 }
9640 
9641 void *
9642 upl_get_internal_vectorupl(upl_t upl)
9643 {
9644 	return upl->vector_upl;
9645 }
9646 
9647 upl_page_info_t *
9648 upl_get_internal_vectorupl_pagelist(upl_t upl)
9649 {
9650 	return upl->vector_upl->pagelist;
9651 }
9652 
9653 upl_page_info_t *
9654 upl_get_internal_page_list(upl_t upl)
9655 {
9656 	return upl->vector_upl ? upl->vector_upl->pagelist : upl->page_list;
9657 }
9658 
9659 void
9660 upl_clear_dirty(
9661 	upl_t           upl,
9662 	boolean_t       value)
9663 {
9664 	if (value) {
9665 		upl->flags |= UPL_CLEAR_DIRTY;
9666 	} else {
9667 		upl->flags &= ~UPL_CLEAR_DIRTY;
9668 	}
9669 }
9670 
9671 void
9672 upl_set_referenced(
9673 	upl_t           upl,
9674 	boolean_t       value)
9675 {
9676 	upl_lock(upl);
9677 	if (value) {
9678 		upl->ext_ref_count++;
9679 	} else {
9680 		if (!upl->ext_ref_count) {
9681 			panic("upl_set_referenced not %p", upl);
9682 		}
9683 		upl->ext_ref_count--;
9684 	}
9685 	upl_unlock(upl);
9686 }
9687 
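/*
 * upl_set_map_exclusive / upl_clear_map_exclusive:
 *	Serialize UPL mapping operations: a thread waits until no other
 *	thread owns the UPL's map address, records itself (by ctid) as the
 *	owner, and wakes any waiters when it releases ownership.
 */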
9688 void
9689 upl_set_map_exclusive(upl_t upl)
9690 {
9691 	upl_lock(upl);
9692 	while (upl->map_addr_owner) {
9693 		upl->flags |= UPL_MAP_EXCLUSIVE_WAIT;
9694 		upl_lock_sleep(upl, &upl->map_addr_owner, ctid_get_thread(upl->map_addr_owner));
9695 	}
9696 	upl->map_addr_owner = thread_get_ctid(current_thread());
9697 	upl_unlock(upl);
9698 }
9699 
9700 void
9701 upl_clear_map_exclusive(upl_t upl)
9702 {
9703 	assert(upl->map_addr_owner == thread_get_ctid(current_thread()));
9704 	upl_lock(upl);
9705 	if (upl->flags & UPL_MAP_EXCLUSIVE_WAIT) {
9706 		upl->flags &= ~UPL_MAP_EXCLUSIVE_WAIT;
9707 		upl_wakeup(&upl->map_addr_owner);
9708 	}
9709 	upl->map_addr_owner = 0;
9710 	upl_unlock(upl);
9711 }
9712 
9713 #if CONFIG_IOSCHED
9714 void
9715 upl_set_blkno(
9716 	upl_t           upl,
9717 	vm_offset_t     upl_offset,
9718 	int             io_size,
9719 	int64_t         blkno)
9720 {
9721 	int i, j;
9722 	if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) {
9723 		return;
9724 	}
9725 
9726 	assert(upl->upl_reprio_info != 0);
9727 	for (i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9728 		UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9729 	}
9730 }
9731 #endif
9732 
9733 void inline
9734 memoryshot(unsigned int event, unsigned int control)
9735 {
9736 	if (vm_debug_events) {
9737 		KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9738 		    vm_page_active_count, vm_page_inactive_count,
9739 		    vm_page_free_count, vm_page_speculative_count,
9740 		    vm_page_throttled_count);
9741 	} else {
9742 		(void) event;
9743 		(void) control;
9744 	}
9745 }
9746 
9747 #ifdef MACH_BSD
9748 
9749 boolean_t
9750 upl_device_page(upl_page_info_t *upl)
9751 {
9752 	return UPL_DEVICE_PAGE(upl);
9753 }
9754 boolean_t
9755 upl_page_present(upl_page_info_t *upl, int index)
9756 {
9757 	return UPL_PAGE_PRESENT(upl, index);
9758 }
9759 boolean_t
9760 upl_speculative_page(upl_page_info_t *upl, int index)
9761 {
9762 	return UPL_SPECULATIVE_PAGE(upl, index);
9763 }
9764 boolean_t
9765 upl_dirty_page(upl_page_info_t *upl, int index)
9766 {
9767 	return UPL_DIRTY_PAGE(upl, index);
9768 }
9769 boolean_t
9770 upl_valid_page(upl_page_info_t *upl, int index)
9771 {
9772 	return UPL_VALID_PAGE(upl, index);
9773 }
9774 ppnum_t
9775 upl_phys_page(upl_page_info_t *upl, int index)
9776 {
9777 	return UPL_PHYS_PAGE(upl, index);
9778 }
9779 
9780 void
9781 upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9782 {
9783 	upl[index].mark = v;
9784 }
9785 
9786 boolean_t
9787 upl_page_get_mark(upl_page_info_t *upl, int index)
9788 {
9789 	return upl[index].mark;
9790 }
9791 
9792 void
9793 vm_countdirtypages(void)
9794 {
9795 	vm_page_t m;
9796 	int dpages;
9797 	int pgopages;
9798 	int precpages;
9799 
9800 
9801 	dpages = 0;
9802 	pgopages = 0;
9803 	precpages = 0;
9804 
9805 	vm_page_lock_queues();
9806 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9807 	do {
9808 		if (m == (vm_page_t)0) {
9809 			break;
9810 		}
9811 
9812 		if (m->vmp_dirty) {
9813 			dpages++;
9814 		}
9815 		if (m->vmp_free_when_done) {
9816 			pgopages++;
9817 		}
9818 		if (m->vmp_precious) {
9819 			precpages++;
9820 		}
9821 
9822 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9823 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9824 		if (m == (vm_page_t)0) {
9825 			break;
9826 		}
9827 	} while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9828 	vm_page_unlock_queues();
9829 
9830 	vm_page_lock_queues();
9831 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9832 	do {
9833 		if (m == (vm_page_t)0) {
9834 			break;
9835 		}
9836 
9837 		dpages++;
9838 		assert(m->vmp_dirty);
9839 		assert(!m->vmp_free_when_done);
9840 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9841 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9842 		if (m == (vm_page_t)0) {
9843 			break;
9844 		}
9845 	} while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9846 	vm_page_unlock_queues();
9847 
9848 	vm_page_lock_queues();
9849 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9850 	do {
9851 		if (m == (vm_page_t)0) {
9852 			break;
9853 		}
9854 
9855 		if (m->vmp_dirty) {
9856 			dpages++;
9857 		}
9858 		if (m->vmp_free_when_done) {
9859 			pgopages++;
9860 		}
9861 		if (m->vmp_precious) {
9862 			precpages++;
9863 		}
9864 
9865 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9866 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9867 		if (m == (vm_page_t)0) {
9868 			break;
9869 		}
9870 	} while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9871 	vm_page_unlock_queues();
9872 
9873 	printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9874 
9875 	dpages = 0;
9876 	pgopages = 0;
9877 	precpages = 0;
9878 
9879 	vm_page_lock_queues();
9880 	m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9881 
9882 	do {
9883 		if (m == (vm_page_t)0) {
9884 			break;
9885 		}
9886 		if (m->vmp_dirty) {
9887 			dpages++;
9888 		}
9889 		if (m->vmp_free_when_done) {
9890 			pgopages++;
9891 		}
9892 		if (m->vmp_precious) {
9893 			precpages++;
9894 		}
9895 
9896 		assert(!is_kernel_object(VM_PAGE_OBJECT(m)));
9897 		m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9898 		if (m == (vm_page_t)0) {
9899 			break;
9900 		}
9901 	} while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9902 	vm_page_unlock_queues();
9903 
9904 	printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9905 }
9906 #endif /* MACH_BSD */
9907 
9908 
9909 #if CONFIG_IOSCHED
9910 int
9911 upl_get_cached_tier(upl_t  upl)
9912 {
9913 	assert(upl);
9914 	if (upl->flags & UPL_TRACKED_BY_OBJECT) {
9915 		return upl->upl_priority;
9916 	}
9917 	return -1;
9918 }
9919 #endif /* CONFIG_IOSCHED */
9920 
9921 
9922 void
9923 upl_callout_iodone(upl_t upl)
9924 {
9925 	struct upl_io_completion *upl_ctx = upl->upl_iodone;
9926 
9927 	if (upl_ctx) {
9928 		void    (*iodone_func)(void *, int) = upl_ctx->io_done;
9929 
9930 		assert(upl_ctx->io_done);
9931 
9932 		(*iodone_func)(upl_ctx->io_context, upl_ctx->io_error);
9933 	}
9934 }
9935 
9936 void
9937 upl_set_iodone(upl_t upl, void *upl_iodone)
9938 {
9939 	upl->upl_iodone = (struct upl_io_completion *)upl_iodone;
9940 }
9941 
9942 void
9943 upl_set_iodone_error(upl_t upl, int error)
9944 {
9945 	struct upl_io_completion *upl_ctx = upl->upl_iodone;
9946 
9947 	if (upl_ctx) {
9948 		upl_ctx->io_error = error;
9949 	}
9950 }
9951 
9952 
9953 ppnum_t
9954 upl_get_highest_page(
9955 	upl_t                      upl)
9956 {
9957 	return upl->highest_page;
9958 }
9959 
9960 upl_size_t
9961 upl_get_size(
9962 	upl_t                      upl)
9963 {
9964 	return upl_adjusted_size(upl, PAGE_MASK);
9965 }
9966 
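/*
 * upl_adjusted_size:
 *	Size of the UPL once its [u_offset, u_offset + u_size) range has
 *	been expanded outward to the page boundaries described by "pgmask".
 */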
9967 upl_size_t
9968 upl_adjusted_size(
9969 	upl_t upl,
9970 	vm_map_offset_t pgmask)
9971 {
9972 	vm_object_offset_t start_offset, end_offset;
9973 
9974 	start_offset = trunc_page_mask_64(upl->u_offset, pgmask);
9975 	end_offset = round_page_mask_64(upl->u_offset + upl->u_size, pgmask);
9976 
9977 	return (upl_size_t)(end_offset - start_offset);
9978 }
9979 
9980 vm_object_offset_t
9981 upl_adjusted_offset(
9982 	upl_t upl,
9983 	vm_map_offset_t pgmask)
9984 {
9985 	return trunc_page_mask_64(upl->u_offset, pgmask);
9986 }
9987 
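/*
 * Offset of the requested data within the first page of the UPL, i.e. how far
 * u_offset sits past its page-truncated (adjusted) offset. In the illustrative
 * example above (u_offset = 0x1800, 4K pages) this would be 0x800.
 */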
9988 vm_object_offset_t
9989 upl_get_data_offset(
9990 	upl_t upl)
9991 {
9992 	return upl->u_offset - upl_adjusted_offset(upl, PAGE_MASK);
9993 }
9994 
9995 upl_t
9996 upl_associated_upl(upl_t upl)
9997 {
9998 	return upl->associated_upl;
9999 }
10000 
10001 void
10002 upl_set_associated_upl(upl_t upl, upl_t associated_upl)
10003 {
10004 	upl->associated_upl = associated_upl;
10005 }
10006 
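/*
 * Map an externally-backed UPL to the vnode behind its pager. Internal
 * (anonymous) map objects have no vnode, so NULL is returned for them.
 */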
10007 struct vnode *
10008 upl_lookup_vnode(upl_t upl)
10009 {
10010 	if (!upl->map_object->internal) {
10011 		return vnode_pager_lookup_vnode(upl->map_object->pager);
10012 	} else {
10013 		return NULL;
10014 	}
10015 }
10016 
10017 #if UPL_DEBUG
10018 kern_return_t
10019 upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
10020 {
10021 	upl->ubc_alias1 = alias1;
10022 	upl->ubc_alias2 = alias2;
10023 	return KERN_SUCCESS;
10024 }
10025 int
10026 upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
10027 {
10028 	if (al) {
10029 		*al = upl->ubc_alias1;
10030 	}
10031 	if (al2) {
10032 		*al2 = upl->ubc_alias2;
10033 	}
10034 	return KERN_SUCCESS;
10035 }
10036 #endif /* UPL_DEBUG */
10037 
10038 #if VM_PRESSURE_EVENTS
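/*
 * Memory-pressure state transitions. Each predicate below answers "should we
 * move from state A to state B now?". Without the compressor, the decision is
 * based on memorystatus_available_pages versus the pressure/critical
 * thresholds; with the compressor active, it is based on how much
 * non-compressed memory remains relative to the compactor and swap-unthrottle
 * thresholds. The downward-trajectory checks use thresholds roughly 15-20%
 * above their upward counterparts, providing hysteresis so the reported
 * pressure level doesn't flap at a boundary.
 */
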
10039 /*
10040  * Upward trajectory.
10041  */
10042 
10043 boolean_t
10044 VM_PRESSURE_NORMAL_TO_WARNING(void)
10045 {
10046 	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10047 		/* Available pages below our threshold */
10048 		if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
10049 #if CONFIG_FREEZE
10050 			/* No frozen processes to kill */
10051 			if (memorystatus_frozen_count == 0) {
10052 				/* Not enough suspended processes available. */
10053 				if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
10054 					return TRUE;
10055 				}
10056 			}
10057 #else /* CONFIG_FREEZE */
10058 			return TRUE;
10059 #endif /* CONFIG_FREEZE */
10060 		}
10061 		return FALSE;
10062 	} else {
10063 		return (AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0;
10064 	}
10065 }
10066 
10067 boolean_t
10068 VM_PRESSURE_WARNING_TO_CRITICAL(void)
10069 {
10070 	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10071 		/* Available pages below our threshold */
10072 		if (memorystatus_available_pages < memorystatus_available_pages_critical) {
10073 			return TRUE;
10074 		}
10075 		return FALSE;
10076 	} else {
10077 		return vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0;
10078 	}
10079 }
10080 
10081 /*
10082  * Downward trajectory.
10083  */
10084 boolean_t
10085 VM_PRESSURE_WARNING_TO_NORMAL(void)
10086 {
10087 	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10088 		/* Available pages above our threshold */
10089 		unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
10090 		if (memorystatus_available_pages > target_threshold) {
10091 			return TRUE;
10092 		}
10093 		return FALSE;
10094 	} else {
10095 		return (AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0;
10096 	}
10097 }
10098 
10099 boolean_t
10100 VM_PRESSURE_CRITICAL_TO_WARNING(void)
10101 {
10102 	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10103 		/* Available pages above our threshold */
10104 		unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
10105 		if (memorystatus_available_pages > target_threshold) {
10106 			return TRUE;
10107 		}
10108 		return FALSE;
10109 	} else {
10110 		return (AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0;
10111 	}
10112 }
10113 #endif /* VM_PRESSURE_EVENTS */
10114 
10115 #if DEVELOPMENT || DEBUG
10116 bool compressor_running_perf_test;
10117 uint64_t compressor_perf_test_pages_processed;
10118 
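/*
 * Walk [start_addr, start_addr + buffer_size) in `map' and move every
 * resident page backing that range from its current paging queue onto
 * `queue', clearing the referenced state so the pages are eligible to be
 * paged out. Only top-level, unwired, anonymous (internal, shadowless)
 * memory is accepted; anything else fails with KERN_INVALID_ARGUMENT.
 * The number of pages actually moved is returned in *pages_moved.
 */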
10119 static kern_return_t
10120 move_pages_to_queue(
10121 	vm_map_t map,
10122 	user_addr_t start_addr,
10123 	size_t buffer_size,
10124 	vm_page_queue_head_t *queue,
10125 	size_t *pages_moved)
10126 {
10127 	kern_return_t err = KERN_SUCCESS;
10128 	vm_map_entry_t curr_entry = VM_MAP_ENTRY_NULL;
10129 	boolean_t addr_in_map = FALSE;
10130 	user_addr_t end_addr = USER_ADDR_NULL, curr_addr = USER_ADDR_NULL;
10131 	vm_object_t curr_object = VM_OBJECT_NULL;
10132 	*pages_moved = 0;
10133 
10134 
10135 	if (VM_MAP_PAGE_SIZE(map) != PAGE_SIZE_64) {
10136 		/*
10137 		 * We don't currently support benchmarking maps with a different page size
10138 		 * than the kernel's.
10139 		 */
10140 		return KERN_INVALID_ARGUMENT;
10141 	}
10142 
10143 	if (os_add_overflow(start_addr, buffer_size, &end_addr)) {
10144 		return KERN_INVALID_ARGUMENT;
10145 	}
10146 
10147 	vm_map_lock_read(map);
10148 	curr_addr = vm_map_trunc_page_mask(start_addr, VM_MAP_PAGE_MASK(map));
10149 	end_addr = vm_map_round_page_mask(start_addr + buffer_size, VM_MAP_PAGE_MASK(map));
10150 
10151 
10152 	while (curr_addr < end_addr) {
10153 		addr_in_map = vm_map_lookup_entry(map, curr_addr, &curr_entry);
10154 		if (!addr_in_map) {
10155 			err = KERN_INVALID_ARGUMENT;
10156 			break;
10157 		}
10158 		curr_object = VME_OBJECT(curr_entry);
10159 		if (curr_object) {
10160 			vm_object_lock(curr_object);
10161 			/* We really only want anonymous memory that's in the top level map and object here. */
10162 			if (curr_entry->is_sub_map || curr_entry->wired_count != 0 ||
10163 			    curr_object->shadow != VM_OBJECT_NULL || !curr_object->internal) {
10164 				err = KERN_INVALID_ARGUMENT;
10165 				vm_object_unlock(curr_object);
10166 				break;
10167 			}
10168 			vm_map_offset_t start_offset = (curr_addr - curr_entry->vme_start) + VME_OFFSET(curr_entry);
10169 			vm_map_offset_t end_offset = (MIN(curr_entry->vme_end, end_addr) -
10170 			    curr_entry->vme_start) + VME_OFFSET(curr_entry);
10171 			vm_map_offset_t curr_offset = start_offset;
10172 			vm_page_t curr_page;
10173 			while (curr_offset < end_offset) {
10174 				curr_page = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset));
10175 				if (curr_page != VM_PAGE_NULL) {
10176 					vm_page_lock_queues();
10177 					if (curr_page->vmp_laundry) {
10178 						vm_pageout_steal_laundry(curr_page, TRUE);
10179 					}
10180 					/*
10181 					 * We've already pulled any laundry pages out above, so this
10182 					 * page can't be on the pageout queue and it's safe to call
10183 					 * vm_page_queues_remove().
10184 					 */
10185 					bool donate = (curr_page->vmp_on_specialq == VM_PAGE_SPECIAL_Q_DONATE);
10186 					vm_page_queues_remove(curr_page, TRUE);
10187 					if (donate) {
10188 						/*
10189 						 * The compressor needs to see this bit to know
10190 						 * where this page needs to land. Also if stolen,
10191 						 * this bit helps put the page back in the right
10192 						 * special queue where it belongs.
10193 						 */
10194 						curr_page->vmp_on_specialq = VM_PAGE_SPECIAL_Q_DONATE;
10195 					}
10196 					// Clear the referenced bit so we ensure this gets paged out
10197 					curr_page->vmp_reference = false;
10198 					if (curr_page->vmp_pmapped) {
10199 						pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(curr_page),
10200 						    VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void*)NULL);
10201 					}
10202 					vm_page_queue_enter(queue, curr_page, vmp_pageq);
10203 					vm_page_unlock_queues();
10204 					*pages_moved += 1;
10205 				}
10206 				curr_offset += PAGE_SIZE_64;
10207 				curr_addr += PAGE_SIZE_64;
10208 			}
10209 			vm_object_unlock(curr_object);
10210 		} else {
			/* Entry has no backing object: fail rather than spin without advancing. */
			err = KERN_INVALID_ARGUMENT;
			break;
		}
10211 	}
10212 	vm_map_unlock_read(map);
10213 	return err;
10214 }
10215 
10216 /*
10217  * Local queue for processing benchmark pages.
10218  * Can't be allocated on the stack because the pointer has to
10219  * be packable.
10220  */
10221 vm_page_queue_head_t compressor_perf_test_queue VM_PAGE_PACKED_ALIGNED;
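/*
 * Compressor micro-benchmark (DEVELOPMENT || DEBUG only). The caller's buffer
 * is pulled onto a private queue, handed to vm_pageout_page_queue() to be
 * staged for the compressor, and the internal pageout I/O thread is woken to
 * drain it. We then sleep until the compressor threads report, via
 * compressor_perf_test_pages_processed, that at least page_count pages have
 * been processed. On success, *time is the elapsed time in nanoseconds,
 * *bytes_compressed is the number of bytes handed to the compressor, and
 * *compressor_growth is how much c_segment_compressed_bytes grew during the
 * run.
 */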
10222 kern_return_t
10223 run_compressor_perf_test(
10224 	user_addr_t buf,
10225 	size_t buffer_size,
10226 	uint64_t *time,
10227 	uint64_t *bytes_compressed,
10228 	uint64_t *compressor_growth)
10229 {
10230 	kern_return_t err = KERN_SUCCESS;
10231 	if (!VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
10232 		return KERN_NOT_SUPPORTED;
10233 	}
10234 	if (current_task() == kernel_task) {
10235 		return KERN_INVALID_ARGUMENT;
10236 	}
10237 	vm_page_lock_queues();
10238 	if (compressor_running_perf_test) {
10239 		/* Only run one instance of the benchmark at a time. */
10240 		vm_page_unlock_queues();
10241 		return KERN_RESOURCE_SHORTAGE;
10242 	}
10243 	vm_page_unlock_queues();
10244 	size_t page_count = 0;
10245 	vm_map_t map;
10246 	vm_page_t p, next;
10247 	uint64_t compressor_perf_test_start = 0, compressor_perf_test_end = 0;
10248 	uint64_t compressed_bytes_start = 0, compressed_bytes_end = 0;
10249 	*bytes_compressed = *compressor_growth = 0;
10250 
10251 	vm_page_queue_init(&compressor_perf_test_queue);
10252 	map = current_task()->map;
10253 	err = move_pages_to_queue(map, buf, buffer_size, &compressor_perf_test_queue, &page_count);
10254 	if (err != KERN_SUCCESS) {
10255 		goto out;
10256 	}
10257 
10258 	vm_page_lock_queues();
10259 	compressor_running_perf_test = true;
10260 	compressor_perf_test_pages_processed = 0;
10261 	/*
10262 	 * At this point the compressor threads should only be processing the benchmark
10263 	 * queue, so the change in c_segment_compressed_bytes over the course of the run
10264 	 * tells us how many compressed bytes we ended up using.
10265 	 */
10266 	compressed_bytes_start = os_atomic_load(&c_segment_compressed_bytes, relaxed);
10267 	vm_page_unlock_queues();
10268 
10269 	page_count = vm_pageout_page_queue(&compressor_perf_test_queue, page_count, true);
10270 
10271 	vm_page_lock_queues();
10272 	compressor_perf_test_start = mach_absolute_time();
10273 
10274 	// Wake up the compressor thread(s)
10275 	sched_cond_signal(&pgo_iothread_internal_state[0].pgo_wakeup,
10276 	    pgo_iothread_internal_state[0].pgo_iothread);
10277 
10278 	/*
10279 	 * Depending on when this test runs, the number of pages processed can land exactly
10280 	 * on or beyond page_count, so the comparison is "less than" rather than "not equal".
10281 	 */
10282 	while (compressor_perf_test_pages_processed < page_count) {
10283 		assert_wait((event_t) &compressor_perf_test_pages_processed, THREAD_UNINT);
10284 		vm_page_unlock_queues();
10285 		thread_block(THREAD_CONTINUE_NULL);
10286 		vm_page_lock_queues();
10287 	}
10288 	compressor_perf_test_end = mach_absolute_time();
10289 	compressed_bytes_end = os_atomic_load(&c_segment_compressed_bytes, relaxed);
10290 	vm_page_unlock_queues();
10291 
10292 
10293 out:
10294 	/*
10295 	 * If we errored out above, then we could still have some pages
10296 	 * on the local queue. Make sure to put them back on the active queue before
10297 	 * returning so they're not orphaned.
10298 	 */
10299 	vm_page_lock_queues();
10300 	absolutetime_to_nanoseconds(compressor_perf_test_end - compressor_perf_test_start, time);
10301 	p = (vm_page_t) vm_page_queue_first(&compressor_perf_test_queue);
10302 	while (p && !vm_page_queue_end(&compressor_perf_test_queue, (vm_page_queue_entry_t)p)) {
10303 		next = (vm_page_t)VM_PAGE_UNPACK_PTR(p->vmp_pageq.next);
10304 
10305 		vm_page_enqueue_active(p, FALSE);
10306 		p = next;
10307 	}
10308 
10309 	compressor_running_perf_test = false;
10310 	vm_page_unlock_queues();
10311 	if (err == KERN_SUCCESS) {
10312 		*bytes_compressed = page_count * PAGE_SIZE_64;
10313 		*compressor_growth = compressed_bytes_end - compressed_bytes_start;
10314 	}
10315 
10316 	/*
10317 	 * pageout_scan will consider waking the compactor swapper
10318 	 * before it blocks. Do the same thing here before we return
10319 	 * to ensure that back to back benchmark runs can't overly fragment the
10320 	 * compressor pool.
10321 	 */
10322 	vm_consider_waking_compactor_swapper();
10323 	return err;
10324 }
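
/*
 * Illustrative only: a hypothetical DEVELOPMENT-only caller (for example a
 * sysctl or test handler; the names below are made up and not part of this
 * file) would use the interface roughly like this:
 *
 *	uint64_t ns = 0, bytes = 0, growth = 0;
 *	kern_return_t kr;
 *
 *	kr = run_compressor_perf_test(user_buf, user_buf_len, &ns, &bytes, &growth);
 *	if (kr == KERN_SUCCESS) {
 *		printf("compressed %llu bytes in %llu ns, pool grew %llu bytes\n",
 *		    bytes, ns, growth);
 *	}
 *
 * where user_buf/user_buf_len describe a faulted-in anonymous buffer in the
 * calling task's map.
 */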
10325 #endif /* DEVELOPMENT || DEBUG */
10326