xref: /xnu-10063.121.3/bsd/kern/kern_memorystatus_policy.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/task.h>
31 #include <libkern/libkern.h>
32 #include <machine/atomic.h>
33 #include <mach/coalition.h>
34 #include <os/log.h>
35 #include <sys/coalition.h>
36 #include <sys/proc.h>
37 #include <sys/proc_internal.h>
38 #include <sys/kdebug.h>
39 #include <sys/kern_memorystatus.h>
40 #include <vm/vm_protos.h>
41 
42 #include <kern/kern_memorystatus_internal.h>
43 
44 /*
45  * All memory pressure policy decisions should live here, and there should be
46  * as little mechanism as possible. This file prioritizes readability.
47  */
48 
49 #pragma mark Policy Function Declarations
50 
51 #if CONFIG_JETSAM
52 static bool memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills);
53 #endif /* CONFIG_JETSAM */
54 
55 #pragma mark Memorystatus Health Check
56 
57 /*
58  * Each subsystem that relies on the memorystatus thread
59  * for resource exhaustion should put a health check in this section.
60  * The memorystatus thread runs all of the health checks
61  * to determine if the system is healthy. If the system is unhealthy
62  * it picks an action based on the system health status. See the
63  * Memorystatus Thread Actions section below.
64  */
65 
66 extern bool vm_compressor_needs_to_swap(bool wake_memorystatus_thread);
67 extern boolean_t vm_compressor_low_on_space(void);
68 extern bool vm_compressor_compressed_pages_nearing_limit(void);
69 extern bool vm_compressor_is_thrashing(void);
70 extern bool vm_compressor_swapout_is_ripe(void);
71 
72 #if XNU_TARGET_OS_WATCH
73 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED true
74 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS (60 * 15)
75 #else
76 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED false
77 #endif
78 extern pid_t memorystatus_freeze_last_pid_thawed;
79 extern uint64_t memorystatus_freeze_last_pid_thawed_ts;
80 
81 static void
memorystatus_health_check(memorystatus_system_health_t * status)82 memorystatus_health_check(memorystatus_system_health_t *status)
83 {
84 	memset(status, 0, sizeof(memorystatus_system_health_t));
85 #if CONFIG_JETSAM
86 	status->msh_available_pages_below_pressure = memorystatus_avail_pages_below_pressure();
87 	status->msh_available_pages_below_critical = memorystatus_avail_pages_below_critical();
88 	status->msh_compressor_is_low_on_space = (vm_compressor_low_on_space() == TRUE);
89 	status->msh_compressed_pages_nearing_limit = vm_compressor_compressed_pages_nearing_limit();
90 	status->msh_compressor_is_thrashing = !memorystatus_swap_all_apps && vm_compressor_is_thrashing();
91 #if CONFIG_PHANTOM_CACHE
92 	status->msh_phantom_cache_pressure = os_atomic_load(&memorystatus_phantom_cache_pressure, acquire);
93 #else
94 	status->msh_phantom_cache_pressure = false;
95 #endif /* CONFIG_PHANTOM_CACHE */
96 	if (!memorystatus_swap_all_apps &&
97 	    status->msh_phantom_cache_pressure &&
98 	    !(status->msh_compressor_is_thrashing && status->msh_compressor_is_low_on_space)) {
99 		status->msh_filecache_is_thrashing = true;
100 	}
101 	status->msh_compressor_is_low_on_space = os_atomic_load(&memorystatus_compressor_space_shortage, acquire);
102 	status->msh_pageout_starved = os_atomic_load(&memorystatus_pageout_starved, acquire);
103 	status->msh_swappable_compressor_segments_over_limit = memorystatus_swap_over_trigger(100);
104 	status->msh_swapin_queue_over_limit = memorystatus_swapin_over_trigger();
105 	status->msh_swap_low_on_space = vm_swap_low_on_space();
106 	status->msh_swap_out_of_space = vm_swap_out_of_space();
107 #endif /* CONFIG_JETSAM */
108 	status->msh_zone_map_is_exhausted = os_atomic_load(&memorystatus_zone_map_is_exhausted, relaxed);
109 }
110 
111 bool
memorystatus_is_system_healthy(const memorystatus_system_health_t * status)112 memorystatus_is_system_healthy(const memorystatus_system_health_t *status)
113 {
114 #if CONFIG_JETSAM
115 	return !(status->msh_available_pages_below_critical ||
116 	       status->msh_compressor_is_low_on_space ||
117 	       status->msh_compressor_is_thrashing ||
118 	       status->msh_filecache_is_thrashing ||
119 	       status->msh_zone_map_is_exhausted ||
120 	       status->msh_pageout_starved);
121 #else /* CONFIG_JETSAM */
122 	return !status->msh_zone_map_is_exhausted;
123 #endif /* CONFIG_JETSAM */
124 }
125 
126 
127 #pragma mark Memorystatus Thread Actions
128 
129 /*
130  * This section picks the appropriate memorystatus_action & deploys it.
131  */
132 
133 /*
134  * Inspects the state of various resources in the system to see if
135  * the system is healthy. If the system is not healthy, picks a
136  * memorystatus_action_t to recover the system.
137  *
138  * Every time the memorystatus thread wakes up it calls into here
139  * to pick an action. It will continue performing memorystatus actions until this
140  * function returns MEMORYSTATUS_KILL_NONE. At that point the thread will block.
141  */
142 memorystatus_action_t
memorystatus_pick_action(struct jetsam_thread_state * jetsam_thread,uint32_t * kill_cause,bool highwater_remaining,bool suspended_swappable_apps_remaining,bool swappable_apps_remaining,int * jld_idle_kills)143 memorystatus_pick_action(struct jetsam_thread_state *jetsam_thread,
144     uint32_t *kill_cause,
145     bool highwater_remaining,
146     bool suspended_swappable_apps_remaining,
147     bool swappable_apps_remaining,
148     int *jld_idle_kills)
149 {
150 	memorystatus_system_health_t status;
151 	memorystatus_health_check(&status);
152 	memorystatus_log_system_health(&status);
153 	bool is_system_healthy = memorystatus_is_system_healthy(&status);
154 
155 #if CONFIG_JETSAM
156 	if (status.msh_available_pages_below_pressure || !is_system_healthy) {
157 		/*
158 		 * If swap is enabled, first check if we're running low or are out of swap space.
159 		 */
160 		if (memorystatus_swap_all_apps && jetsam_kill_on_low_swap) {
161 			if (swappable_apps_remaining && status.msh_swap_out_of_space) {
162 				*kill_cause = kMemorystatusKilledLowSwap;
163 				return MEMORYSTATUS_KILL_SWAPPABLE;
164 			} else if (suspended_swappable_apps_remaining && status.msh_swap_low_on_space) {
165 				*kill_cause = kMemorystatusKilledLowSwap;
166 				return MEMORYSTATUS_KILL_SUSPENDED_SWAPPABLE;
167 			}
168 		}
169 
170 		/*
171 		 * We're below the pressure level or the system is unhealthy,
172 		 * regardless of the system health let's check if we should be swapping
173 		 * and if there are high watermark kills left to do.
174 		 */
175 		if (memorystatus_swap_all_apps) {
176 			if (status.msh_swappable_compressor_segments_over_limit && !vm_swapout_thread_running && !os_atomic_load(&vm_swapout_wake_pending, relaxed)) {
177 				/*
178 				 * TODO: The swapper will keep running until it has drained the entire early swapout queue.
179 				 * That might be overly aggressive & we should look into tuning it.
180 				 * See rdar://84102304.
181 				 */
182 				return MEMORYSTATUS_WAKE_SWAPPER;
183 			} else if (status.msh_swapin_queue_over_limit) {
184 				return MEMORYSTATUS_PROCESS_SWAPIN_QUEUE;
185 			} else if (status.msh_swappable_compressor_segments_over_limit) {
186 				memorystatus_log_info(
187 					"memorystatus: Skipping swap wakeup because the swap thread is already running. vm_swapout_thread_running=%d, vm_swapout_wake_pending=%d\n",
188 					vm_swapout_thread_running, os_atomic_load(&vm_swapout_wake_pending, relaxed));
189 			}
190 		}
191 
192 		if (highwater_remaining) {
193 			*kill_cause = kMemorystatusKilledHiwat;
194 			memorystatus_log("memorystatus: Looking for highwatermark kills.\n");
195 			return MEMORYSTATUS_KILL_HIWATER;
196 		}
197 	}
198 
199 	if (is_system_healthy) {
200 		*kill_cause = 0;
201 		return MEMORYSTATUS_KILL_NONE;
202 	}
203 
204 	/*
205 	 * At this point the system is unhealthy and there are no
206 	 * more highwatermark processes to kill.
207 	 */
208 
209 	if (!jetsam_thread->limit_to_low_bands) {
210 		if (memorystatus_check_aggressive_jetsam_needed(jld_idle_kills)) {
211 			memorystatus_log("memorystatus: Starting aggressive jetsam.\n");
212 			*kill_cause = kMemorystatusKilledProcThrashing;
213 			return MEMORYSTATUS_KILL_AGGRESSIVE;
214 		}
215 	}
216 	/*
217 	 * The system is unhealthy and we either don't need aggressive jetsam
218 	 * or are not allowed to deploy it.
219 	 * Kill in priority order. We'll use LRU within every band except the
220 	 * FG (which will be sorted by coalition role).
221 	 */
222 	*kill_cause = memorystatus_pick_kill_cause(&status);
223 	return MEMORYSTATUS_KILL_TOP_PROCESS;
224 #else /* CONFIG_JETSAM */
225 	(void) jetsam_thread;
226 	(void) jld_idle_kills;
227 	(void) suspended_swappable_apps_remaining;
228 	(void) swappable_apps_remaining;
229 	/*
230 	 * Without CONFIG_JETSAM, we only kill if the system is unhealthy.
231 	 * There is no aggressive jetsam and no
232 	 * early highwatermark killing.
233 	 */
234 	if (is_system_healthy) {
235 		*kill_cause = 0;
236 		return MEMORYSTATUS_KILL_NONE;
237 	}
238 	if (highwater_remaining) {
239 		*kill_cause = kMemorystatusKilledHiwat;
240 		return MEMORYSTATUS_KILL_HIWATER;
241 	} else {
242 		*kill_cause = memorystatus_pick_kill_cause(&status);
243 		return MEMORYSTATUS_KILL_TOP_PROCESS;
244 	}
245 #endif /* CONFIG_JETSAM */
246 }
247 
248 #pragma mark Aggressive Jetsam
249 /*
250  * This section defines when we deploy aggressive jetsam.
251  * Aggressive jetsam kills everything up to the jld_priority_band_max band.
252  */
253 
254 #if CONFIG_JETSAM
255 
256 static bool
257 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int jld_eval_aggressive_count, __unused int *jld_idle_kills, __unused int jld_idle_kill_candidates, int *total_candidates);
258 
259 /*
260  * kJetsamHighRelaunchCandidatesThreshold defines the percentage of candidates
261  * in the idle & deferred bands that need to be bad candidates in order to trigger
262  * aggressive jetsam.
263  */
264 #define kJetsamHighRelaunchCandidatesThreshold  (100)
265 
266 /* kJetsamMinCandidatesThreshold defines the minimum number of candidates in the
267  * idle/deferred bands to trigger aggressive jetsam. This value basically decides
268  * how much memory the system is ready to hold in the lower bands without triggering
269  * aggressive jetsam. This number should ideally be tuned based on the memory config
270  * of the device.
271  */
272 #define kJetsamMinCandidatesThreshold           (5)
273 
274 static bool
memorystatus_check_aggressive_jetsam_needed(int * jld_idle_kills)275 memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills)
276 {
277 	bool aggressive_jetsam_needed = false;
278 	int total_candidates = 0;
279 	/*
280 	 * The aggressive jetsam logic looks at the number of times it has been in the
281 	 * aggressive loop to determine the max priority band it should kill upto. The
282 	 * static variables below are used to track that property.
283 	 *
284 	 * To reset those values, the implementation checks if it has been
285 	 * memorystatus_jld_eval_period_msecs since the parameters were reset.
286 	 */
287 
288 	if (memorystatus_jld_enabled == FALSE) {
289 		/* If aggressive jetsam is disabled, nothing to do here */
290 		return FALSE;
291 	}
292 
293 	/* Get current timestamp (msecs only) */
294 	struct timeval  jld_now_tstamp = {0, 0};
295 	uint64_t        jld_now_msecs = 0;
296 	microuptime(&jld_now_tstamp);
297 	jld_now_msecs = (jld_now_tstamp.tv_sec * 1000);
298 
299 	/*
300 	 * Look at the number of candidates in the idle and deferred band and
301 	 * how many out of them are marked as high relaunch probability.
302 	 */
303 	aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_sysproc_aging(jld_eval_aggressive_count,
304 	    jld_idle_kills, jld_idle_kill_candidates, &total_candidates);
305 
306 	/*
307 	 * Check if its been really long since the aggressive jetsam evaluation
308 	 * parameters have been refreshed. This logic also resets the jld_eval_aggressive_count
309 	 * counter to make sure we reset the aggressive jetsam severity.
310 	 */
311 	boolean_t param_reval = false;
312 
313 	if ((total_candidates == 0) ||
314 	    (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) {
315 		jld_timestamp_msecs      = jld_now_msecs;
316 		jld_idle_kill_candidates = total_candidates;
317 		*jld_idle_kills          = 0;
318 		jld_eval_aggressive_count = 0;
319 		jld_priority_band_max   = JETSAM_PRIORITY_UI_SUPPORT;
320 		param_reval = true;
321 	}
322 
323 	/*
324 	 * It is also possible that the system is down to a very small number of processes in the candidate
325 	 * bands. In that case, the decisions made by the memorystatus_aggressive_jetsam_needed_* routines
326 	 * would not be useful. In that case, do not trigger aggressive jetsam.
327 	 */
328 	if (total_candidates < kJetsamMinCandidatesThreshold) {
329 #if DEVELOPMENT || DEBUG
330 		memorystatus_log_info(
331 			"memorystatus: aggressive: [FAILED] Low Candidate Count (current: %d, threshold: %d)\n", total_candidates, kJetsamMinCandidatesThreshold);
332 #endif /* DEVELOPMENT || DEBUG */
333 		aggressive_jetsam_needed = false;
334 	}
335 	return aggressive_jetsam_needed;
336 }
337 
338 static bool
memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int eval_aggressive_count,__unused int * idle_kills,__unused int idle_kill_candidates,int * total_candidates)339 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int eval_aggressive_count, __unused int *idle_kills, __unused int idle_kill_candidates, int *total_candidates)
340 {
341 	bool aggressive_jetsam_needed = false;
342 
343 	/*
344 	 * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, we maintain the jetsam
345 	 * relaunch behavior for all daemons. Also, daemons and apps are aged in deferred bands on
346 	 * every dirty->clean transition. For this aging policy, the best way to determine if
347 	 * aggressive jetsam is needed, is to see if the kill candidates are mostly bad candidates.
348 	 * If yes, then we need to go to higher bands to reclaim memory.
349 	 */
350 	proc_list_lock();
351 	/* Get total candidate counts for idle and idle deferred bands */
352 	*total_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].count + memstat_bucket[system_procs_aging_band].count;
353 	/* Get counts of bad kill candidates in idle and idle deferred bands */
354 	int bad_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].relaunch_high_count + memstat_bucket[system_procs_aging_band].relaunch_high_count;
355 
356 	proc_list_unlock();
357 
358 	/* Check if the number of bad candidates is greater than kJetsamHighRelaunchCandidatesThreshold % */
359 	aggressive_jetsam_needed = (((bad_candidates * 100) / *total_candidates) >= kJetsamHighRelaunchCandidatesThreshold);
360 
361 	/*
362 	 * Since the new aging policy bases the aggressive jetsam trigger on percentage of
363 	 * bad candidates, it is prone to being overly aggressive. In order to mitigate that,
364 	 * make sure the system is really under memory pressure before triggering aggressive
365 	 * jetsam.
366 	 */
367 	if (memorystatus_available_pages > memorystatus_sysproc_aging_aggr_pages) {
368 		aggressive_jetsam_needed = false;
369 	}
370 
371 #if DEVELOPMENT || DEBUG
372 	memorystatus_log_info(
373 		"memorystatus: aggressive%d: [%s] Bad Candidate Threshold Check (total: %d, bad: %d, threshold: %d %%); Memory Pressure Check (available_pgs: %llu, threshold_pgs: %llu)\n",
374 		eval_aggressive_count, aggressive_jetsam_needed ? "PASSED" : "FAILED", *total_candidates, bad_candidates,
375 		kJetsamHighRelaunchCandidatesThreshold, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, (uint64_t)memorystatus_sysproc_aging_aggr_pages);
376 #endif /* DEVELOPMENT || DEBUG */
377 	return aggressive_jetsam_needed;
378 }
379 
380 #endif /* CONFIG_JETSAM */
381 
382 #pragma mark Freezer
383 #if CONFIG_FREEZE
384 /*
385  * Freezer policies
386  */
387 
388 /*
389  * These functions determine what is eligible for the freezer
390  * and the order that we consider freezing them
391  */
392 
393 /*
394  * Checks if the given process is eligible for the freezer.
395  * Processes can only be frozen if this returns true.
396  */
397 bool
memorystatus_is_process_eligible_for_freeze(proc_t p)398 memorystatus_is_process_eligible_for_freeze(proc_t p)
399 {
400 	/*
401 	 * Called with proc_list_lock held.
402 	 */
403 
404 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
405 
406 	bool should_freeze = false;
407 	uint32_t state = 0, pages = 0;
408 	bool first_consideration = true;
409 	task_t task;
410 
411 	state = p->p_memstat_state;
412 
413 	if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
414 		if (state & P_MEMSTAT_FREEZE_DISABLED) {
415 			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled;
416 		}
417 		goto out;
418 	}
419 
420 	task = proc_task(p);
421 
422 	if (isSysProc(p)) {
423 		/*
424 		 * Daemon:- We consider freezing it if:
425 		 * - it belongs to a coalition and the leader is frozen, and,
426 		 * - its role in the coalition is XPC service.
427 		 *
428 		 * We skip memory size requirements in this case.
429 		 */
430 		int task_role_in_coalition = 0;
431 		proc_t leader_proc = memorystatus_get_coalition_leader_and_role(p, &task_role_in_coalition);
432 		if (leader_proc == PROC_NULL || leader_proc == p) {
433 			/*
434 			 * Jetsam coalition is leaderless or the leader is not an app.
435 			 * Either way, don't freeze this proc.
436 			 */
437 			goto out;
438 		}
439 
440 		/* Leader must be frozen */
441 		if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
442 			goto out;
443 		}
444 		/* Only freeze XPC services */
445 		if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
446 			should_freeze = true;
447 		}
448 
449 		goto out;
450 	} else {
451 		/*
452 		 * Application. Only freeze if it's suspended.
453 		 */
454 		if (!(state & P_MEMSTAT_SUSPENDED)) {
455 			goto out;
456 		}
457 	}
458 
459 	/*
460 	 * We're interested in tracking what percentage of
461 	 * eligible apps actually get frozen.
462 	 * To avoid skewing the metrics towards processes which
463 	 * are considered more frequently, we only track failures once
464 	 * per process.
465 	 */
466 	first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED);
467 
468 	if (first_consideration) {
469 		memorystatus_freezer_stats.mfs_process_considered_count++;
470 		p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED;
471 	}
472 
473 	/* Only freeze applications meeting our minimum resident page criteria */
474 	memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
475 	if (pages < memorystatus_freeze_pages_min) {
476 		if (first_consideration) {
477 			memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
478 		}
479 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages;
480 		goto out;
481 	}
482 
483 	/* Don't freeze processes that are already exiting on core. It may have started exiting
484 	 * after we chose it for freeze, but before we obtained the proc_list_lock.
485 	 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
486 	 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
487 	 */
488 	if (proc_list_exited(p)) {
489 		if (first_consideration) {
490 			memorystatus_freezer_stats.mfs_error_other_count++;
491 		}
492 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther;
493 		goto out;
494 	}
495 
496 	if (!memorystatus_freezer_use_ordered_list) {
497 		/*
498 		 * We're not using the ordered list so we need to check
499 		 * that dasd recommended the process. Note that the ordered list
500 		 * algorithm only considers processes on the list in the first place
501 		 * so there's no need to double check here.
502 		 */
503 		if (!memorystatus_freeze_process_is_recommended(p)) {
504 			if (first_consideration) {
505 				memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
506 			}
507 			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse;
508 			goto out;
509 		}
510 	}
511 
512 	if (!(state & P_MEMSTAT_FROZEN) && p->p_memstat_effectivepriority > memorystatus_freeze_max_candidate_band) {
513 		/*
514 		 * Proc has been elevated by something else.
515 		 * Don't freeze it.
516 		 */
517 		if (first_consideration) {
518 			memorystatus_freezer_stats.mfs_error_elevated_count++;
519 		}
520 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonElevated;
521 		goto out;
522 	}
523 
524 	should_freeze = true;
525 out:
526 	if (should_freeze && !(state & P_MEMSTAT_FROZEN)) {
527 		/*
528 		 * Reset the skip reason. If it's killed before we manage to actually freeze it
529 		 * we failed to consider it early enough.
530 		 */
531 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
532 		if (!first_consideration) {
533 			/*
534 			 * We're freezing this for the first time and we previously considered it ineligible.
535 			 * Bump the considered count so that we track this as 1 failure
536 			 * and 1 success.
537 			 */
538 			memorystatus_freezer_stats.mfs_process_considered_count++;
539 		}
540 	}
541 	return should_freeze;
542 }
543 
544 bool
memorystatus_freeze_proc_is_refreeze_eligible(proc_t p)545 memorystatus_freeze_proc_is_refreeze_eligible(proc_t p)
546 {
547 	return (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) != 0;
548 }
549 
550 
551 static proc_t
memorystatus_freeze_pick_refreeze_process(proc_t last_p)552 memorystatus_freeze_pick_refreeze_process(proc_t last_p)
553 {
554 	proc_t p = PROC_NULL, next_p = PROC_NULL;
555 	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
556 	if (last_p == PROC_NULL) {
557 		next_p = memorystatus_get_first_proc_locked(&band, FALSE);
558 	} else {
559 		next_p = memorystatus_get_next_proc_locked(&band, last_p, FALSE);
560 	}
561 	while (next_p) {
562 		p = next_p;
563 		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
564 		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
565 			/* Process is already frozen & hasn't been thawed. */
566 			continue;
567 		}
568 		/*
569 		 * Has to have been frozen once before.
570 		 */
571 		if (!(p->p_memstat_state & P_MEMSTAT_FROZEN)) {
572 			continue;
573 		}
574 
575 		/*
576 		 * Not currently being looked at for something.
577 		 */
578 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
579 			continue;
580 		}
581 
582 #if FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED
583 		/*
584 		 * Don't refreeze the last process we just thawed if still within the timeout window
585 		 */
586 		if (p->p_pid == memorystatus_freeze_last_pid_thawed) {
587 			uint64_t timeout_delta_abs;
588 			nanoseconds_to_absolutetime(FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS * NSEC_PER_SEC, &timeout_delta_abs);
589 			if (mach_absolute_time() < (memorystatus_freeze_last_pid_thawed_ts + timeout_delta_abs)) {
590 				continue;
591 			}
592 		}
593 #endif
594 
595 		/*
596 		 * Found it
597 		 */
598 		return p;
599 	}
600 	return PROC_NULL;
601 }
602 
603 proc_t
memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator * iterator)604 memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator *iterator)
605 {
606 	proc_t p = PROC_NULL, next_p = PROC_NULL;
607 	unsigned int band = JETSAM_PRIORITY_IDLE;
608 
609 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
610 	/*
611 	 * If the freezer is full, only consider refreezes.
612 	 */
613 	if (iterator->refreeze_only || memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
614 		if (!iterator->refreeze_only) {
615 			/*
616 			 * The first time the iterator starts to return refreeze
617 			 * candidates, we need to reset the last pointer b/c it's pointing into the wrong band.
618 			 */
619 			iterator->last_p = PROC_NULL;
620 			iterator->refreeze_only = true;
621 		}
622 		iterator->last_p = memorystatus_freeze_pick_refreeze_process(iterator->last_p);
623 		return iterator->last_p;
624 	}
625 
626 	/*
627 	 * Search for the next freezer candidate.
628 	 */
629 	if (memorystatus_freezer_use_ordered_list) {
630 		while (iterator->global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
631 			p = memorystatus_freezer_candidate_list_get_proc(
632 				&memorystatus_global_freeze_list,
633 				(iterator->global_freeze_list_index)++,
634 				&memorystatus_freezer_stats.mfs_freeze_pid_mismatches);
635 
636 			if (p != PROC_NULL && memorystatus_is_process_eligible_for_freeze(p)) {
637 				iterator->last_p = p;
638 				return iterator->last_p;
639 			}
640 		}
641 	} else {
642 		if (iterator->last_p == PROC_NULL) {
643 			next_p = memorystatus_get_first_proc_locked(&band, FALSE);
644 		} else {
645 			next_p = memorystatus_get_next_proc_locked(&band, iterator->last_p, FALSE);
646 		}
647 		while (next_p) {
648 			p = next_p;
649 			if (memorystatus_is_process_eligible_for_freeze(p)) {
650 				iterator->last_p = p;
651 				return iterator->last_p;
652 			} else {
653 				next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
654 			}
655 		}
656 	}
657 
658 	/*
659 	 * Failed to find a new freezer candidate.
660 	 * Try to re-freeze.
661 	 */
662 	if (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold) {
663 		assert(!iterator->refreeze_only);
664 		iterator->refreeze_only = true;
665 		iterator->last_p = memorystatus_freeze_pick_refreeze_process(PROC_NULL);
666 		return iterator->last_p;
667 	}
668 	return PROC_NULL;
669 }
670 
671 /*
672  * memorystatus_pages_update calls this function whenever the number
673  * of available pages changes. It wakes the freezer thread iff the function returns
674  * true. The freezer thread will try to freeze (or refreeze) up to 1 process
675  * before blocking again.
676  *
677  * Note the freezer thread is also woken up by memorystatus_on_inactivity.
678  */
679 
680 bool
memorystatus_freeze_thread_should_run()681 memorystatus_freeze_thread_should_run()
682 {
683 	/*
684 	 * No freezer_mutex held here...see why near call-site
685 	 * within memorystatus_pages_update().
686 	 */
687 
688 	if (memorystatus_freeze_enabled == false) {
689 		return false;
690 	}
691 
692 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
693 		return false;
694 	}
695 
696 	memorystatus_freezer_stats.mfs_below_threshold_count++;
697 
698 	if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
699 		/*
700 		 * Consider this as a skip even if we wake up to refreeze because
701 		 * we won't freeze any new procs.
702 		 */
703 		memorystatus_freezer_stats.mfs_skipped_full_count++;
704 		if (memorystatus_refreeze_eligible_count < memorystatus_min_thaw_refreeze_threshold) {
705 			return false;
706 		}
707 	}
708 
709 	if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
710 		memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
711 		return false;
712 	}
713 
714 	uint64_t curr_time = mach_absolute_time();
715 
716 	if (curr_time < memorystatus_freezer_thread_next_run_ts) {
717 		return false;
718 	}
719 
720 	return true;
721 }
722 
723 size_t
memorystatus_pick_freeze_count_for_wakeup()724 memorystatus_pick_freeze_count_for_wakeup()
725 {
726 	size_t num_to_freeze = 0;
727 	if (!memorystatus_swap_all_apps) {
728 		num_to_freeze = 1;
729 	} else {
730 		/*
731 		 * When app swap is enabled, we want the freezer thread to aggressively freeze
732 		 * all candidates so we clear out space for the fg working set.
733 		 * But we still cap it to the current size of the candidate bands to avoid
734 		 * consuming excessive CPU if there's a lot of churn in the candidate band.
735 		 */
736 		proc_list_lock();
737 		for (unsigned int band = JETSAM_PRIORITY_IDLE; band <= memorystatus_freeze_max_candidate_band; band++) {
738 			num_to_freeze += memstat_bucket[band].count;
739 		}
740 		proc_list_unlock();
741 	}
742 
743 	return num_to_freeze;
744 }
745 
746 #endif /* CONFIG_FREEZE */
747