xref: /xnu-11215.61.5/bsd/kern/kern_memorystatus_policy.c (revision 4f1223e81cd707a65cc109d0b8ad6653699da3c4)
1 /*
2  * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/task.h>
31 #include <libkern/libkern.h>
32 #include <machine/atomic.h>
33 #include <mach/coalition.h>
34 #include <os/log.h>
35 #include <sys/coalition.h>
36 #include <sys/proc.h>
37 #include <sys/proc_internal.h>
38 #include <sys/sysctl.h>
39 #include <sys/kdebug.h>
40 #include <sys/kern_memorystatus.h>
41 #include <vm/vm_protos.h>
42 #include <vm/vm_compressor_xnu.h>
43 
44 #include <kern/kern_memorystatus_internal.h>
45 
46 /*
47  * All memory pressure policy decisions should live here, and there should be
48  * as little mechanism as possible. This file prioritizes readability.
49  */
50 
51 #pragma mark Policy Function Declarations
52 
53 #if CONFIG_JETSAM
54 static bool memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills);
55 #endif /* CONFIG_JETSAM */
56 
57 #pragma mark Memorystatus Health Check
58 
59 /*
60  * Each subsystem that relies on the memorystatus thread
61  * for resource exhaustion should put a health check in this section.
62  * The memorystatus thread runs all of the health checks
63  * to determine if the system is healthy. If the system is unhealthy
64  * it picks an action based on the system health status. See the
65  * Memorystatus Thread Actions section below.
66  */
67 
68 
69 #if XNU_TARGET_OS_WATCH
70 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED true
71 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS (60 * 15)
72 #else
73 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED false
74 #endif
75 extern pid_t memorystatus_freeze_last_pid_thawed;
76 extern uint64_t memorystatus_freeze_last_pid_thawed_ts;
77 
78 static void
memorystatus_health_check(memorystatus_system_health_t * status)79 memorystatus_health_check(memorystatus_system_health_t *status)
80 {
81 	memset(status, 0, sizeof(memorystatus_system_health_t));
82 #if CONFIG_JETSAM
83 	memstat_evaluate_page_shortage(
84 		&status->msh_available_pages_below_soft,
85 		&status->msh_available_pages_below_idle,
86 		&status->msh_available_pages_below_critical);
87 	status->msh_compressor_is_low_on_space = (vm_compressor_low_on_space() == TRUE);
88 	status->msh_compressed_pages_nearing_limit = vm_compressor_compressed_pages_nearing_limit();
89 	status->msh_compressor_is_thrashing = !memorystatus_swap_all_apps && vm_compressor_is_thrashing();
90 #if CONFIG_PHANTOM_CACHE
91 	status->msh_phantom_cache_pressure = os_atomic_load(&memorystatus_phantom_cache_pressure, acquire);
92 #else
93 	status->msh_phantom_cache_pressure = false;
94 #endif /* CONFIG_PHANTOM_CACHE */
95 	if (!memorystatus_swap_all_apps &&
96 	    status->msh_phantom_cache_pressure &&
97 	    !(status->msh_compressor_is_thrashing && status->msh_compressor_is_low_on_space)) {
98 		status->msh_filecache_is_thrashing = true;
99 	}
100 	status->msh_compressor_is_low_on_space = os_atomic_load(&memorystatus_compressor_space_shortage, acquire);
101 	status->msh_pageout_starved = os_atomic_load(&memorystatus_pageout_starved, acquire);
102 	status->msh_swappable_compressor_segments_over_limit = memorystatus_swap_over_trigger(100);
103 	status->msh_swapin_queue_over_limit = memorystatus_swapin_over_trigger();
104 	status->msh_swap_low_on_space = vm_swap_low_on_space();
105 	status->msh_swap_out_of_space = vm_swap_out_of_space();
106 #endif /* CONFIG_JETSAM */
107 	status->msh_zone_map_is_exhausted = os_atomic_load(&memorystatus_zone_map_is_exhausted, relaxed);
108 }
109 
110 bool
memorystatus_is_system_healthy(const memorystatus_system_health_t * status)111 memorystatus_is_system_healthy(const memorystatus_system_health_t *status)
112 {
113 #if CONFIG_JETSAM
114 	return !(status->msh_available_pages_below_critical ||
115 	       status->msh_compressor_is_low_on_space ||
116 	       status->msh_compressor_is_thrashing ||
117 	       status->msh_filecache_is_thrashing ||
118 	       status->msh_zone_map_is_exhausted ||
119 	       status->msh_pageout_starved);
120 #else /* CONFIG_JETSAM */
121 	return !status->msh_zone_map_is_exhausted;
122 #endif /* CONFIG_JETSAM */
123 }
124 
125 
126 #pragma mark Memorystatus Thread Actions
127 
128 /*
129  * This section picks the appropriate memorystatus_action & deploys it.
130  */
131 
132 /*
133  * Inspects the state of various resources in the system to see if
134  * the system is healthy. If the system is not healthy, picks a
135  * memorystatus_action_t to recover the system.
136  *
137  * Every time the memorystatus thread wakes up it calls into here
138  * to pick an action. It will continue performing memorystatus actions until this
139  * function returns MEMORYSTATUS_KILL_NONE. At that point the thread will block.
140  */
141 memorystatus_action_t
memorystatus_pick_action(jetsam_state_t state,uint32_t * kill_cause,bool highwater_remaining,bool suspended_swappable_apps_remaining,bool swappable_apps_remaining,int * jld_idle_kills)142 memorystatus_pick_action(jetsam_state_t state,
143     uint32_t *kill_cause,
144     bool highwater_remaining,
145     bool suspended_swappable_apps_remaining,
146     bool swappable_apps_remaining,
147     int *jld_idle_kills)
148 {
149 	memorystatus_system_health_t status;
150 	memorystatus_health_check(&status);
151 	memorystatus_log_system_health(&status);
152 	bool is_system_healthy = memorystatus_is_system_healthy(&status);
153 
154 #if CONFIG_JETSAM
155 	if (status.msh_available_pages_below_soft || !is_system_healthy) {
156 		/*
157 		 * If swap is enabled, first check if we're running low or are out of swap space.
158 		 */
159 		if (memorystatus_swap_all_apps && jetsam_kill_on_low_swap) {
160 			if (swappable_apps_remaining && status.msh_swap_out_of_space) {
161 				*kill_cause = kMemorystatusKilledLowSwap;
162 				return MEMORYSTATUS_KILL_SWAPPABLE;
163 			} else if (suspended_swappable_apps_remaining && status.msh_swap_low_on_space) {
164 				*kill_cause = kMemorystatusKilledLowSwap;
165 				return MEMORYSTATUS_KILL_SUSPENDED_SWAPPABLE;
166 			}
167 		}
168 
169 		/*
170 		 * We're below the pressure level or the system is unhealthy,
171 		 * regardless of the system health let's check if we should be swapping
172 		 * and if there are high watermark kills left to do.
173 		 */
174 		if (memorystatus_swap_all_apps) {
175 			if (status.msh_swappable_compressor_segments_over_limit && !vm_swapout_thread_running && !os_atomic_load(&vm_swapout_wake_pending, relaxed)) {
176 				/*
177 				 * TODO: The swapper will keep running until it has drained the entire early swapout queue.
178 				 * That might be overly aggressive & we should look into tuning it.
179 				 * See rdar://84102304.
180 				 */
181 				return MEMORYSTATUS_WAKE_SWAPPER;
182 			} else if (status.msh_swapin_queue_over_limit) {
183 				return MEMORYSTATUS_PROCESS_SWAPIN_QUEUE;
184 			} else if (status.msh_swappable_compressor_segments_over_limit) {
185 				memorystatus_log_info(
186 					"memorystatus: Skipping swap wakeup because the swap thread is already running. vm_swapout_thread_running=%d, vm_swapout_wake_pending=%d\n",
187 					vm_swapout_thread_running, os_atomic_load(&vm_swapout_wake_pending, relaxed));
188 			}
189 		}
190 
191 		if (highwater_remaining) {
192 			*kill_cause = kMemorystatusKilledHiwat;
193 			return MEMORYSTATUS_KILL_HIWATER;
194 		}
195 	}
196 
197 	if (status.msh_available_pages_below_idle &&
198 	    os_atomic_load(&memstat_bucket[JETSAM_PRIORITY_IDLE].count, relaxed) > 0 &&
199 	    is_system_healthy) {
200 		/*
201 		 * The system is below the idle threshold but otherwise healthy.
202 		 */
203 		*kill_cause = kMemorystatusKilledIdleExit;
204 		return MEMORYSTATUS_KILL_IDLE;
205 	}
206 
207 	if (is_system_healthy) {
208 		*kill_cause = 0;
209 		return MEMORYSTATUS_KILL_NONE;
210 	}
211 
212 	/*
213 	 * At this point the system is unhealthy and there are no
214 	 * more highwatermark processes to kill.
215 	 */
216 
217 	if (!state->limit_to_low_bands) {
218 		if (memorystatus_check_aggressive_jetsam_needed(jld_idle_kills)) {
219 			memorystatus_log("memorystatus: Starting aggressive jetsam.\n");
220 			*kill_cause = kMemorystatusKilledProcThrashing;
221 			return MEMORYSTATUS_KILL_AGGRESSIVE;
222 		}
223 	}
224 
225 	/*
226 	 * The system is unhealthy and we either don't need aggressive jetsam
227 	 * or are not allowed to deploy it.
228 	 * Kill in priority order. We'll use LRU within every band except the
229 	 * FG (which will be sorted by coalition role).
230 	 */
231 	*kill_cause = memorystatus_pick_kill_cause(&status);
232 	return MEMORYSTATUS_KILL_TOP_PROCESS;
233 #else /* CONFIG_JETSAM */
234 	(void) state;
235 	(void) jld_idle_kills;
236 	(void) suspended_swappable_apps_remaining;
237 	(void) swappable_apps_remaining;
238 	/*
239 	 * Without CONFIG_JETSAM, we only kill if the system is unhealthy.
240 	 * There is no aggressive jetsam and no
241 	 * early highwatermark killing.
242 	 */
243 	if (is_system_healthy) {
244 		*kill_cause = 0;
245 		return MEMORYSTATUS_KILL_NONE;
246 	}
247 	if (highwater_remaining) {
248 		*kill_cause = kMemorystatusKilledHiwat;
249 		return MEMORYSTATUS_KILL_HIWATER;
250 	} else {
251 		*kill_cause = memorystatus_pick_kill_cause(&status);
252 		return MEMORYSTATUS_KILL_TOP_PROCESS;
253 	}
254 #endif /* CONFIG_JETSAM */
255 }
256 
257 #pragma mark Aggressive Jetsam
258 /*
259  * This section defines when we deploy aggressive jetsam.
260  * Aggressive jetsam kills everything up to the jld_priority_band_max band.
261  */
262 
263 #if CONFIG_JETSAM
264 
265 static bool
266 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int jld_eval_aggressive_count, __unused int *jld_idle_kills, __unused int jld_idle_kill_candidates, int *total_candidates);
267 
268 /*
269  * kJetsamHighRelaunchCandidatesThreshold defines the percentage of candidates
270  * in the idle & deferred bands that need to be bad candidates in order to trigger
271  * aggressive jetsam.
272  */
273 TUNABLE_DEV_WRITEABLE(unsigned int, kJetsamHighRelaunchCandidatesThreshold, "jetsam_high_relaunch_candidates_threshold_percent", 100);
274 #if DEVELOPMENT || DEBUG
275 SYSCTL_UINT(_kern, OID_AUTO, jetsam_high_relaunch_candidates_threshold_percent, CTLFLAG_RW | CTLFLAG_LOCKED, &kJetsamHighRelaunchCandidatesThreshold, 100, "");
276 #endif /* DEVELOPMENT || DEBUG */
277 
278 /* kJetsamMinCandidatesThreshold defines the minimum number of candidates in the
279  * idle/deferred bands to trigger aggressive jetsam. This value basically decides
280  * how much memory the system is ready to hold in the lower bands without triggering
281  * aggressive jetsam. This number should ideally be tuned based on the memory config
282  * of the device.
283  */
284 TUNABLE_DT_DEV_WRITEABLE(unsigned int, kJetsamMinCandidatesThreshold, "/defaults", "kern.jetsam_min_candidates_threshold", "jetsam_min_candidates_threshold", 5, TUNABLE_DT_CHECK_CHOSEN);
285 #if DEVELOPMENT || DEBUG
286 SYSCTL_UINT(_kern, OID_AUTO, jetsam_min_candidates_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &kJetsamMinCandidatesThreshold, 5, "");
287 #endif /* DEVELOPMENT || DEBUG */
288 
289 static bool
memorystatus_check_aggressive_jetsam_needed(int * jld_idle_kills)290 memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills)
291 {
292 	bool aggressive_jetsam_needed = false;
293 	int total_candidates = 0;
294 	/*
295 	 * The aggressive jetsam logic looks at the number of times it has been in the
296 	 * aggressive loop to determine the max priority band it should kill upto. The
297 	 * static variables below are used to track that property.
298 	 *
299 	 * To reset those values, the implementation checks if it has been
300 	 * memorystatus_jld_eval_period_msecs since the parameters were reset.
301 	 */
302 
303 	if (memorystatus_jld_enabled == FALSE) {
304 		/* If aggressive jetsam is disabled, nothing to do here */
305 		return false;
306 	}
307 
308 	/* Get current timestamp (msecs only) */
309 	struct timeval  jld_now_tstamp = {0, 0};
310 	uint64_t        jld_now_msecs = 0;
311 	microuptime(&jld_now_tstamp);
312 	jld_now_msecs = (jld_now_tstamp.tv_sec * 1000);
313 
314 	/*
315 	 * Look at the number of candidates in the idle and deferred band and
316 	 * how many out of them are marked as high relaunch probability.
317 	 */
318 	aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_sysproc_aging(jld_eval_aggressive_count,
319 	    jld_idle_kills, jld_idle_kill_candidates, &total_candidates);
320 
321 	/*
322 	 * It is also possible that the system is down to a very small number of processes in the candidate
323 	 * bands. In that case, the decisions made by the memorystatus_aggressive_jetsam_needed_* routines
324 	 * would not be useful. In that case, do not trigger aggressive jetsam.
325 	 */
326 	if (total_candidates < kJetsamMinCandidatesThreshold) {
327 		memorystatus_log_debug(
328 			"memorystatus: aggressive: [FAILED] Low Candidate "
329 			"Count (current: %d, threshold: %d)\n",
330 			total_candidates, kJetsamMinCandidatesThreshold);
331 		aggressive_jetsam_needed = false;
332 	}
333 
334 	/*
335 	 * Check if its been really long since the aggressive jetsam evaluation
336 	 * parameters have been refreshed. This logic also resets the jld_eval_aggressive_count
337 	 * counter to make sure we reset the aggressive jetsam severity.
338 	 */
339 	if ((total_candidates == 0) ||
340 	    (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) {
341 		jld_timestamp_msecs       = jld_now_msecs;
342 		jld_idle_kill_candidates  = total_candidates;
343 		*jld_idle_kills           = 0;
344 		jld_eval_aggressive_count = 0;
345 	}
346 
347 	return aggressive_jetsam_needed;
348 }
349 
350 static bool
memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int eval_aggressive_count,__unused int * idle_kills,__unused int idle_kill_candidates,int * total_candidates)351 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int eval_aggressive_count, __unused int *idle_kills, __unused int idle_kill_candidates, int *total_candidates)
352 {
353 	bool aggressive_jetsam_needed = false;
354 
355 	/*
356 	 * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, we maintain the jetsam
357 	 * relaunch behavior for all daemons. Also, daemons and apps are aged in deferred bands on
358 	 * every dirty->clean transition. For this aging policy, the best way to determine if
359 	 * aggressive jetsam is needed, is to see if the kill candidates are mostly bad candidates.
360 	 * If yes, then we need to go to higher bands to reclaim memory.
361 	 */
362 	proc_list_lock();
363 	/* Get total candidate counts for idle and idle deferred bands */
364 	*total_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].count + memstat_bucket[system_procs_aging_band].count;
365 	/* Get counts of bad kill candidates in idle and idle deferred bands */
366 	int bad_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].relaunch_high_count + memstat_bucket[system_procs_aging_band].relaunch_high_count;
367 
368 	proc_list_unlock();
369 
370 	/* Check if the number of bad candidates is greater than kJetsamHighRelaunchCandidatesThreshold % */
371 	aggressive_jetsam_needed = (((bad_candidates * 100) / *total_candidates) >= kJetsamHighRelaunchCandidatesThreshold);
372 
373 	/*
374 	 * Since the new aging policy bases the aggressive jetsam trigger on percentage of
375 	 * bad candidates, it is prone to being overly aggressive. In order to mitigate that,
376 	 * make sure the system is really under memory pressure before triggering aggressive
377 	 * jetsam.
378 	 */
379 	if (memorystatus_available_pages > memorystatus_sysproc_aging_aggr_pages) {
380 		aggressive_jetsam_needed = false;
381 	}
382 
383 #if DEVELOPMENT || DEBUG
384 	memorystatus_log_info(
385 		"memorystatus: aggressive%d: [%s] Bad Candidate Threshold Check (total: %d, bad: %d, threshold: %d %%); Memory Pressure Check (available_pgs: %llu, threshold_pgs: %llu)\n",
386 		eval_aggressive_count, aggressive_jetsam_needed ? "PASSED" : "FAILED", *total_candidates, bad_candidates,
387 		kJetsamHighRelaunchCandidatesThreshold, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, (uint64_t)memorystatus_sysproc_aging_aggr_pages);
388 #endif /* DEVELOPMENT || DEBUG */
389 	return aggressive_jetsam_needed;
390 }
391 
392 #endif /* CONFIG_JETSAM */
393 
394 #pragma mark Freezer
395 #if CONFIG_FREEZE
396 /*
397  * Freezer policies
398  */
399 
400 /*
401  * These functions determine what is eligible for the freezer
402  * and the order that we consider freezing them
403  */
404 
405 /*
406  * Checks if the given process is eligible for the freezer.
407  * Processes can only be frozen if this returns true.
408  */
409 bool
memorystatus_is_process_eligible_for_freeze(proc_t p)410 memorystatus_is_process_eligible_for_freeze(proc_t p)
411 {
412 	/*
413 	 * Called with proc_list_lock held.
414 	 */
415 
416 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
417 
418 	bool should_freeze = false;
419 	uint32_t state = 0, pages = 0;
420 	bool first_consideration = true;
421 	task_t task;
422 
423 	state = p->p_memstat_state;
424 
425 	if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
426 		if (state & P_MEMSTAT_FREEZE_DISABLED) {
427 			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled;
428 		}
429 		goto out;
430 	}
431 
432 	task = proc_task(p);
433 
434 	if (isSysProc(p)) {
435 		/*
436 		 * Daemon:- We consider freezing it if:
437 		 * - it belongs to a coalition and the leader is frozen, and,
438 		 * - its role in the coalition is XPC service.
439 		 *
440 		 * We skip memory size requirements in this case.
441 		 */
442 		int task_role_in_coalition = 0;
443 		proc_t leader_proc = memorystatus_get_coalition_leader_and_role(p, &task_role_in_coalition);
444 		if (leader_proc == PROC_NULL || leader_proc == p) {
445 			/*
446 			 * Jetsam coalition is leaderless or the leader is not an app.
447 			 * Either way, don't freeze this proc.
448 			 */
449 			goto out;
450 		}
451 
452 		/* Leader must be frozen */
453 		if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
454 			goto out;
455 		}
456 		/* Only freeze XPC services */
457 		if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
458 			should_freeze = true;
459 		}
460 
461 		goto out;
462 	} else {
463 		/*
464 		 * Application. Only freeze if it's suspended.
465 		 */
466 		if (!(state & P_MEMSTAT_SUSPENDED)) {
467 			goto out;
468 		}
469 	}
470 
471 	/*
472 	 * We're interested in tracking what percentage of
473 	 * eligible apps actually get frozen.
474 	 * To avoid skewing the metrics towards processes which
475 	 * are considered more frequently, we only track failures once
476 	 * per process.
477 	 */
478 	first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED);
479 
480 	if (first_consideration) {
481 		memorystatus_freezer_stats.mfs_process_considered_count++;
482 		p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED;
483 	}
484 
485 	/* Only freeze applications meeting our minimum resident page criteria */
486 	memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
487 	if (pages < memorystatus_freeze_pages_min) {
488 		if (first_consideration) {
489 			memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
490 		}
491 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages;
492 		goto out;
493 	}
494 
495 	/* Don't freeze processes that are already exiting on core. It may have started exiting
496 	 * after we chose it for freeze, but before we obtained the proc_list_lock.
497 	 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
498 	 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
499 	 */
500 	if (proc_list_exited(p)) {
501 		if (first_consideration) {
502 			memorystatus_freezer_stats.mfs_error_other_count++;
503 		}
504 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther;
505 		goto out;
506 	}
507 
508 	if (!memorystatus_freezer_use_ordered_list) {
509 		/*
510 		 * We're not using the ordered list so we need to check
511 		 * that dasd recommended the process. Note that the ordered list
512 		 * algorithm only considers processes on the list in the first place
513 		 * so there's no need to double check here.
514 		 */
515 		if (!memorystatus_freeze_process_is_recommended(p)) {
516 			if (first_consideration) {
517 				memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
518 			}
519 			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse;
520 			goto out;
521 		}
522 	}
523 
524 	if (!(state & P_MEMSTAT_FROZEN) && p->p_memstat_effectivepriority > memorystatus_freeze_max_candidate_band) {
525 		/*
526 		 * Proc has been elevated by something else.
527 		 * Don't freeze it.
528 		 */
529 		if (first_consideration) {
530 			memorystatus_freezer_stats.mfs_error_elevated_count++;
531 		}
532 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonElevated;
533 		goto out;
534 	}
535 
536 	should_freeze = true;
537 out:
538 	if (should_freeze && !(state & P_MEMSTAT_FROZEN)) {
539 		/*
540 		 * Reset the skip reason. If it's killed before we manage to actually freeze it
541 		 * we failed to consider it early enough.
542 		 */
543 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
544 		if (!first_consideration) {
545 			/*
546 			 * We're freezing this for the first time and we previously considered it ineligible.
547 			 * Bump the considered count so that we track this as 1 failure
548 			 * and 1 success.
549 			 */
550 			memorystatus_freezer_stats.mfs_process_considered_count++;
551 		}
552 	}
553 	return should_freeze;
554 }
555 
556 bool
memorystatus_freeze_proc_is_refreeze_eligible(proc_t p)557 memorystatus_freeze_proc_is_refreeze_eligible(proc_t p)
558 {
559 	return (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) != 0;
560 }
561 
562 
563 static proc_t
memorystatus_freeze_pick_refreeze_process(proc_t last_p)564 memorystatus_freeze_pick_refreeze_process(proc_t last_p)
565 {
566 	proc_t p = PROC_NULL, next_p = PROC_NULL;
567 	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
568 	if (last_p == PROC_NULL) {
569 		next_p = memorystatus_get_first_proc_locked(&band, FALSE);
570 	} else {
571 		next_p = memorystatus_get_next_proc_locked(&band, last_p, FALSE);
572 	}
573 	while (next_p) {
574 		p = next_p;
575 		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
576 		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
577 			/* Process is already frozen & hasn't been thawed. */
578 			continue;
579 		}
580 		/*
581 		 * Has to have been frozen once before.
582 		 */
583 		if (!(p->p_memstat_state & P_MEMSTAT_FROZEN)) {
584 			continue;
585 		}
586 
587 		/*
588 		 * Not currently being looked at for something.
589 		 */
590 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
591 			continue;
592 		}
593 
594 #if FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED
595 		/*
596 		 * Don't refreeze the last process we just thawed if still within the timeout window
597 		 */
598 		if (p->p_pid == memorystatus_freeze_last_pid_thawed) {
599 			uint64_t timeout_delta_abs;
600 			nanoseconds_to_absolutetime(FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS * NSEC_PER_SEC, &timeout_delta_abs);
601 			if (mach_absolute_time() < (memorystatus_freeze_last_pid_thawed_ts + timeout_delta_abs)) {
602 				continue;
603 			}
604 		}
605 #endif
606 
607 		/*
608 		 * Found it
609 		 */
610 		return p;
611 	}
612 	return PROC_NULL;
613 }
614 
615 proc_t
memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator * iterator)616 memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator *iterator)
617 {
618 	proc_t p = PROC_NULL, next_p = PROC_NULL;
619 	unsigned int band = JETSAM_PRIORITY_IDLE;
620 
621 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
622 	/*
623 	 * If the freezer is full, only consider refreezes.
624 	 */
625 	if (iterator->refreeze_only || memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
626 		if (!iterator->refreeze_only) {
627 			/*
628 			 * The first time the iterator starts to return refreeze
629 			 * candidates, we need to reset the last pointer b/c it's pointing into the wrong band.
630 			 */
631 			iterator->last_p = PROC_NULL;
632 			iterator->refreeze_only = true;
633 		}
634 		iterator->last_p = memorystatus_freeze_pick_refreeze_process(iterator->last_p);
635 		return iterator->last_p;
636 	}
637 
638 	/*
639 	 * Search for the next freezer candidate.
640 	 */
641 	if (memorystatus_freezer_use_ordered_list) {
642 		while (iterator->global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
643 			p = memorystatus_freezer_candidate_list_get_proc(
644 				&memorystatus_global_freeze_list,
645 				(iterator->global_freeze_list_index)++,
646 				&memorystatus_freezer_stats.mfs_freeze_pid_mismatches);
647 
648 			if (p != PROC_NULL && memorystatus_is_process_eligible_for_freeze(p)) {
649 #if FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED
650 				/*
651 				 * Don't refreeze the last process we just thawed if still within the timeout window
652 				 */
653 				if (p->p_pid == memorystatus_freeze_last_pid_thawed) {
654 					uint64_t timeout_delta_abs;
655 					nanoseconds_to_absolutetime(FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS * NSEC_PER_SEC, &timeout_delta_abs);
656 					if (mach_absolute_time() < (memorystatus_freeze_last_pid_thawed_ts + timeout_delta_abs)) {
657 						continue;
658 					}
659 				}
660 #endif
661 				iterator->last_p = p;
662 				return iterator->last_p;
663 			}
664 		}
665 	} else {
666 		if (iterator->last_p == PROC_NULL) {
667 			next_p = memorystatus_get_first_proc_locked(&band, FALSE);
668 		} else {
669 			next_p = memorystatus_get_next_proc_locked(&band, iterator->last_p, FALSE);
670 		}
671 		while (next_p) {
672 			p = next_p;
673 			if (memorystatus_is_process_eligible_for_freeze(p)) {
674 				iterator->last_p = p;
675 				return iterator->last_p;
676 			} else {
677 				next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
678 			}
679 		}
680 	}
681 
682 	/*
683 	 * Failed to find a new freezer candidate.
684 	 * Try to re-freeze.
685 	 */
686 	if (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold) {
687 		assert(!iterator->refreeze_only);
688 		iterator->refreeze_only = true;
689 		iterator->last_p = memorystatus_freeze_pick_refreeze_process(PROC_NULL);
690 		return iterator->last_p;
691 	}
692 	return PROC_NULL;
693 }
694 
695 /*
696  * memorystatus_pages_update calls this function whenever the number
697  * of available pages changes. It wakes the freezer thread iff the function returns
698  * true. The freezer thread will try to freeze (or refreeze) up to 1 process
699  * before blocking again.
700  *
701  * Note the freezer thread is also woken up by memorystatus_on_inactivity.
702  */
703 
704 bool
memorystatus_freeze_thread_should_run()705 memorystatus_freeze_thread_should_run()
706 {
707 	/*
708 	 * No freezer_mutex held here...see why near call-site
709 	 * within memorystatus_pages_update().
710 	 */
711 
712 	if (memorystatus_freeze_enabled == false) {
713 		return false;
714 	}
715 
716 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
717 		return false;
718 	}
719 
720 	memorystatus_freezer_stats.mfs_below_threshold_count++;
721 
722 	if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
723 		/*
724 		 * Consider this as a skip even if we wake up to refreeze because
725 		 * we won't freeze any new procs.
726 		 */
727 		memorystatus_freezer_stats.mfs_skipped_full_count++;
728 		if (memorystatus_refreeze_eligible_count < memorystatus_min_thaw_refreeze_threshold) {
729 			return false;
730 		}
731 	}
732 
733 	if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
734 		memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
735 		return false;
736 	}
737 
738 	uint64_t curr_time = mach_absolute_time();
739 
740 	if (curr_time < memorystatus_freezer_thread_next_run_ts) {
741 		return false;
742 	}
743 
744 	return true;
745 }
746 
747 size_t
memorystatus_pick_freeze_count_for_wakeup()748 memorystatus_pick_freeze_count_for_wakeup()
749 {
750 	size_t num_to_freeze = 0;
751 	if (!memorystatus_swap_all_apps) {
752 		num_to_freeze = 1;
753 	} else {
754 		/*
755 		 * When app swap is enabled, we want the freezer thread to aggressively freeze
756 		 * all candidates so we clear out space for the fg working set.
757 		 * But we still cap it to the current size of the candidate bands to avoid
758 		 * consuming excessive CPU if there's a lot of churn in the candidate band.
759 		 */
760 		proc_list_lock();
761 		for (unsigned int band = JETSAM_PRIORITY_IDLE; band <= memorystatus_freeze_max_candidate_band; band++) {
762 			num_to_freeze += memstat_bucket[band].count;
763 		}
764 		proc_list_unlock();
765 	}
766 
767 	return num_to_freeze;
768 }
769 
770 #endif /* CONFIG_FREEZE */
771