/* xref: /xnu-11417.121.6/bsd/kern/kern_memorystatus_policy.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650) */
1 /*
2  * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/task.h>
31 #include <libkern/libkern.h>
32 #include <machine/atomic.h>
33 #include <mach/coalition.h>
34 #include <os/log.h>
35 #include <sys/coalition.h>
36 #include <sys/proc.h>
37 #include <sys/proc_internal.h>
38 #include <sys/sysctl.h>
39 #include <sys/kdebug.h>
40 #include <sys/kern_memorystatus.h>
41 #include <vm/vm_protos.h>
42 #include <vm/vm_compressor_xnu.h>
43 
44 #include <kern/kern_memorystatus_internal.h>
45 
46 /*
47  * All memory pressure policy decisions should live here, and there should be
48  * as little mechanism as possible. This file prioritizes readability.
49  */
50 
51 #pragma mark Policy Function Declarations
52 
53 #if CONFIG_JETSAM
54 static bool memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills);
55 #endif /* CONFIG_JETSAM */
56 
57 #pragma mark Memorystatus Health Check
58 
59 /*
60  * Each subsystem that relies on the memorystatus thread
61  * for resource exhaustion should put a health check in this section.
62  * The memorystatus thread runs all of the health checks
63  * to determine if the system is healthy. If the system is unhealthy
64  * it picks an action based on the system health status. See the
65  * Memorystatus Thread Actions section below.
66  */
67 
68 
69 #if XNU_TARGET_OS_WATCH
70 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED true
71 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS (60 * 15)
72 #else
73 #define FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED false
74 #endif
75 extern pid_t memorystatus_freeze_last_pid_thawed;
76 extern uint64_t memorystatus_freeze_last_pid_thawed_ts;
77 
78 extern uint64_t memstat_oldest_reapable_proc_prio_start;
79 extern uint64_t memstat_reaper_min_age_secs;
80 extern uint64_t memstat_oldest_reapable_proc_will_be_reapable_at_ts_matu;
81 extern bool     memstat_reaper_is_currently_sweeping;
82 
83 static void
memorystatus_health_check(memorystatus_system_health_t * status)84 memorystatus_health_check(memorystatus_system_health_t *status)
85 {
86 	memset(status, 0, sizeof(memorystatus_system_health_t));
87 	status->msh_compressor_exhausted = vm_compressor_low_on_space() ||
88 	    os_atomic_load(&memorystatus_compressor_space_shortage, relaxed);
89 	status->msh_swap_low_on_space = vm_swap_low_on_space();
90 	status->msh_swap_exhausted = vm_swap_out_of_space();
91 #if CONFIG_JETSAM
92 	memstat_evaluate_page_shortage(
93 		&status->msh_available_pages_below_soft,
94 		&status->msh_available_pages_below_idle,
95 		&status->msh_available_pages_below_critical,
96 		&status->msh_available_pages_below_reaper);
97 	status->msh_compressor_is_thrashing = !memorystatus_swap_all_apps && vm_compressor_is_thrashing();
98 #if CONFIG_PHANTOM_CACHE
99 	status->msh_phantom_cache_pressure = os_atomic_load(&memorystatus_phantom_cache_pressure, relaxed);
100 #else
101 	status->msh_phantom_cache_pressure = false;
102 #endif /* CONFIG_PHANTOM_CACHE */
103 	if (!memorystatus_swap_all_apps &&
104 	    status->msh_phantom_cache_pressure &&
105 	    !(status->msh_compressor_is_thrashing && status->msh_compressor_exhausted)) {
106 		status->msh_filecache_is_thrashing = true;
107 	}
108 	status->msh_pageout_starved = os_atomic_load(&memorystatus_pageout_starved, relaxed);
109 	status->msh_swappable_compressor_segments_over_limit = memorystatus_swap_over_trigger(100);
110 	status->msh_swapin_queue_over_limit = memorystatus_swapin_over_trigger();
111 #endif /* CONFIG_JETSAM */
112 	status->msh_zone_map_is_exhausted = os_atomic_load(&memorystatus_zone_map_is_exhausted, relaxed);
113 }
114 
115 bool
memorystatus_is_system_healthy(const memorystatus_system_health_t * status)116 memorystatus_is_system_healthy(const memorystatus_system_health_t *status)
117 {
118 #if CONFIG_JETSAM
119 	return !(status->msh_available_pages_below_critical ||
120 	       status->msh_compressor_is_thrashing ||
121 	       status->msh_compressor_exhausted ||
122 	       status->msh_filecache_is_thrashing ||
123 	       status->msh_zone_map_is_exhausted ||
124 	       status->msh_pageout_starved);
125 #else /* CONFIG_JETSAM */
126 	return !(status->msh_zone_map_is_exhausted ||
127 	       status->msh_compressor_exhausted ||
128 	       status->msh_swap_exhausted);
129 #endif /* CONFIG_JETSAM */
130 }
131 
132 
133 #pragma mark Memorystatus Thread Actions
134 
135 /*
136  * This section picks the appropriate memorystatus_action & deploys it.
137  */
138 
139 /*
140  * Inspects the state of various resources in the system to see if
141  * the system is healthy. If the system is not healthy, picks a
142  * memorystatus_action_t to recover the system.
143  *
144  * Every time the memorystatus thread wakes up it calls into here
145  * to pick an action. It will continue performing memorystatus actions until this
146  * function returns MEMORYSTATUS_KILL_NONE. At that point the thread will block.
147  */
/*
 * Inspect system health and pick the next recovery action for the
 * memorystatus thread.
 *
 * Called every time the memorystatus thread wakes; the thread keeps
 * performing the returned actions until MEMORYSTATUS_KILL_NONE comes back,
 * at which point it blocks.
 *
 * Parameters:
 *   state                              - per-thread jetsam state (only
 *                                        limit_to_low_bands is consulted here)
 *   kill_cause (out)                   - reason code paired with the action
 *   highwater_remaining                - highwater-limit kill candidates exist
 *   suspended_swappable_apps_remaining - suspended swappable apps exist
 *   swappable_apps_remaining           - swappable apps exist
 *   jld_idle_kills (in/out)            - aggressive-jetsam idle-kill counter
 */
memorystatus_action_t
memorystatus_pick_action(jetsam_state_t state,
    uint32_t *kill_cause,
    bool highwater_remaining,
    bool suspended_swappable_apps_remaining,
    bool swappable_apps_remaining,
    int *jld_idle_kills)
{
	memorystatus_system_health_t status;
	memorystatus_health_check(&status);
	memorystatus_log_system_health(&status);
	bool is_system_healthy = memorystatus_is_system_healthy(&status);

#if CONFIG_JETSAM
	if (status.msh_available_pages_below_soft || !is_system_healthy) {
		/*
		 * If swap is enabled, first check if we're running low or are out of swap space.
		 */
		if (memorystatus_swap_all_apps && jetsam_kill_on_low_swap) {
			if (swappable_apps_remaining && status.msh_swap_exhausted) {
				*kill_cause = kMemorystatusKilledLowSwap;
				return MEMORYSTATUS_KILL_SWAPPABLE;
			} else if (suspended_swappable_apps_remaining && status.msh_swap_low_on_space) {
				*kill_cause = kMemorystatusKilledLowSwap;
				return MEMORYSTATUS_KILL_SUSPENDED_SWAPPABLE;
			}
		}

		/*
		 * We're below the pressure level or the system is unhealthy,
		 * regardless of the system health let's check if we should be swapping
		 * and if there are high watermark kills left to do.
		 */
		if (memorystatus_swap_all_apps) {
			if (status.msh_swappable_compressor_segments_over_limit && !vm_swapout_thread_running && !os_atomic_load(&vm_swapout_wake_pending, relaxed)) {
				/*
				 * TODO: The swapper will keep running until it has drained the entire early swapout queue.
				 * That might be overly aggressive & we should look into tuning it.
				 * See rdar://84102304.
				 */
				return MEMORYSTATUS_WAKE_SWAPPER;
			} else if (status.msh_swapin_queue_over_limit) {
				return MEMORYSTATUS_PROCESS_SWAPIN_QUEUE;
			} else if (status.msh_swappable_compressor_segments_over_limit) {
				memorystatus_log_info(
					"memorystatus: Skipping swap wakeup because the swap thread is already running. vm_swapout_thread_running=%d, vm_swapout_wake_pending=%d\n",
					vm_swapout_thread_running, os_atomic_load(&vm_swapout_wake_pending, relaxed));
			}
		}

		/* Compressor out of space: kill the top process immediately. */
		if (status.msh_compressor_exhausted) {
			*kill_cause = kMemorystatusKilledVMCompressorSpaceShortage;
			return MEMORYSTATUS_KILL_TOP_PROCESS;
		}

		/* Prefer highwater kills before falling through to priority kills. */
		if (highwater_remaining) {
			*kill_cause = kMemorystatusKilledHiwat;
			return MEMORYSTATUS_KILL_HIWATER;
		}
	}

	if (status.msh_available_pages_below_idle &&
	    memstat_get_idle_proccnt() > 0 &&
	    is_system_healthy) {
		/*
		 * The system is below the idle threshold but otherwise healthy.
		 */
		*kill_cause = kMemorystatusKilledIdleExit;
		return MEMORYSTATUS_KILL_IDLE;
	}

	if (memstat_reaper_is_currently_sweeping && is_system_healthy) {
		/*
		 * The system is healthy and we're in a reaper sweep.
		 */
		*kill_cause = kMemorystatusKilledLongIdleExit;
		return MEMORYSTATUS_KILL_LONG_IDLE;
	}

	if (is_system_healthy) {
		*kill_cause = 0;
		return MEMORYSTATUS_KILL_NONE;
	}

	/*
	 * At this point the system is unhealthy and there are no
	 * more highwatermark processes to kill.
	 */

	if (!state->limit_to_low_bands) {
		if (memorystatus_check_aggressive_jetsam_needed(jld_idle_kills)) {
			memorystatus_log("memorystatus: Starting aggressive jetsam.\n");
			*kill_cause = kMemorystatusKilledProcThrashing;
			return MEMORYSTATUS_KILL_AGGRESSIVE;
		}
	}

	/*
	 * The system is unhealthy and we either don't need aggressive jetsam
	 * or are not allowed to deploy it.
	 * Kill in priority order. We'll use LRU within every band except the
	 * FG (which will be sorted by coalition role).
	 */
	*kill_cause = memorystatus_pick_kill_cause(&status);
	return MEMORYSTATUS_KILL_TOP_PROCESS;
#else /* !CONFIG_JETSAM */
	(void) state;
	(void) jld_idle_kills;
	(void) suspended_swappable_apps_remaining;
	(void) swappable_apps_remaining;

	/*
	 * Without CONFIG_JETSAM, we only kill if the system is unhealthy.
	 * There is no aggressive jetsam and no
	 * early highwatermark killing.
	 */
	if (is_system_healthy) {
		*kill_cause = 0;
		return MEMORYSTATUS_KILL_NONE;
	}
	if (highwater_remaining) {
		*kill_cause = kMemorystatusKilledHiwat;
		return MEMORYSTATUS_KILL_HIWATER;
	}
	*kill_cause = memorystatus_pick_kill_cause(&status);
	if (status.msh_zone_map_is_exhausted) {
		return MEMORYSTATUS_KILL_TOP_PROCESS;
	} else if (status.msh_compressor_exhausted || status.msh_swap_exhausted) {
		if (kill_on_no_paging_space) {
			return MEMORYSTATUS_KILL_TOP_PROCESS;
		} else if (memstat_get_idle_proccnt() > 0) {
			return MEMORYSTATUS_KILL_IDLE;
		} else {
			/*
			 * The no paging space action will be performed synchronously by the
			 * thread performing the compression/swap.
			 */
			return MEMORYSTATUS_KILL_NONE;
		}
	} else {
		panic("System is unhealthy but compressor, swap, and zone map are not exhausted");
	}
#endif /* CONFIG_JETSAM */
}
292 
293 #pragma mark Aggressive Jetsam
294 /*
295  * This section defines when we deploy aggressive jetsam.
296  * Aggressive jetsam kills everything up to the jld_priority_band_max band.
297  */
298 
299 #if CONFIG_JETSAM
300 
301 static bool
302 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int jld_eval_aggressive_count, __unused int *jld_idle_kills, __unused int jld_idle_kill_candidates, int *total_candidates);
303 
304 /*
305  * kJetsamHighRelaunchCandidatesThreshold defines the percentage of candidates
306  * in the idle & deferred bands that need to be bad candidates in order to trigger
307  * aggressive jetsam.
308  */
309 TUNABLE_DEV_WRITEABLE(unsigned int, kJetsamHighRelaunchCandidatesThreshold, "jetsam_high_relaunch_candidates_threshold_percent", 100);
310 #if DEVELOPMENT || DEBUG
311 SYSCTL_UINT(_kern, OID_AUTO, jetsam_high_relaunch_candidates_threshold_percent, CTLFLAG_RW | CTLFLAG_LOCKED, &kJetsamHighRelaunchCandidatesThreshold, 100, "");
312 #endif /* DEVELOPMENT || DEBUG */
313 
314 /* kJetsamMinCandidatesThreshold defines the minimum number of candidates in the
315  * idle/deferred bands to trigger aggressive jetsam. This value basically decides
316  * how much memory the system is ready to hold in the lower bands without triggering
317  * aggressive jetsam. This number should ideally be tuned based on the memory config
318  * of the device.
319  */
320 TUNABLE_DT_DEV_WRITEABLE(unsigned int, kJetsamMinCandidatesThreshold, "/defaults", "kern.jetsam_min_candidates_threshold", "jetsam_min_candidates_threshold", 5, TUNABLE_DT_CHECK_CHOSEN);
321 #if DEVELOPMENT || DEBUG
322 SYSCTL_UINT(_kern, OID_AUTO, jetsam_min_candidates_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &kJetsamMinCandidatesThreshold, 5, "");
323 #endif /* DEVELOPMENT || DEBUG */
324 
325 static bool
memorystatus_check_aggressive_jetsam_needed(int * jld_idle_kills)326 memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills)
327 {
328 	bool aggressive_jetsam_needed = false;
329 	int total_candidates = 0;
330 	/*
331 	 * The aggressive jetsam logic looks at the number of times it has been in the
332 	 * aggressive loop to determine the max priority band it should kill upto. The
333 	 * static variables below are used to track that property.
334 	 *
335 	 * To reset those values, the implementation checks if it has been
336 	 * memorystatus_jld_eval_period_msecs since the parameters were reset.
337 	 */
338 
339 	if (memorystatus_jld_enabled == FALSE) {
340 		/* If aggressive jetsam is disabled, nothing to do here */
341 		return false;
342 	}
343 
344 	/* Get current timestamp (msecs only) */
345 	struct timeval  jld_now_tstamp = {0, 0};
346 	uint64_t        jld_now_msecs = 0;
347 	microuptime(&jld_now_tstamp);
348 	jld_now_msecs = (jld_now_tstamp.tv_sec * 1000);
349 
350 	/*
351 	 * Look at the number of candidates in the idle and deferred band and
352 	 * how many out of them are marked as high relaunch probability.
353 	 */
354 	aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_sysproc_aging(jld_eval_aggressive_count,
355 	    jld_idle_kills, jld_idle_kill_candidates, &total_candidates);
356 
357 	/*
358 	 * It is also possible that the system is down to a very small number of processes in the candidate
359 	 * bands. In that case, the decisions made by the memorystatus_aggressive_jetsam_needed_* routines
360 	 * would not be useful. In that case, do not trigger aggressive jetsam.
361 	 */
362 	if (total_candidates < kJetsamMinCandidatesThreshold) {
363 		memorystatus_log_debug(
364 			"memorystatus: aggressive: [FAILED] Low Candidate "
365 			"Count (current: %d, threshold: %d)\n",
366 			total_candidates, kJetsamMinCandidatesThreshold);
367 		aggressive_jetsam_needed = false;
368 	}
369 
370 	/*
371 	 * Check if its been really long since the aggressive jetsam evaluation
372 	 * parameters have been refreshed. This logic also resets the jld_eval_aggressive_count
373 	 * counter to make sure we reset the aggressive jetsam severity.
374 	 */
375 	if ((total_candidates == 0) ||
376 	    (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) {
377 		jld_timestamp_msecs       = jld_now_msecs;
378 		jld_idle_kill_candidates  = total_candidates;
379 		*jld_idle_kills           = 0;
380 		jld_eval_aggressive_count = 0;
381 	}
382 
383 	return aggressive_jetsam_needed;
384 }
385 
386 static bool
memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int eval_aggressive_count,__unused int * idle_kills,__unused int idle_kill_candidates,int * total_candidates)387 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int eval_aggressive_count, __unused int *idle_kills, __unused int idle_kill_candidates, int *total_candidates)
388 {
389 	bool aggressive_jetsam_needed = false;
390 
391 	/*
392 	 * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, we maintain the jetsam
393 	 * relaunch behavior for all daemons. Also, daemons and apps are aged in deferred bands on
394 	 * every dirty->clean transition. For this aging policy, the best way to determine if
395 	 * aggressive jetsam is needed, is to see if the kill candidates are mostly bad candidates.
396 	 * If yes, then we need to go to higher bands to reclaim memory.
397 	 */
398 	proc_list_lock();
399 	/* Get total candidate counts for idle and idle deferred bands */
400 	*total_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].count + memstat_bucket[system_procs_aging_band].count;
401 	/* Get counts of bad kill candidates in idle and idle deferred bands */
402 	int bad_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].relaunch_high_count + memstat_bucket[system_procs_aging_band].relaunch_high_count;
403 
404 	proc_list_unlock();
405 
406 	/* Check if the number of bad candidates is greater than kJetsamHighRelaunchCandidatesThreshold % */
407 	aggressive_jetsam_needed = (((bad_candidates * 100) / *total_candidates) >= kJetsamHighRelaunchCandidatesThreshold);
408 
409 	/*
410 	 * Since the new aging policy bases the aggressive jetsam trigger on percentage of
411 	 * bad candidates, it is prone to being overly aggressive. In order to mitigate that,
412 	 * make sure the system is really under memory pressure before triggering aggressive
413 	 * jetsam.
414 	 */
415 	if (memorystatus_available_pages > memorystatus_sysproc_aging_aggr_pages) {
416 		aggressive_jetsam_needed = false;
417 	}
418 
419 #if DEVELOPMENT || DEBUG
420 	memorystatus_log_info(
421 		"memorystatus: aggressive%d: [%s] Bad Candidate Threshold Check (total: %d, bad: %d, threshold: %d %%); Memory Pressure Check (available_pgs: %llu, threshold_pgs: %llu)\n",
422 		eval_aggressive_count, aggressive_jetsam_needed ? "PASSED" : "FAILED", *total_candidates, bad_candidates,
423 		kJetsamHighRelaunchCandidatesThreshold, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, (uint64_t)memorystatus_sysproc_aging_aggr_pages);
424 #endif /* DEVELOPMENT || DEBUG */
425 	return aggressive_jetsam_needed;
426 }
427 
428 #endif /* CONFIG_JETSAM */
429 
430 #pragma mark Freezer
431 #if CONFIG_FREEZE
432 /*
433  * Freezer policies
434  */
435 
436 /*
437  * These functions determine what is eligible for the freezer
438  * and the order that we consider freezing them
439  */
440 
441 /*
442  * Checks if the given process is eligible for the freezer.
443  * Processes can only be frozen if this returns true.
444  */
/*
 * Checks if the given process is eligible for the freezer.
 * Processes can only be frozen if this returns true.
 *
 * Side effects: may set p->p_memstat_freeze_skip_reason, the
 * P_MEMSTAT_FREEZE_CONSIDERED flag, and freezer statistics counters.
 *
 * Must be called with proc_list_lock held.
 */
bool
memorystatus_is_process_eligible_for_freeze(proc_t p)
{
	/*
	 * Called with proc_list_lock held.
	 */

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	bool should_freeze = false;
	uint32_t state = 0, pages = 0;
	bool first_consideration = true;
	task_t task;

	/* Snapshot of the memstat state flags; checks below use this copy. */
	state = p->p_memstat_state;

	/* Terminated, busy, or explicitly freeze-disabled procs are never eligible. */
	if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
		if (state & P_MEMSTAT_FREEZE_DISABLED) {
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled;
		}
		goto out;
	}

	task = proc_task(p);

	if (isSysProc(p)) {
		/*
		 * Daemon:- We consider freezing it if:
		 * - it belongs to a coalition and the leader is frozen, and,
		 * - its role in the coalition is XPC service.
		 *
		 * We skip memory size requirements in this case.
		 */
		int task_role_in_coalition = 0;
		proc_t leader_proc = memorystatus_get_coalition_leader_and_role(p, &task_role_in_coalition);
		if (leader_proc == PROC_NULL || leader_proc == p) {
			/*
			 * Jetsam coalition is leaderless or the leader is not an app.
			 * Either way, don't freeze this proc.
			 */
			goto out;
		}

		/* Leader must be frozen */
		if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
			goto out;
		}
		/* Only freeze XPC services */
		if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
			should_freeze = true;
		}

		goto out;
	} else {
		/*
		 * Application. Only freeze if it's suspended.
		 */
		if (!(state & P_MEMSTAT_SUSPENDED)) {
			goto out;
		}
	}

	/*
	 * We're interested in tracking what percentage of
	 * eligible apps actually get frozen.
	 * To avoid skewing the metrics towards processes which
	 * are considered more frequently, we only track failures once
	 * per process.
	 */
	first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED);

	if (first_consideration) {
		memorystatus_freezer_stats.mfs_process_considered_count++;
		p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED;
	}

	/* Only freeze applications meeting our minimum resident page criteria */
	memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
	if (pages < memorystatus_freeze_pages_min) {
		if (first_consideration) {
			memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
		}
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages;
		goto out;
	}

	/* Don't freeze processes that are already exiting on core. It may have started exiting
	 * after we chose it for freeze, but before we obtained the proc_list_lock.
	 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
	 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
	 */
	if (proc_list_exited(p)) {
		if (first_consideration) {
			memorystatus_freezer_stats.mfs_error_other_count++;
		}
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther;
		goto out;
	}

	if (!memorystatus_freezer_use_ordered_list) {
		/*
		 * We're not using the ordered list so we need to check
		 * that dasd recommended the process. Note that the ordered list
		 * algorithm only considers processes on the list in the first place
		 * so there's no need to double check here.
		 */
		if (!memorystatus_freeze_process_is_recommended(p)) {
			if (first_consideration) {
				memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
			}
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse;
			goto out;
		}
	}

	/* Never-frozen procs above the max candidate band are excluded. */
	if (!(state & P_MEMSTAT_FROZEN) && p->p_memstat_effectivepriority > memorystatus_freeze_max_candidate_band) {
		/*
		 * Proc has been elevated by something else.
		 * Don't freeze it.
		 */
		if (first_consideration) {
			memorystatus_freezer_stats.mfs_error_elevated_count++;
		}
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonElevated;
		goto out;
	}

	should_freeze = true;
out:
	if (should_freeze && !(state & P_MEMSTAT_FROZEN)) {
		/*
		 * Reset the skip reason. If it's killed before we manage to actually freeze it
		 * we failed to consider it early enough.
		 */
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
		if (!first_consideration) {
			/*
			 * We're freezing this for the first time and we previously considered it ineligible.
			 * Bump the considered count so that we track this as 1 failure
			 * and 1 success.
			 */
			memorystatus_freezer_stats.mfs_process_considered_count++;
		}
	}
	return should_freeze;
}
591 
592 bool
memorystatus_freeze_proc_is_refreeze_eligible(proc_t p)593 memorystatus_freeze_proc_is_refreeze_eligible(proc_t p)
594 {
595 	return (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) != 0;
596 }
597 
598 
/*
 * Walk the freezer jetsam band starting after last_p (or from the start of
 * the band when last_p is PROC_NULL) and return the next process that can be
 * re-frozen: it must have been frozen before, have been thawed since
 * (refreeze-eligible), and not be locked by another operation.
 *
 * Returns PROC_NULL when no candidate remains.
 * Caller must hold proc_list_lock (walks the band lists).
 */
static proc_t
memorystatus_freeze_pick_refreeze_process(proc_t last_p)
{
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
	if (last_p == PROC_NULL) {
		next_p = memorystatus_get_first_proc_locked(&band, FALSE);
	} else {
		next_p = memorystatus_get_next_proc_locked(&band, last_p, FALSE);
	}
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
			/* Process is already frozen & hasn't been thawed. */
			continue;
		}
		/*
		 * Has to have been frozen once before.
		 */
		if (!(p->p_memstat_state & P_MEMSTAT_FROZEN)) {
			continue;
		}

		/*
		 * Not currently being looked at for something.
		 */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			continue;
		}

#if FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED
		/*
		 * Don't refreeze the last process we just thawed if still within the timeout window
		 */
		if (p->p_pid == memorystatus_freeze_last_pid_thawed) {
			uint64_t timeout_delta_abs;
			nanoseconds_to_absolutetime(FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS * NSEC_PER_SEC, &timeout_delta_abs);
			if (mach_absolute_time() < (memorystatus_freeze_last_pid_thawed_ts + timeout_delta_abs)) {
				continue;
			}
		}
#endif

		/*
		 * Found it
		 */
		return p;
	}
	return PROC_NULL;
}
650 
/*
 * Return the next process the freezer thread should (re)freeze, advancing
 * the caller-owned iterator. First-time freeze candidates are taken either
 * from the global ordered candidate list or from the idle band; once the
 * freezer is full (or no new candidate is found and enough thawed procs
 * exist), the iterator switches permanently to refreeze-only mode.
 *
 * Returns PROC_NULL when there is nothing left to freeze.
 * Caller must hold proc_list_lock.
 */
proc_t
memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator *iterator)
{
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	unsigned int band = JETSAM_PRIORITY_IDLE;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	/*
	 * If the freezer is full, only consider refreezes.
	 */
	if (iterator->refreeze_only || memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
		if (!iterator->refreeze_only) {
			/*
			 * The first time the iterator starts to return refreeze
			 * candidates, we need to reset the last pointer b/c it's pointing into the wrong band.
			 */
			iterator->last_p = PROC_NULL;
			iterator->refreeze_only = true;
		}
		iterator->last_p = memorystatus_freeze_pick_refreeze_process(iterator->last_p);
		return iterator->last_p;
	}

	/*
	 * Search for the next freezer candidate.
	 */
	if (memorystatus_freezer_use_ordered_list) {
		/* Ordered (dasd-supplied) list: scan from the saved index. */
		while (iterator->global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
			p = memorystatus_freezer_candidate_list_get_proc(
				&memorystatus_global_freeze_list,
				(iterator->global_freeze_list_index)++,
				&memorystatus_freezer_stats.mfs_freeze_pid_mismatches);

			if (p != PROC_NULL && memorystatus_is_process_eligible_for_freeze(p)) {
#if FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED
				/*
				 * Don't refreeze the last process we just thawed if still within the timeout window
				 */
				if (p->p_pid == memorystatus_freeze_last_pid_thawed) {
					uint64_t timeout_delta_abs;
					nanoseconds_to_absolutetime(FREEZE_PREVENT_REFREEZE_OF_LAST_THAWED_TIMEOUT_SECONDS * NSEC_PER_SEC, &timeout_delta_abs);
					if (mach_absolute_time() < (memorystatus_freeze_last_pid_thawed_ts + timeout_delta_abs)) {
						continue;
					}
				}
#endif
				iterator->last_p = p;
				return iterator->last_p;
			}
		}
	} else {
		/* Band walk: resume from last_p within the idle band. */
		if (iterator->last_p == PROC_NULL) {
			next_p = memorystatus_get_first_proc_locked(&band, FALSE);
		} else {
			next_p = memorystatus_get_next_proc_locked(&band, iterator->last_p, FALSE);
		}
		while (next_p) {
			p = next_p;
			if (memorystatus_is_process_eligible_for_freeze(p)) {
				iterator->last_p = p;
				return iterator->last_p;
			} else {
				next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
			}
		}
	}

	/*
	 * Failed to find a new freezer candidate.
	 * Try to re-freeze.
	 */
	if (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold) {
		assert(!iterator->refreeze_only);
		iterator->refreeze_only = true;
		iterator->last_p = memorystatus_freeze_pick_refreeze_process(PROC_NULL);
		return iterator->last_p;
	}
	return PROC_NULL;
}
730 
731 /*
732  * memorystatus_pages_update calls this function whenever the number
733  * of available pages changes. It wakes the freezer thread iff the function returns
734  * true. The freezer thread will try to freeze (or refreeze) up to 1 process
735  * before blocking again.
736  *
737  * Note the freezer thread is also woken up by memorystatus_on_inactivity.
738  */
739 
740 bool
memorystatus_freeze_thread_should_run()741 memorystatus_freeze_thread_should_run()
742 {
743 	/*
744 	 * No freezer_mutex held here...see why near call-site
745 	 * within memorystatus_pages_update().
746 	 */
747 
748 	if (memorystatus_freeze_enabled == false) {
749 		return false;
750 	}
751 
752 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
753 		return false;
754 	}
755 
756 	memorystatus_freezer_stats.mfs_below_threshold_count++;
757 
758 	if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
759 		/*
760 		 * Consider this as a skip even if we wake up to refreeze because
761 		 * we won't freeze any new procs.
762 		 */
763 		memorystatus_freezer_stats.mfs_skipped_full_count++;
764 		if (memorystatus_refreeze_eligible_count < memorystatus_min_thaw_refreeze_threshold) {
765 			return false;
766 		}
767 	}
768 
769 	if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
770 		memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
771 		return false;
772 	}
773 
774 	uint64_t curr_time = mach_absolute_time();
775 
776 	if (curr_time < memorystatus_freezer_thread_next_run_ts) {
777 		return false;
778 	}
779 
780 	return true;
781 }
782 
783 size_t
memorystatus_pick_freeze_count_for_wakeup()784 memorystatus_pick_freeze_count_for_wakeup()
785 {
786 	size_t num_to_freeze = 0;
787 	if (!memorystatus_swap_all_apps) {
788 		num_to_freeze = 1;
789 	} else {
790 		/*
791 		 * When app swap is enabled, we want the freezer thread to aggressively freeze
792 		 * all candidates so we clear out space for the fg working set.
793 		 * But we still cap it to the current size of the candidate bands to avoid
794 		 * consuming excessive CPU if there's a lot of churn in the candidate band.
795 		 */
796 		proc_list_lock();
797 		for (unsigned int band = JETSAM_PRIORITY_IDLE; band <= memorystatus_freeze_max_candidate_band; band++) {
798 			num_to_freeze += memstat_bucket[band].count;
799 		}
800 		proc_list_unlock();
801 	}
802 
803 	return num_to_freeze;
804 }
805 
806 #endif /* CONFIG_FREEZE */
807