1 /*
2 * Copyright (c) 2006-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #include <kern/task.h>
31 #include <libkern/libkern.h>
32 #include <machine/atomic.h>
33 #include <mach/coalition.h>
34 #include <os/log.h>
35 #include <sys/coalition.h>
36 #include <sys/proc.h>
37 #include <sys/proc_internal.h>
38 #include <sys/sysctl.h>
39 #include <sys/kdebug.h>
40 #include <sys/kern_memorystatus.h>
41 #include <vm/vm_protos.h>
42 #include <vm/vm_compressor_xnu.h>
43
44 #include <kern/kern_memorystatus_internal.h>
45
46 /*
47 * All memory pressure policy decisions should live here, and there should be
48 * as little mechanism as possible. This file prioritizes readability.
49 */
50
51 #pragma mark Policy Function Declarations
52
53 #if CONFIG_JETSAM
54 static bool memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills);
55 #endif /* CONFIG_JETSAM */
56
57 #pragma mark Memorystatus Health Check
58
59 /*
60 * Each subsystem that relies on the memorystatus thread to respond to
61 * resource exhaustion should add a health check in this section.
62 * The memorystatus thread runs all of the health checks
63 * to determine if the system is healthy. If the system is unhealthy
64 * it picks an action based on the system health status. See the
65 * Memorystatus Thread Actions section below.
66 */
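/*
 * As a rough sketch (the names below are hypothetical, not existing kernel
 * symbols), a new subsystem would typically:
 *
 *   // in the subsystem, when it detects exhaustion:
 *   os_atomic_store(&my_subsystem_exhausted, true, relaxed);
 *   // ...and request a wakeup of the memorystatus thread.
 *
 *   // in memstat_evaluate_health_conditions() below:
 *   status->msh_my_subsystem_exhausted =
 *       os_atomic_load(&my_subsystem_exhausted, relaxed);
 *
 * The new msh_ field would then be folded into memstat_is_system_healthy()
 * and, if it should drive kills, memorystatus_pick_kill_cause().
 */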
67
68
69 extern uint64_t memstat_oldest_reapable_proc_prio_start;
70 extern uint64_t memstat_reaper_min_age_secs;
71 extern uint64_t memstat_oldest_reapable_proc_will_be_reapable_at_ts_matu;
72 extern bool memstat_reaper_is_currently_sweeping;
73
74 extern vm_pressure_level_t memorystatus_vm_pressure_level;
75
76 static void
77 memstat_evaluate_health_conditions(memorystatus_system_health_t status)
78 {
79 memset(status, 0, sizeof(memorystatus_system_health_t));
80 status->msh_compressor_low_on_space = vm_compressor_low_on_space() ||
81 os_atomic_load(&memorystatus_compressor_space_shortage, relaxed);
82 status->msh_compressor_exhausted = vm_compressor_out_of_space();
83 status->msh_swap_low_on_space = vm_swap_low_on_space();
84 status->msh_swap_exhausted = vm_swap_out_of_space();
85 #if CONFIG_JETSAM
86 memstat_evaluate_page_shortage(
87 &status->msh_available_pages_below_soft,
88 &status->msh_available_pages_below_idle,
89 &status->msh_available_pages_below_critical,
90 &status->msh_available_pages_below_reaper);
91 status->msh_compressor_is_thrashing = !memorystatus_swap_all_apps && vm_compressor_is_thrashing();
92 #if CONFIG_PHANTOM_CACHE
93 status->msh_phantom_cache_pressure = os_atomic_load(&memorystatus_phantom_cache_pressure, relaxed);
94 #else
95 status->msh_phantom_cache_pressure = false;
96 #endif /* CONFIG_PHANTOM_CACHE */
97 if (!memorystatus_swap_all_apps &&
98 status->msh_phantom_cache_pressure &&
99 !(status->msh_compressor_is_thrashing && status->msh_compressor_exhausted)) {
100 status->msh_filecache_is_thrashing = true;
101 }
102 status->msh_pageout_starved = os_atomic_load(&memorystatus_pageout_starved, relaxed);
103 status->msh_swappable_compressor_segments_over_limit = memorystatus_swap_over_trigger(100);
104 status->msh_swapin_queue_over_limit = memorystatus_swapin_over_trigger();
105 #else /* !CONFIG_JETSAM */
106 vm_pressure_level_t pressure_level = memorystatus_vm_pressure_level;
107 status->msh_vm_pressure_critical = (pressure_level == kVMPressureCritical);
108 status->msh_vm_pressure_warning = (pressure_level >= kVMPressureWarning);
109 #endif /* CONFIG_JETSAM */
110 status->msh_zone_map_is_exhausted = os_atomic_load(&memorystatus_zone_map_is_exhausted, relaxed);
111 }
112
113 static bool
114 memstat_is_system_healthy(const memorystatus_system_health_t status)
115 {
116 #if CONFIG_JETSAM
117 return !(status->msh_available_pages_below_critical ||
118 status->msh_compressor_is_thrashing ||
119 status->msh_compressor_exhausted ||
120 status->msh_compressor_low_on_space ||
121 status->msh_filecache_is_thrashing ||
122 status->msh_zone_map_is_exhausted ||
123 status->msh_pageout_starved);
124 #else /* CONFIG_JETSAM */
125 return !(status->msh_zone_map_is_exhausted ||
126 status->msh_compressor_exhausted ||
127 status->msh_compressor_low_on_space ||
128 status->msh_swap_exhausted ||
129 status->msh_swap_low_on_space ||
130 status->msh_vm_pressure_critical ||
131 status->msh_vm_pressure_warning);
132 #endif /* CONFIG_JETSAM */
133 }
134
135 static void
136 memstat_log_system_health(const memorystatus_system_health_t status)
137 {
138 static struct memorystatus_system_health_s prev_status = {0};
139
140 bool healthy = memstat_is_system_healthy(status);
141
142 /*
143 * Avoid spamming logs by only logging when the system status has changed.
144 */
145 if (prev_status.msh_zone_map_is_exhausted == status->msh_zone_map_is_exhausted &&
146 prev_status.msh_compressor_exhausted == status->msh_compressor_exhausted &&
147 prev_status.msh_swap_low_on_space == status->msh_swap_low_on_space &&
148 prev_status.msh_swap_exhausted == status->msh_swap_exhausted
149 #if CONFIG_JETSAM
150 &&
151 prev_status.msh_available_pages_below_idle == status->msh_available_pages_below_idle &&
152 prev_status.msh_available_pages_below_soft == status->msh_available_pages_below_soft &&
153 prev_status.msh_available_pages_below_critical == status->msh_available_pages_below_critical &&
154 prev_status.msh_available_pages_below_reaper == status->msh_available_pages_below_reaper &&
155 prev_status.msh_compressor_needs_to_swap == status->msh_compressor_needs_to_swap &&
156 prev_status.msh_compressor_is_thrashing == status->msh_compressor_is_thrashing &&
157 prev_status.msh_filecache_is_thrashing == status->msh_filecache_is_thrashing &&
158 prev_status.msh_phantom_cache_pressure == status->msh_phantom_cache_pressure &&
159 prev_status.msh_swapin_queue_over_limit == status->msh_swapin_queue_over_limit &&
160 prev_status.msh_pageout_starved == status->msh_pageout_starved
161 #endif /* CONFIG_JETSAM */
162 ) {
163 /* No change */
164 return;
165 }
166
167 #if CONFIG_JETSAM
168 if (healthy) {
169 if (status->msh_available_pages_below_soft) {
170 memorystatus_log(
171 "memorystatus: System will begin enforcing "
172 "soft memory limits. "
173 "memorystatus_available_pages: %llu compressor_size: %u\n",
174 (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, vm_compressor_pool_size());
175 } else if (status->msh_available_pages_below_idle) {
176 memorystatus_log(
177 "memorystatus: System will begin enacting "
178 "idle-exits. "
179 "memorystatus_available_pages: %llu compressor_size: %u\n",
180 (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, vm_compressor_pool_size());
181 } else if (status->msh_available_pages_below_reaper) {
182 memorystatus_log(
183 "memorystatus: System will begin reaping "
184 "long-idle processes. "
185 "memorystatus_available_pages: %llu compressor_size: %u\n",
186 (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, vm_compressor_pool_size());
187 } else {
188 memorystatus_log(
189 "memorystatus: System is healthy. "
190 "memorystatus_available_pages: %llu compressor_size:%u\n",
191 (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, vm_compressor_pool_size());
192 }
193 } else {
194 /* Unhealthy */
195 memorystatus_log("memorystatus: System is unhealthy! memorystatus_available_pages: %llu compressor_size:%u\n",
196 (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, vm_compressor_pool_size());
197 memorystatus_log(
198 "memorystatus: {"
199 "\"available_pages_below_critical\": %d, "
200 "\"available_pages_below_idle\": %d, "
201 "\"available_pages_below_soft\": %d, "
202 "\"available_pages_below_reaper\": %d, "
203 "\"compressor_needs_to_swap\": %d, "
204 "\"compressor_exhausted\": %d, "
205 "\"compressor_is_thrashing\": %d, "
206 "\"filecache_is_thrashing\": %d, "
207 "\"zone_map_is_exhausted\": %d, "
208 "\"phantom_cache_pressure\": %d, "
209 "\"swappable_compressor_segments_over_limit\": %d, "
210 "\"swapin_queue_over_limit\": %d, "
211 "\"swap_low\": %d, "
212 "\"swap_exhausted\": %d"
213 "}\n",
214 status->msh_available_pages_below_critical,
215 status->msh_available_pages_below_idle,
216 status->msh_available_pages_below_soft,
217 status->msh_available_pages_below_reaper,
218 status->msh_compressor_needs_to_swap,
219 status->msh_compressor_exhausted,
220 status->msh_compressor_is_thrashing,
221 status->msh_filecache_is_thrashing,
222 status->msh_zone_map_is_exhausted,
223 status->msh_phantom_cache_pressure,
224 status->msh_swappable_compressor_segments_over_limit,
225 status->msh_swapin_queue_over_limit,
226 status->msh_swap_low_on_space,
227 status->msh_swap_exhausted);
228 }
229 #else /* CONFIG_JETSAM */
230 memorystatus_log("memorystatus: System is %s. memorystatus_available_pages: %llu compressor_size:%u\n",
231 healthy ? "healthy" : "unhealthy",
232 (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, vm_compressor_pool_size());
233 if (!healthy) {
234 memorystatus_log(
235 "memorystatus: {"
236 "\"compressor_exhausted\": %d, "
237 "\"zone_map_is_exhausted\": %d, "
238 "\"swap_low\": %d, "
239 "\"swap_exhausted\": %d"
240 "}\n",
241 status->msh_compressor_exhausted,
242 status->msh_zone_map_is_exhausted,
243 status->msh_swap_low_on_space,
244 status->msh_swap_exhausted);
245 }
246 #endif /* CONFIG_JETSAM */
247 prev_status = *status;
248 }
249
250 bool
251 memstat_check_system_health(memorystatus_system_health_t status)
252 {
253 memstat_evaluate_health_conditions(status);
254 memstat_log_system_health(status);
255 return memstat_is_system_healthy(status);
256 }
257
258 #pragma mark Memorystatus Thread Actions
259
260 /*
261 * This section picks the appropriate memorystatus_action & deploys it.
262 */
263
264 uint64_t memstat_last_cache_purge_ts;
265 /* Purge caches under critical pressure, at most once per minute by default */
266 TUNABLE(uint64_t, memstat_cache_purge_backoff_ns,
267 "memorystatus_cache_purge_backoff_ns", 1 * 60 * NSEC_PER_SEC);
268
269 static uint32_t
270 memorystatus_pick_kill_cause(const memorystatus_system_health_t status)
271 {
272 assert(!memstat_is_system_healthy(status));
273 #if CONFIG_JETSAM
274 if (status->msh_available_pages_below_critical) {
275 return kMemorystatusKilledVMPageShortage;
276 } else if (status->msh_compressor_exhausted) {
277 return kMemorystatusKilledVMCompressorSpaceShortage;
278 } else if (status->msh_compressor_is_thrashing) {
279 return kMemorystatusKilledVMCompressorThrashing;
280 } else if (status->msh_filecache_is_thrashing) {
281 return kMemorystatusKilledFCThrashing;
282 } else if (status->msh_zone_map_is_exhausted) {
283 return kMemorystatusKilledZoneMapExhaustion;
284 } else if (status->msh_pageout_starved) {
285 return kMemorystatusKilledVMPageoutStarvation;
286 } else {
287 panic("decided to kill-top-process for unknown cause");
288 }
289 #else /* CONFIG_JETSAM */
290 if (status->msh_zone_map_is_exhausted) {
291 return kMemorystatusKilledZoneMapExhaustion;
292 } else if (status->msh_compressor_exhausted) {
293 return kMemorystatusKilledVMCompressorSpaceShortage;
294 } else if (status->msh_swap_exhausted) {
295 return kMemorystatusKilledLowSwap;
296 } else {
297 return kMemorystatusKilled;
298 }
299 #endif /* CONFIG_JETSAM */
300 }
301
302 /*
303 * Inspects the state of various resources in the system to see if
304 * the system is healthy. If the system is not healthy, picks a
305 * memorystatus_action_t to recover the system.
306 *
307 * Every time the memorystatus thread wakes up it calls into here
308 * to pick an action. It will continue performing memorystatus actions until this
309 * function returns MEMORYSTATUS_KILL_NONE. At that point the thread will block.
310 */
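/*
 * Roughly, the calling thread behaves like the sketch below (simplified;
 * the real loop lives in the memorystatus thread code):
 *
 *   for (;;) {
 *       action = memorystatus_pick_action(state, &cause, ...);
 *       if (action == MEMORYSTATUS_KILL_NONE) {
 *           break;  // block until the next wakeup
 *       }
 *       // deploy `action`, attributing any kill to `cause`
 *   }
 */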
311 memorystatus_action_t
312 memorystatus_pick_action(jetsam_state_t state,
313 uint32_t *kill_cause,
314 bool highwater_remaining,
315 bool suspended_swappable_apps_remaining,
316 bool swappable_apps_remaining,
317 int *jld_idle_kills)
318 {
319 struct memorystatus_system_health_s status;
320 bool is_system_healthy = memstat_check_system_health(&status);
321
322 #if CONFIG_JETSAM
323 if (status.msh_available_pages_below_soft || !is_system_healthy) {
324 /*
325 * If swap is enabled, first check if we're running low or are out of swap space.
326 */
327 if (memorystatus_swap_all_apps && jetsam_kill_on_low_swap) {
328 if (swappable_apps_remaining && status.msh_swap_exhausted) {
329 *kill_cause = kMemorystatusKilledLowSwap;
330 return MEMORYSTATUS_KILL_SWAPPABLE;
331 } else if (suspended_swappable_apps_remaining && status.msh_swap_low_on_space) {
332 *kill_cause = kMemorystatusKilledLowSwap;
333 return MEMORYSTATUS_KILL_SUSPENDED_SWAPPABLE;
334 }
335 }
336
337 /*
338 * We're below the soft page-shortage threshold or the system is unhealthy.
339 * Either way, check whether we should be swapping and whether there are
340 * high-watermark kills left to do.
341 */
342 if (memorystatus_swap_all_apps) {
343 if (status.msh_swappable_compressor_segments_over_limit && !vm_swapout_thread_running && !os_atomic_load(&vm_swapout_wake_pending, relaxed)) {
344 /*
345 * TODO: The swapper will keep running until it has drained the entire early swapout queue.
346 * That might be overly aggressive & we should look into tuning it.
347 * See rdar://84102304.
348 */
349 return MEMORYSTATUS_WAKE_SWAPPER;
350 } else if (status.msh_swapin_queue_over_limit) {
351 return MEMORYSTATUS_PROCESS_SWAPIN_QUEUE;
352 } else if (status.msh_swappable_compressor_segments_over_limit) {
353 memorystatus_log_info(
354 "memorystatus: Skipping swap wakeup because the swap thread is already running. vm_swapout_thread_running=%d, vm_swapout_wake_pending=%d\n",
355 vm_swapout_thread_running, os_atomic_load(&vm_swapout_wake_pending, relaxed));
356 }
357 }
358
359 if (status.msh_compressor_exhausted || status.msh_compressor_low_on_space) {
360 *kill_cause = kMemorystatusKilledVMCompressorSpaceShortage;
361 return MEMORYSTATUS_KILL_TOP_PROCESS;
362 }
363
364 if (highwater_remaining) {
365 *kill_cause = kMemorystatusKilledHiwat;
366 return MEMORYSTATUS_KILL_HIWATER;
367 }
368 }
369
370 if (status.msh_available_pages_below_idle &&
371 memstat_get_idle_proccnt() > 0 &&
372 is_system_healthy) {
373 /*
374 * The system is below the idle threshold but otherwise healthy.
375 */
376 *kill_cause = kMemorystatusKilledIdleExit;
377 return MEMORYSTATUS_KILL_IDLE;
378 }
379
380 if (memstat_reaper_is_currently_sweeping && is_system_healthy) {
381 /*
382 * The system is healthy and we're in a reaper sweep.
383 */
384 *kill_cause = kMemorystatusKilledLongIdleExit;
385 return MEMORYSTATUS_KILL_LONG_IDLE;
386 }
387
388 if (is_system_healthy) {
389 *kill_cause = 0;
390 return MEMORYSTATUS_KILL_NONE;
391 }
392
393 /*
394 * At this point the system is unhealthy and there are no
395 * more highwatermark processes to kill.
396 */
397
398 if (!state->limit_to_low_bands) {
399 if (memorystatus_check_aggressive_jetsam_needed(jld_idle_kills)) {
400 memorystatus_log("memorystatus: Starting aggressive jetsam.\n");
401 *kill_cause = kMemorystatusKilledProcThrashing;
402 return MEMORYSTATUS_KILL_AGGRESSIVE;
403 }
404 }
405
406 /*
407 * The system is unhealthy and we either don't need aggressive jetsam
408 * or are not allowed to deploy it.
409 * Kill in priority order. We'll use LRU within every band except the
410 * FG (which will be sorted by coalition role).
411 */
412 *kill_cause = memorystatus_pick_kill_cause(&status);
413 return MEMORYSTATUS_KILL_TOP_PROCESS;
414 #else /* !CONFIG_JETSAM */
415 (void) state;
416 (void) jld_idle_kills;
417 (void) suspended_swappable_apps_remaining;
418 (void) swappable_apps_remaining;
419 (void) highwater_remaining;
420
421 /*
422 * Without CONFIG_JETSAM, we only kill if the system is unhealthy.
423 * There is no aggressive jetsam and no
424 * early highwatermark killing.
425 */
426 if (is_system_healthy) {
427 *kill_cause = 0;
428 return MEMORYSTATUS_KILL_NONE;
429 }
430 *kill_cause = memorystatus_pick_kill_cause(&status);
431 if (status.msh_zone_map_is_exhausted) {
432 return MEMORYSTATUS_KILL_TOP_PROCESS;
433 }
434 if (status.msh_compressor_exhausted || status.msh_swap_exhausted) {
435 if (kill_on_no_paging_space) {
436 return MEMORYSTATUS_KILL_TOP_PROCESS;
437 }
438 }
439 if (status.msh_compressor_low_on_space || status.msh_swap_low_on_space) {
440 if (memstat_get_idle_proccnt() > 0) {
441 /* Kill all idle processes before invoking the no paging space action */
442 return MEMORYSTATUS_KILL_IDLE;
443 }
444 /*
445 * Throttle how often the no-paging-space action is performed.
446 */
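/*
 * Example (values illustrative): if the last action ran 30s ago and the
 * throttle delay is 60s, delta_since_last_no_space_ns comes out to about
 * 30 * NSEC_PER_SEC and the action is skipped on this pass.
 */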
447 uint64_t now = mach_absolute_time();
448 uint64_t delta_since_last_no_space_ns;
449 uint64_t last_action_ts = os_atomic_load(&last_no_space_action_ts, relaxed);
450 assert3u(now, >=, last_action_ts);
451 absolutetime_to_nanoseconds(now - last_action_ts, &delta_since_last_no_space_ns);
452 if (delta_since_last_no_space_ns > no_paging_space_action_throttle_delay_ns) {
453 return MEMORYSTATUS_NO_PAGING_SPACE;
454 } else {
455 return MEMORYSTATUS_KILL_NONE;
456 }
457 }
458 if (status.msh_vm_pressure_critical) {
459 /*
460 * The system is under critical memory pressure. First terminate any low-risk
461 * idle processes. When they are exhausted, purge system memory caches.
462 */
463 if (memstat_pressure_config & MEMSTAT_WARNING_KILL_LONG_IDLE &&
464 memstat_get_long_idle_proccnt() > 0) {
465 *kill_cause = kMemorystatusKilledLongIdleExit;
466 return MEMORYSTATUS_KILL_LONG_IDLE;
467 }
468 if (memstat_pressure_config & MEMSTAT_CRITICAL_KILL_IDLE &&
469 memstat_get_idle_proccnt() > 0) {
470 *kill_cause = kMemorystatusKilledIdleExit;
471 return MEMORYSTATUS_KILL_IDLE;
472 }
473 if (memstat_pressure_config & MEMSTAT_CRITICAL_PURGE_CACHES) {
474 uint64_t now = mach_absolute_time();
475 uint64_t delta_ns;
476 uint64_t last_purge_ts = os_atomic_load(&memstat_last_cache_purge_ts, relaxed);
477 assert3u(now, >=, last_purge_ts);
478 absolutetime_to_nanoseconds(now - last_purge_ts, &delta_ns);
479 if (delta_ns > memstat_cache_purge_backoff_ns) {
480 memstat_last_cache_purge_ts = now;
481 return MEMORYSTATUS_PURGE_CACHES;
482 }
483 }
484 return MEMORYSTATUS_KILL_NONE;
485 } else if (status.msh_vm_pressure_warning) {
486 /*
487 * The system is under pressure and is likely to start swapping soon. Reap
488 * any long-idle daemons.
489 */
490 if (memstat_pressure_config & MEMSTAT_WARNING_KILL_LONG_IDLE &&
491 memstat_get_long_idle_proccnt() > 0) {
492 *kill_cause = kMemorystatusKilledLongIdleExit;
493 return MEMORYSTATUS_KILL_LONG_IDLE;
494 }
495 return MEMORYSTATUS_KILL_NONE;
496 }
497 #endif /* CONFIG_JETSAM */
498 panic("System is unhealthy but no action has been chosen");
499 }
500
501 #pragma mark Aggressive Jetsam
502 /*
503 * This section defines when we deploy aggressive jetsam.
504 * Aggressive jetsam kills everything up to the jld_priority_band_max band.
505 */
506
507 #if CONFIG_JETSAM
508
509 static bool
510 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int jld_eval_aggressive_count, __unused int *jld_idle_kills, __unused int jld_idle_kill_candidates, int *total_candidates);
511
512 /*
513 * kJetsamHighRelaunchCandidatesThreshold defines the percentage of candidates
514 * in the idle & deferred bands that need to be bad candidates in order to trigger
515 * aggressive jetsam.
516 */
517 TUNABLE_DEV_WRITEABLE(unsigned int, kJetsamHighRelaunchCandidatesThreshold, "jetsam_high_relaunch_candidates_threshold_percent", 100);
518 #if DEVELOPMENT || DEBUG
519 SYSCTL_UINT(_kern, OID_AUTO, jetsam_high_relaunch_candidates_threshold_percent, CTLFLAG_RW | CTLFLAG_LOCKED, &kJetsamHighRelaunchCandidatesThreshold, 100, "");
520 #endif /* DEVELOPMENT || DEBUG */
521
522 /* kJetsamMinCandidatesThreshold defines the minimum number of candidates in the
523 * idle/deferred bands required to trigger aggressive jetsam. This value effectively decides
524 * how much memory the system is willing to hold in the lower bands without triggering
525 * aggressive jetsam. This number should ideally be tuned based on the memory config
526 * of the device.
527 */
528 TUNABLE_DT_DEV_WRITEABLE(unsigned int, kJetsamMinCandidatesThreshold, "/defaults", "kern.jetsam_min_candidates_threshold", "jetsam_min_candidates_threshold", 5, TUNABLE_DT_CHECK_CHOSEN);
529 #if DEVELOPMENT || DEBUG
530 SYSCTL_UINT(_kern, OID_AUTO, jetsam_min_candidates_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &kJetsamMinCandidatesThreshold, 5, "");
531 #endif /* DEVELOPMENT || DEBUG */
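/*
 * On DEVELOPMENT/DEBUG kernels both thresholds above are also exposed as
 * writable sysctls, e.g. (value illustrative):
 *
 *   sysctl kern.jetsam_min_candidates_threshold=10
 */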
532
533 static bool
534 memorystatus_check_aggressive_jetsam_needed(int *jld_idle_kills)
535 {
536 bool aggressive_jetsam_needed = false;
537 int total_candidates = 0;
538 /*
539 * The aggressive jetsam logic looks at the number of times it has been in the
540 * aggressive loop to determine the max priority band it should kill up to. The
541 * static variables below are used to track that property.
542 *
543 * To reset those values, the implementation checks if it has been
544 * memorystatus_jld_eval_period_msecs since the parameters were reset.
545 */
546
547 if (memorystatus_jld_enabled == FALSE) {
548 /* If aggressive jetsam is disabled, nothing to do here */
549 return false;
550 }
551
552 /* Get the current uptime in msecs (second granularity is sufficient here) */
553 struct timeval jld_now_tstamp = {0, 0};
554 uint64_t jld_now_msecs = 0;
555 microuptime(&jld_now_tstamp);
556 jld_now_msecs = (jld_now_tstamp.tv_sec * 1000);
557
558 /*
559 * Look at the number of candidates in the idle and deferred band and
560 * how many out of them are marked as high relaunch probability.
561 */
562 aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_sysproc_aging(jld_eval_aggressive_count,
563 jld_idle_kills, jld_idle_kill_candidates, &total_candidates);
564
565 /*
566 * It is also possible that the system is down to a very small number of processes in the candidate
567 * bands. In that case, the decisions made by the memorystatus_aggressive_jetsam_needed_* routines
568 * would not be meaningful, so do not trigger aggressive jetsam.
569 */
570 if (total_candidates < kJetsamMinCandidatesThreshold) {
571 memorystatus_log_debug(
572 "memorystatus: aggressive: [FAILED] Low Candidate "
573 "Count (current: %d, threshold: %d)\n",
574 total_candidates, kJetsamMinCandidatesThreshold);
575 aggressive_jetsam_needed = false;
576 }
577
578 /*
579 * Check whether it has been a long time since the aggressive jetsam evaluation
580 * parameters have been refreshed. This logic also resets the jld_eval_aggressive_count
581 * counter to make sure we reset the aggressive jetsam severity.
582 */
583 if ((total_candidates == 0) ||
584 (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) {
585 jld_timestamp_msecs = jld_now_msecs;
586 jld_idle_kill_candidates = total_candidates;
587 *jld_idle_kills = 0;
588 jld_eval_aggressive_count = 0;
589 }
590
591 return aggressive_jetsam_needed;
592 }
593
594 static bool
595 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int eval_aggressive_count, __unused int *idle_kills, __unused int idle_kill_candidates, int *total_candidates)
596 {
597 bool aggressive_jetsam_needed = false;
598
599 /*
600 * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, we maintain the jetsam
601 * relaunch behavior for all daemons. Also, daemons and apps are aged in deferred bands on
602 * every dirty->clean transition. For this aging policy, the best way to determine if
603 * aggressive jetsam is needed is to see whether the kill candidates are mostly bad candidates.
604 * If yes, then we need to go to higher bands to reclaim memory.
605 */
606 proc_list_lock();
607 /* Get total candidate counts for idle and idle deferred bands */
608 *total_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].count + memstat_bucket[system_procs_aging_band].count;
609 /* Get counts of bad kill candidates in idle and idle deferred bands */
610 int bad_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].relaunch_high_count + memstat_bucket[system_procs_aging_band].relaunch_high_count;
611
612 proc_list_unlock();
613
614 /* Check if the number of bad candidates is greater than kJetsamHighRelaunchCandidatesThreshold % */
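/*
 * Worked example (values illustrative): with the default threshold of 100%,
 * 8 candidates of which all 8 are high-relaunch gives (8 * 100) / 8 = 100,
 * which meets the threshold; 7 of 8 gives 87 and does not.
 */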
615 aggressive_jetsam_needed = (((bad_candidates * 100) / *total_candidates) >= kJetsamHighRelaunchCandidatesThreshold);
616
617 /*
618 * Since the new aging policy bases the aggressive jetsam trigger on percentage of
619 * bad candidates, it is prone to being overly aggressive. In order to mitigate that,
620 * make sure the system is really under memory pressure before triggering aggressive
621 * jetsam.
622 */
623 if (memorystatus_available_pages > memorystatus_sysproc_aging_aggr_pages) {
624 aggressive_jetsam_needed = false;
625 }
626
627 #if DEVELOPMENT || DEBUG
628 memorystatus_log_info(
629 "memorystatus: aggressive%d: [%s] Bad Candidate Threshold Check (total: %d, bad: %d, threshold: %d %%); Memory Pressure Check (available_pgs: %llu, threshold_pgs: %llu)\n",
630 eval_aggressive_count, aggressive_jetsam_needed ? "PASSED" : "FAILED", *total_candidates, bad_candidates,
631 kJetsamHighRelaunchCandidatesThreshold, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, (uint64_t)memorystatus_sysproc_aging_aggr_pages);
632 #endif /* DEVELOPMENT || DEBUG */
633 return aggressive_jetsam_needed;
634 }
635
636 #endif /* CONFIG_JETSAM */
637
638 #pragma mark Freezer
639 #if CONFIG_FREEZE
640 /*
641 * Freezer policies
642 */
643
644 /*
645 * These functions determine what is eligible for the freezer
646 * and the order that we consider freezing them
647 */
648
649 /*
650 * Checks if the given process is eligible for the freezer.
651 * Processes can only be frozen if this returns true.
652 */
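/*
 * In outline (the body below is authoritative): applications must be
 * suspended, meet the minimum resident page count, be recommended for
 * freezing (unless the ordered candidate list is in use), and not be
 * elevated above the candidate band. Daemons are only frozen when they are
 * XPC services in a coalition whose leader is already frozen.
 */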
653 bool
654 memorystatus_is_process_eligible_for_freeze(proc_t p)
655 {
656 /*
657 * Called with proc_list_lock held.
658 */
659
660 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
661
662 bool should_freeze = false;
663 uint32_t state = 0, pages = 0;
664 bool first_consideration = true;
665 task_t task;
666
667 state = p->p_memstat_state;
668
669 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
670 if (state & P_MEMSTAT_FREEZE_DISABLED) {
671 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled;
672 }
673 goto out;
674 }
675
676 task = proc_task(p);
677
678 if (isSysProc(p)) {
679 /*
680 * Daemon: we consider freezing it if:
681 * - it belongs to a coalition and the leader is frozen, and,
682 * - its role in the coalition is XPC service.
683 *
684 * We skip memory size requirements in this case.
685 */
686 int task_role_in_coalition = 0;
687 proc_t leader_proc = memorystatus_get_coalition_leader_and_role(p, &task_role_in_coalition);
688 if (leader_proc == PROC_NULL || leader_proc == p) {
689 /*
690 * Jetsam coalition is leaderless or the leader is not an app.
691 * Either way, don't freeze this proc.
692 */
693 goto out;
694 }
695
696 /* Leader must be frozen */
697 if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
698 goto out;
699 }
700 /* Only freeze XPC services */
701 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
702 should_freeze = true;
703 }
704
705 goto out;
706 } else {
707 /*
708 * Application. Only freeze if it's suspended.
709 */
710 if (!(state & P_MEMSTAT_SUSPENDED)) {
711 goto out;
712 }
713 }
714
715 /*
716 * We're interested in tracking what percentage of
717 * eligible apps actually get frozen.
718 * To avoid skewing the metrics towards processes which
719 * are considered more frequently, we only track failures once
720 * per process.
721 */
722 first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED);
723
724 if (first_consideration) {
725 memorystatus_freezer_stats.mfs_process_considered_count++;
726 p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED;
727 }
728
729 /* Only freeze applications meeting our minimum resident page criteria */
730 memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
731 if (pages < memorystatus_freeze_pages_min) {
732 if (first_consideration) {
733 memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
734 }
735 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages;
736 goto out;
737 }
738
739 /* Don't freeze a process that is already exiting on core. It may have started exiting
740 * after we chose it for freeze, but before we obtained the proc_list_lock.
741 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
742 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
743 */
744 if (proc_list_exited(p)) {
745 if (first_consideration) {
746 memorystatus_freezer_stats.mfs_error_other_count++;
747 }
748 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther;
749 goto out;
750 }
751
752 if (!memorystatus_freezer_use_ordered_list) {
753 /*
754 * We're not using the ordered list so we need to check
755 * that dasd recommended the process. Note that the ordered list
756 * algorithm only considers processes on the list in the first place
757 * so there's no need to double check here.
758 */
759 if (!memorystatus_freeze_process_is_recommended(p)) {
760 if (first_consideration) {
761 memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
762 }
763 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse;
764 goto out;
765 }
766 }
767
768 if (!(state & P_MEMSTAT_FROZEN) && p->p_memstat_effectivepriority > memorystatus_freeze_max_candidate_band) {
769 /*
770 * Proc has been elevated by something else.
771 * Don't freeze it.
772 */
773 if (first_consideration) {
774 memorystatus_freezer_stats.mfs_error_elevated_count++;
775 }
776 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonElevated;
777 goto out;
778 }
779
780 should_freeze = true;
781 out:
782 if (should_freeze && !(state & P_MEMSTAT_FROZEN)) {
783 /*
784 * Reset the skip reason. If the process is killed before we actually manage to freeze it,
785 * that means we failed to consider it early enough.
786 */
787 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
788 if (!first_consideration) {
789 /*
790 * We're freezing this for the first time and we previously considered it ineligible.
791 * Bump the considered count so that we track this as 1 failure
792 * and 1 success.
793 */
794 memorystatus_freezer_stats.mfs_process_considered_count++;
795 }
796 }
797 return should_freeze;
798 }
799
800 bool
801 memorystatus_freeze_proc_is_refreeze_eligible(proc_t p)
802 {
803 return (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) != 0;
804 }
805
806
807 static proc_t
808 memorystatus_freeze_pick_refreeze_process(proc_t last_p)
809 {
810 proc_t p = PROC_NULL, next_p = PROC_NULL;
811 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
812 if (last_p == PROC_NULL) {
813 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
814 } else {
815 next_p = memorystatus_get_next_proc_locked(&band, last_p, FALSE);
816 }
817 while (next_p) {
818 p = next_p;
819 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
820 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
821 /* Process is already frozen & hasn't been thawed. */
822 continue;
823 }
824 /*
825 * Has to have been frozen once before.
826 */
827 if (!(p->p_memstat_state & P_MEMSTAT_FROZEN)) {
828 continue;
829 }
830
831 /*
832 * Not currently being looked at for something.
833 */
834 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
835 continue;
836 }
837
838 /*
839 * Don't refreeze a process we just thawed if it is still within the timeout window
840 */
841 if (memorystatus_freeze_prevent_refreeze_of_recently_thawed && memorystatus_freeze_was_process_recently_thawed(p)) {
842 memorystatus_log("memorystatus: too soon to refreeze pid %d [%s], in memorystatus_freeze_pick_refreeze_process\n", p->p_pid, proc_best_name(p));
843 continue;
844 }
845
846 /*
847 * Found it
848 */
849 return p;
850 }
851 return PROC_NULL;
852 }
853
854 proc_t
855 memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator *iterator)
856 {
857 proc_t p = PROC_NULL, next_p = PROC_NULL;
858 unsigned int band = JETSAM_PRIORITY_IDLE;
859
860 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
861 /*
862 * If the freezer is full, only consider refreezes.
863 */
864 if (iterator->refreeze_only || memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
865 if (!iterator->refreeze_only) {
866 /*
867 * The first time the iterator starts to return refreeze
868 * candidates, we need to reset the last pointer because it's pointing into the wrong band.
869 */
870 iterator->last_p = PROC_NULL;
871 iterator->refreeze_only = true;
872 }
873 iterator->last_p = memorystatus_freeze_pick_refreeze_process(iterator->last_p);
874 return iterator->last_p;
875 }
876
877 /*
878 * Search for the next freezer candidate.
879 */
880 if (memorystatus_freezer_use_ordered_list) {
881 while (iterator->global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
882 p = memorystatus_freezer_candidate_list_get_proc(
883 &memorystatus_global_freeze_list,
884 (iterator->global_freeze_list_index)++,
885 &memorystatus_freezer_stats.mfs_freeze_pid_mismatches);
886
887 if (p != PROC_NULL && memorystatus_is_process_eligible_for_freeze(p)) {
888 /*
889 * Don't refreeze a process we just thawed if it is still within the timeout window
890 */
891 if (memorystatus_freeze_prevent_refreeze_of_recently_thawed && memorystatus_freeze_was_process_recently_thawed(p)) {
892 memorystatus_log("memorystatus: too soon to refreeze pid %d [%s], in memorystatus_freeze_pick_process\n", p->p_pid, proc_best_name(p));
893 continue;
894 }
895 iterator->last_p = p;
896 return iterator->last_p;
897 }
898 }
899 } else {
900 if (iterator->last_p == PROC_NULL) {
901 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
902 } else {
903 next_p = memorystatus_get_next_proc_locked(&band, iterator->last_p, FALSE);
904 }
905 while (next_p) {
906 p = next_p;
907 if (memorystatus_is_process_eligible_for_freeze(p)) {
908 iterator->last_p = p;
909 return iterator->last_p;
910 } else {
911 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
912 }
913 }
914 }
915
916 /*
917 * Failed to find a new freezer candidate.
918 * Try to re-freeze.
919 */
920 if (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold) {
921 assert(!iterator->refreeze_only);
922 iterator->refreeze_only = true;
923 iterator->last_p = memorystatus_freeze_pick_refreeze_process(PROC_NULL);
924 return iterator->last_p;
925 }
926 return PROC_NULL;
927 }
928
929 /*
930 * memorystatus_pages_update calls this function whenever the number
931 * of available pages changes. It wakes the freezer thread iff the function returns
932 * true. The freezer thread will try to freeze (or refreeze) up to 1 process
933 * before blocking again.
934 *
935 * Note the freezer thread is also woken up by memorystatus_on_inactivity.
936 */
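/*
 * A rough sketch of the intended call-site pattern (simplified; the actual
 * wakeup is issued from memorystatus_pages_update()):
 *
 *   if (memorystatus_freeze_thread_should_run()) {
 *       // wake the freezer thread; it will freeze (or refreeze)
 *       // at most one process and then block again
 *   }
 */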
937
938 bool
939 memorystatus_freeze_thread_should_run()
940 {
941 /*
942 * No freezer_mutex held here...see why near call-site
943 * within memorystatus_pages_update().
944 */
945
946 if (memorystatus_freeze_enabled == false) {
947 return false;
948 }
949
950 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
951 return false;
952 }
953
954 memorystatus_freezer_stats.mfs_below_threshold_count++;
955
956 if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
957 /*
958 * Consider this as a skip even if we wake up to refreeze because
959 * we won't freeze any new procs.
960 */
961 memorystatus_freezer_stats.mfs_skipped_full_count++;
962 if (memorystatus_refreeze_eligible_count < memorystatus_min_thaw_refreeze_threshold) {
963 return false;
964 }
965 }
966
967 if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
968 memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
969 return false;
970 }
971
972 uint64_t curr_time = mach_absolute_time();
973
974 if (curr_time < memorystatus_freezer_thread_next_run_ts) {
975 return false;
976 }
977
978 return true;
979 }
980
981 size_t
982 memorystatus_pick_freeze_count_for_wakeup()
983 {
984 size_t num_to_freeze = 0;
985 if (!memorystatus_swap_all_apps) {
986 num_to_freeze = 1;
987 } else {
988 /*
989 * When app swap is enabled, we want the freezer thread to aggressively freeze
990 * all candidates so we clear out space for the fg working set.
991 * But we still cap it to the current size of the candidate bands to avoid
992 * consuming excessive CPU if there's a lot of churn in the candidate band.
993 */
994 proc_list_lock();
995 for (unsigned int band = JETSAM_PRIORITY_IDLE; band <= memorystatus_freeze_max_candidate_band; band++) {
996 num_to_freeze += memstat_bucket[band].count;
997 }
998 proc_list_unlock();
999 }
1000
1001 return num_to_freeze;
1002 }
1003
1004 #endif /* CONFIG_FREEZE */
1005