xref: /xnu-11215.61.5/bsd/kern/kern_memorystatus_internal.h (revision 4f1223e81cd707a65cc109d0b8ad6653699da3c4)
1 /*
2  * Copyright (c) 2006-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #ifndef _KERN_MEMORYSTATUS_INTERNAL_H_
31 #define _KERN_MEMORYSTATUS_INTERNAL_H_
32 
33 /*
34  * Contains memorystatus subsystem definitions that are not
35  * exported outside of the memorystatus subsystem.
36  *
37  * For example, all of the mechanisms used by kern_memorystatus_policy.c
38  * should be defined in this header.
39  */
40 
41 #if BSD_KERNEL_PRIVATE
42 
43 #include <mach/boolean.h>
44 #include <stdbool.h>
45 #include <os/atomic_private.h>
46 #include <os/base.h>
47 #include <os/log.h>
48 #include <os/overflow.h>
49 #include <kern/locks.h>
50 #include <kern/sched_prim.h>
51 #include <sys/kern_memorystatus.h>
52 #include <sys/kernel_types.h>
53 #include <sys/proc.h>
54 #include <sys/proc_internal.h>
55 
56 #if CONFIG_FREEZE
57 #include <sys/kern_memorystatus_freeze.h>
58 #endif /* CONFIG_FREEZE */
59 
60 /*
61  * memorystatus subsystem globals
62  */
63 extern uint32_t memorystatus_available_pages;
64 #if CONFIG_JETSAM
65 extern uint32_t jetsam_kill_on_low_swap;
66 #endif /* CONFIG_JETSAM */
67 extern int block_corpses; /* counter to block new corpses if jetsam purges them */
68 extern int system_procs_aging_band;
69 extern int applications_aging_band;
70 /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
71 extern int memorystatus_freeze_jetsam_band;
72 #if CONFIG_FREEZE
73 extern unsigned int memorystatus_suspended_count;
74 #endif /* CONFIG_FREEZE */
75 extern uint64_t memorystatus_sysprocs_idle_delay_time;
76 extern uint64_t memorystatus_apps_idle_delay_time;
77 
78 /*
79  * TODO(jason): This should really be calculated dynamically by the zalloc
80  * subsystem before we do a zone map exhaustion kill. But the zone_gc
81  * logic is non-trivial, so for now it just sets this global.
82  */
83 extern _Atomic bool memorystatus_zone_map_is_exhausted;
84 /*
85  * TODO(jason): We should get rid of this global
86  * and have the memorystatus thread check for compressor space shortages
87  * itself. However, there are 3 async call sites remaining that require more work to get us there:
88  * 2 of them are in vm_swap_defragment. When it's about to swap in a segment, it checks if that
89  * will cause a compressor space shortage & pre-emptively triggers jetsam. vm_compressor_backing_store
90  * needs to keep track of in-flight swapins due to defrag so we can perform those checks
91  * in the memorystatus thread.
92  * The other is in no_paging_space_action. This is only on macOS right now, but will
93  * be needed on iPad when we run out of swap space. This should be a new kill
94  * reason and we need to add a new health check for it.
95  * We need to maintain the macOS behavior though that we kill no more than 1 process
96  * every 5 seconds.
97  */
98 extern _Atomic bool memorystatus_compressor_space_shortage;
99 /*
100  * TODO(jason): We should also get rid of this global
101  * and check for phantom cache pressure from the memorystatus
 * thread. But first we need to fix the synchronization in
103  * vm_phantom_cache_check_pressure
104  */
105 extern _Atomic bool memorystatus_phantom_cache_pressure;
106 
107 extern _Atomic bool memorystatus_pageout_starved;
108 /*
109  * The actions that the memorystatus thread can perform
110  * when we're low on memory.
111  * See memorystatus_pick_action to see when each action is deployed.
112  */
OS_CLOSED_ENUM(memorystatus_action, uint32_t,
    MEMORYSTATUS_KILL_HIWATER,             // Kill 1 highwatermark process
    MEMORYSTATUS_KILL_AGGRESSIVE,          // Do aggressive jetsam
    MEMORYSTATUS_KILL_TOP_PROCESS,         // Kill based on jetsam priority
    MEMORYSTATUS_WAKE_SWAPPER,             // Wake up the swap thread
    MEMORYSTATUS_PROCESS_SWAPIN_QUEUE,     // Compact the swapin queue and move segments to the swapout queue
    MEMORYSTATUS_KILL_SUSPENDED_SWAPPABLE, // Kill a suspended swap-eligible process based on jetsam priority
    MEMORYSTATUS_KILL_SWAPPABLE,           // Kill a swap-eligible process (even if it's running) based on jetsam priority
    MEMORYSTATUS_KILL_IDLE,                // Kill an idle process
    MEMORYSTATUS_KILL_NONE,                // Do nothing
    );
124 
/*
 * Per-thread state for a jetsam thread.
 * Typically there should be a single jetsam thread
 * unless parallel jetsam is enabled.
 */
typedef struct jetsam_state_s {
	bool                            inited; /* if the thread is initialized */
	bool                            limit_to_low_bands; /* limit kills to < JETSAM_PRIORITY_ELEVATED_INACTIVE */
	int                             index; /* jetsam thread index */
	thread_t                        thread; /* jetsam thread pointer */
	int                             jld_idle_kills; /* idle jetsam kill counter for this session */
	uint32_t                        errors; /* error accumulator */
	bool                            sort_flag; /* sort the fg band (idle on macOS) before killing? */
	bool                            corpse_list_purged; /* has the corpse list been purged? */
	bool                            post_snapshot; /* do we need to post a jetsam snapshot after this session? */
	uint64_t                        memory_reclaimed; /* amount of memory that was just reclaimed */
	uint32_t                        hwm_kills; /* highwatermark (hwm) kill counter for this session */
	sched_cond_atomic_t             jt_wakeup_cond; /* condition var used to synchronize wake/sleep operations for this jetsam thread */
} *jetsam_state_t;
144 
/*
 * The memorystatus thread monitors these conditions
 * and will continue to act until the system is considered
 * healthy (see memorystatus_is_system_healthy).
 *
 * NB: when CONFIG_JETSAM is disabled, zone-map exhaustion is the only
 * condition tracked here.
 */
typedef struct memorystatus_system_health {
#if CONFIG_JETSAM
	bool msh_available_pages_below_soft;
	bool msh_available_pages_below_idle;
	bool msh_available_pages_below_critical;
	bool msh_compressor_needs_to_swap;
	bool msh_compressor_is_low_on_space;
	bool msh_compressor_is_thrashing;
	bool msh_compressed_pages_nearing_limit;
	bool msh_filecache_is_thrashing;
	bool msh_phantom_cache_pressure;
	bool msh_swappable_compressor_segments_over_limit;
	bool msh_swapin_queue_over_limit;
	bool msh_swap_low_on_space;
	bool msh_swap_out_of_space;
	bool msh_pageout_starved;
#endif /* CONFIG_JETSAM */
	bool msh_zone_map_is_exhausted;
} memorystatus_system_health_t;
169 
170 void memorystatus_log_system_health(const memorystatus_system_health_t *health);
171 bool memorystatus_is_system_healthy(const memorystatus_system_health_t *status);
172 /* Picks a kill cause given an unhealthy system status */
173 uint32_t memorystatus_pick_kill_cause(const memorystatus_system_health_t *status);
174 
175 #pragma mark Locks
176 
177 extern lck_mtx_t memorystatus_jetsam_broadcast_lock;
178 
#pragma mark Aggressive jetsam tunables
180 
181 extern boolean_t memorystatus_jld_enabled;              /* Enable jetsam loop detection */
182 extern uint32_t memorystatus_jld_eval_period_msecs;         /* Init pass sets this based on device memory size */
183 extern int      memorystatus_jld_max_kill_loops;            /* How many times should we try and kill up to the target band */
184 extern unsigned int memorystatus_sysproc_aging_aggr_pages; /* Aggressive jetsam pages threshold for sysproc aging policy */
185 extern unsigned int jld_eval_aggressive_count;
186 extern uint64_t  jld_timestamp_msecs;
187 extern int       jld_idle_kill_candidates;
188 
189 
190 /*
191  * VM globals read by the memorystatus subsystem
192  */
193 extern unsigned int    vm_page_free_count;
194 extern unsigned int    vm_page_active_count;
195 extern unsigned int    vm_page_inactive_count;
196 extern unsigned int    vm_page_throttled_count;
197 extern unsigned int    vm_page_purgeable_count;
198 extern unsigned int    vm_page_wire_count;
199 extern unsigned int    vm_page_speculative_count;
200 extern uint32_t        c_late_swapout_count, c_late_swappedin_count;
201 extern uint32_t        c_seg_allocsize;
202 extern bool            vm_swapout_thread_running;
203 extern _Atomic bool    vm_swapout_wake_pending;
204 #define VM_PAGE_DONATE_DISABLED     0
205 #define VM_PAGE_DONATE_ENABLED      1
206 extern uint32_t vm_page_donate_mode;
207 
#if CONFIG_JETSAM
/* Value of memorystatus_available_pages (relaxed atomic load), for logging. */
#define MEMORYSTATUS_LOG_AVAILABLE_PAGES os_atomic_load(&memorystatus_available_pages, relaxed)
#else /* CONFIG_JETSAM */
/* No jetsam threshold page count; sum the reclaimable/free page counters instead. */
#define MEMORYSTATUS_LOG_AVAILABLE_PAGES (vm_page_active_count + vm_page_inactive_count + vm_page_free_count + vm_page_speculative_count)
#endif /* CONFIG_JETSAM */
213 
214 bool memorystatus_avail_pages_below_pressure(void);
215 bool memorystatus_avail_pages_below_critical(void);
216 #if CONFIG_JETSAM
217 bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
218 bool memorystatus_swapin_over_trigger(void);
219 #endif /* CONFIG_JETSAM */
220 
221 /* Does cause indicate vm or fc thrashing? */
222 bool is_reason_thrashing(unsigned cause);
223 /* Is the zone map almost full? */
224 bool is_reason_zone_map_exhaustion(unsigned cause);
225 
226 memorystatus_action_t memorystatus_pick_action(jetsam_state_t state,
227     uint32_t *kill_cause, bool highwater_remaining,
228     bool suspended_swappable_apps_remaining,
229     bool swappable_apps_remaining, int *jld_idle_kills);
230 
/*
 * Convert a percentage (0-100) of total physical memory into a page count.
 * The argument is fully parenthesized so callers may pass an expression
 * (e.g. `a + b`) without changing evaluation order; multiplication happens
 * before the division by 100 to limit truncation error.
 */
#define MEMSTAT_PERCENT_TOTAL_PAGES(p) ((uint32_t)((p) * atop_64(max_mem) / 100))
232 
233 /*
234  * Take a (redacted) zprint snapshot along with the jetsam snapshot.
235  */
236 #define JETSAM_ZPRINT_SNAPSHOT (CONFIG_MEMORYSTATUS && (DEBUG || DEVELOPMENT))
237 
238 #pragma mark Logging Utilities
239 
/*
 * Verbosity levels for memorystatus logging; compared against the global
 * memorystatus_log_level before INFO/DEBUG messages are emitted.
 */
__enum_decl(memorystatus_log_level_t, unsigned int, {
	MEMORYSTATUS_LOG_LEVEL_DEFAULT = 0,
	MEMORYSTATUS_LOG_LEVEL_INFO = 1,
	MEMORYSTATUS_LOG_LEVEL_DEBUG = 2,
});
245 
246 extern os_log_t memorystatus_log_handle;
247 extern memorystatus_log_level_t memorystatus_log_level;
248 
249 /*
250  * NB: Critical memorystatus logs (e.g. jetsam kills) are load-bearing for OS
251  * performance testing infrastructure. Be careful when modifying the log-level for
252  * important system events.
253  *
254  * Memorystatus logs are interpreted by a wide audience. To avoid logging information
255  * that could lead to false diagnoses, INFO and DEBUG messages are only logged if the
256  * system has been configured to do so via `kern.memorystatus_log_level` (sysctl) or
257  * `memorystatus_log_level` (boot-arg).
258  *
259  * os_log supports a mechanism for configuring these properties dynamically; however,
260  * this mechanism is currently unsupported in XNU.
261  *
262  * TODO (JC) Deprecate sysctl/boot-arg and move to subsystem preferences pending:
263  *  - rdar://27006343 (Custom kernel log handles)
264  *  - rdar://80958044 (Kernel Logging Configuration)
265  */
#define _memorystatus_log_with_type(type, format, ...) os_log_with_type(memorystatus_log_handle, type, format, ##__VA_ARGS__)
#define memorystatus_log(format, ...) _memorystatus_log_with_type(OS_LOG_TYPE_DEFAULT, format, ##__VA_ARGS__)
/*
 * INFO/DEBUG logs are gated on memorystatus_log_level (see comment above).
 * The level check is wrapped in do/while(0) so each macro expands to a
 * single statement; a bare `if` here would mis-bind a caller's `else`
 * (or fail to compile) when used in an unbraced if/else body.
 */
#define memorystatus_log_info(format, ...) do { if (memorystatus_log_level >= MEMORYSTATUS_LOG_LEVEL_INFO) { _memorystatus_log_with_type(OS_LOG_TYPE_INFO, format, ##__VA_ARGS__); } } while (0)
#define memorystatus_log_debug(format, ...) do { if (memorystatus_log_level >= MEMORYSTATUS_LOG_LEVEL_DEBUG) { _memorystatus_log_with_type(OS_LOG_TYPE_DEBUG, format, ##__VA_ARGS__); } } while (0)
#define memorystatus_log_error(format, ...) _memorystatus_log_with_type(OS_LOG_TYPE_ERROR, format, ##__VA_ARGS__)
#define memorystatus_log_fault(format, ...) _memorystatus_log_with_type(OS_LOG_TYPE_FAULT, format, ##__VA_ARGS__)
272 
273 #pragma mark Jetsam Priority Management
274 
/*
 * Update a process's jetsam priority while the proc list lock is held.
 * NOTE(review): the previous comment here ("Cancel a process' idle aging";
 * "Returns whether a reschedule of the idle demotion thread is needed")
 * does not match this void declaration — confirm the intended documentation.
 */
279 void memstat_update_priority_locked(proc_t p, int priority,
280     memstat_priority_options_t options);
281 
282 static inline bool
_memstat_proc_is_aging(proc_t p)283 _memstat_proc_is_aging(proc_t p)
284 {
285 	return p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS;
286 }
287 
288 static inline bool
_memstat_proc_is_tracked(proc_t p)289 _memstat_proc_is_tracked(proc_t p)
290 {
291 	return p->p_memstat_dirty & P_DIRTY_TRACK;
292 }
293 
294 static inline bool
_memstat_proc_is_dirty(proc_t p)295 _memstat_proc_is_dirty(proc_t p)
296 {
297 	return p->p_memstat_dirty & P_DIRTY_IS_DIRTY;
298 }
299 
300 static inline bool
_memstat_proc_can_idle_exit(proc_t p)301 _memstat_proc_can_idle_exit(proc_t p)
302 {
303 	return _memstat_proc_is_tracked(p) &&
304 	       (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT);
305 }
306 
307 static inline bool
_memstat_proc_has_priority_assertion(proc_t p)308 _memstat_proc_has_priority_assertion(proc_t p)
309 {
310 	return p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION;
311 }
312 
313 static inline bool
_memstat_proc_is_managed(proc_t p)314 _memstat_proc_is_managed(proc_t p)
315 {
316 	return p->p_memstat_state & P_MEMSTAT_MANAGED;
317 }
318 
319 static inline bool
_memstat_proc_is_frozen(proc_t p)320 _memstat_proc_is_frozen(proc_t p)
321 {
322 	return p->p_memstat_state & P_MEMSTAT_FROZEN;
323 }
324 
325 static inline bool
_memstat_proc_is_suspended(proc_t p)326 _memstat_proc_is_suspended(proc_t p)
327 {
328 	return p->p_memstat_state & P_MEMSTAT_SUSPENDED;
329 }
330 
331 static inline void
_memstat_proc_set_suspended(proc_t p)332 _memstat_proc_set_suspended(proc_t p)
333 {
334 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_ASSERT_OWNED);
335 	if (!_memstat_proc_is_suspended(p)) {
336 		p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
337 #if CONFIG_FREEZE
338 		if (os_inc_overflow(&memorystatus_suspended_count)) {
339 			panic("Overflowed memorystatus_suspended_count");
340 		}
341 #endif /* CONFIG_FREEZE */
342 	}
343 }
344 
345 static inline void
_memstat_proc_set_resumed(proc_t p)346 _memstat_proc_set_resumed(proc_t p)
347 {
348 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_ASSERT_OWNED);
349 	if (_memstat_proc_is_suspended(p)) {
350 		p->p_memstat_state &= ~P_MEMSTAT_SUSPENDED;
351 #if CONFIG_FREEZE
352 		if (os_dec_overflow(&memorystatus_suspended_count)) {
353 			panic("Underflowed memorystatus_suspended_count");
354 		}
355 #endif /* CONFIG_FREEZE */
356 	}
357 }
358 
359 /*
360  * Return whether the process is to be placed in an elevated band while idle.
361  */
362 static inline bool
_memstat_proc_is_elevated(proc_t p)363 _memstat_proc_is_elevated(proc_t p)
364 {
365 	return p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
366 }
367 
368 /*
369  * Return whether p's ledger-enforced memlimit is fatal (as last cached by
370  * memorystatus)
371  */
372 static inline bool
_memstat_proc_cached_memlimit_is_fatal(proc_t p)373 _memstat_proc_cached_memlimit_is_fatal(proc_t p)
374 {
375 	return p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT;
376 }
377 
378 /*
379  * Return whether p's inactive/active memlimit is fatal
380  */
381 static inline bool
_memstat_proc_memlimit_is_fatal(proc_t p,bool is_active)382 _memstat_proc_memlimit_is_fatal(proc_t p, bool is_active)
383 {
384 	const uint32_t flag = is_active ?
385 	    P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL : P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;
386 	return p->p_memstat_state & flag;
387 }
388 
389 static inline bool
_memstat_proc_active_memlimit_is_fatal(proc_t p)390 _memstat_proc_active_memlimit_is_fatal(proc_t p)
391 {
392 	return _memstat_proc_memlimit_is_fatal(p, true);
393 }
394 
395 static inline bool
_memstat_proc_inactive_memlimit_is_fatal(proc_t p)396 _memstat_proc_inactive_memlimit_is_fatal(proc_t p)
397 {
398 	return _memstat_proc_memlimit_is_fatal(p, false);
399 }
400 
401 #pragma mark Jetsam
402 
403 /*
404  * @func memstat_evaluate_page_shortage
405  *
406  * @brief
407  * Evaluate page shortage conditions. Returns true if the jetsam thread should be woken up.
408  *
409  * @param should_enforce_memlimits
410  * Set to true if soft memory limits should be enforced
411  *
412  * @param should_idle_exit
413  * Set to true if idle processes should begin exiting
414  *
415  * @param should_jetsam
416  * Set to true if non-idle processes should be jetsammed
417  */
418 bool memstat_evaluate_page_shortage(
419 	bool *should_enforce_memlimits,
420 	bool *should_idle_exit,
421 	bool *should_jetsam);
422 
423 /*
424  * In nautical applications, ballast tanks are tanks on boats or submarines
425  * which can be filled with water. When flooded, they provide stability and
426  * reduce buoyancy. When drained (and filled with air), they provide buoyancy.
427  *
428  * In our analogy, the ballast tanks may be drained of unneeded weight (as
429  * occupied by idle processes or processes who have exceeded their memory
430  * limit) and filled with air (available memory). Userspace may toggle between
431  * these two states (filled/drained) depending on system requirements. For
 * example, drained ballast tanks (i.e. elevated available memory pools) may
433  * have benefits to power and latency. However, applications with large
434  * working sets may need to flood the ballast tanks (i.e. with
435  * anonymous/wired memory) to avoid issues like jetsam loops of daemons that it
436  * has IPC relationships with.
437  *
438  * Mechanically, "draining" the ballast tanks means applying a configurable
439  * offset to the idle and soft available page shortage thresholds. This offset
440  * is then removed when the policy is disengaged.
441  *
442  * The ballast mechanism is intended to be used over long time periods and the
443  * ballast_offset should be sustainable for general applications. If response to
444  * transient spikes in memory demand is desired, the clear-the-decks policy
445  * should be used instead.
446  *
447  * Clients may toggle this behavior via sysctl: kern.memorystatus.ballast_drained
448  */
449 int memorystatus_ballast_control(bool drain);
450 
451 #pragma mark Freezer
452 #if CONFIG_FREEZE
453 /*
454  * Freezer data types
455  */
456 
/* An ordered list of freeze or demotion candidates */
struct memorystatus_freezer_candidate_list {
	memorystatus_properties_freeze_entry_v1 *mfcl_list; /* array of candidate entries */
	size_t mfcl_length; /* number of entries in mfcl_list */
};

/*
 * Cursor state for the ordered freeze-candidate iteration
 * (see memorystatus_freeze_pick_process below).
 */
struct memorystatus_freeze_list_iterator {
	bool refreeze_only; /* presumably restricts iteration to refreeze-eligible processes — confirm against users */
	proc_t last_p; /* NOTE(review): looks like the last proc returned — confirm */
	size_t global_freeze_list_index; /* position in memorystatus_global_freeze_list */
};
468 
469 /*
470  * Freezer globals
471  */
472 extern struct memorystatus_freezer_stats_t memorystatus_freezer_stats;
473 extern int memorystatus_freezer_use_ordered_list;
474 extern struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list;
475 extern struct memorystatus_freezer_candidate_list memorystatus_global_demote_list;
476 extern uint64_t memorystatus_freezer_thread_next_run_ts;
477 bool memorystatus_is_process_eligible_for_freeze(proc_t p);
478 bool memorystatus_freeze_proc_is_refreeze_eligible(proc_t p);
479 
480 proc_t memorystatus_freezer_candidate_list_get_proc(
481 	struct memorystatus_freezer_candidate_list *list,
482 	size_t index,
483 	uint64_t *pid_mismatch_counter);
484 /*
485  * Returns the leader of the p's jetsam coalition
486  * and the role of p in that coalition.
487  */
488 proc_t memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition);
489 bool memorystatus_freeze_process_is_recommended(const proc_t p);
490 
491 /*
492  * Ordered iterator over all freeze candidates.
493  * The iterator should initially be zeroed out by the caller and
494  * can be zeroed out whenever the caller wishes to start from the beginning
495  * of the list again.
496  * Returns PROC_NULL when all candidates have been iterated over.
497  */
498 proc_t memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator *iterator);
499 
500 /*
501  * Returns the number of processes that the freezer thread should try to freeze
502  * on this wakeup.
503  */
504 size_t memorystatus_pick_freeze_count_for_wakeup(void);
505 
506 /*
507  * Configure the freezer for app-based swap mode.
508  * Should be called at boot.
509  */
510 void memorystatus_freeze_configure_for_swap(void);
511 /*
512  * Undo memorystatus_freeze_configure_for_swap
513  */
514 void memorystatus_freeze_disable_swap(void);
515 #endif /* CONFIG_FREEZE */
516 
517 #endif /* BSD_KERNEL_PRIVATE */
518 
519 #endif /* _KERN_MEMORYSTATUS_INTERNAL_H_ */
520