1 /*
2 * Copyright (c) 2006-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #ifndef _KERN_MEMORYSTATUS_INTERNAL_H_
31 #define _KERN_MEMORYSTATUS_INTERNAL_H_
32
33 /*
34 * Contains memorystatus subsystem definitions that are not
35 * exported outside of the memorystatus subsystem.
36 *
37 * For example, all of the mechanisms used by kern_memorystatus_policy.c
38 * should be defined in this header.
39 */
40
41 #if BSD_KERNEL_PRIVATE
42
43 #include <mach/boolean.h>
44 #include <stdbool.h>
45 #include <os/base.h>
46 #include <os/log.h>
47 #include <os/overflow.h>
48 #include <kern/locks.h>
49 #include <kern/sched_prim.h>
50 #include <sys/kern_memorystatus.h>
51 #include <sys/kernel_types.h>
52 #include <sys/proc.h>
53 #include <sys/proc_internal.h>
54
55 #if CONFIG_FREEZE
56 #include <sys/kern_memorystatus_freeze.h>
57 #endif /* CONFIG_FREEZE */
58
59 /*
60 * memorystatus subsystem globals
61 */
62 #if CONFIG_JETSAM
63 extern unsigned int memorystatus_available_pages;
64 extern unsigned int memorystatus_available_pages_pressure;
65 extern unsigned int memorystatus_available_pages_critical;
66 extern uint32_t jetsam_kill_on_low_swap;
67 #else /* CONFIG_JETSAM */
68 extern uint64_t memorystatus_available_pages;
69 extern uint64_t memorystatus_available_pages_pressure;
70 extern uint64_t memorystatus_available_pages_critical;
71 #endif /* CONFIG_JETSAM */
72 extern int block_corpses; /* counter to block new corpses if jetsam purges them */
73 extern int system_procs_aging_band;
74 extern int applications_aging_band;
75 /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
76 extern int memorystatus_freeze_jetsam_band;
77 #if CONFIG_FREEZE
78 extern unsigned int memorystatus_suspended_count;
79 #endif /* CONFIG_FREEZE */
80
81 /*
82 * TODO(jason): This should really be calculated dynamically by the zalloc
83 * subsystem before we do a zone map exhaustion kill. But the zone_gc
84 * logic is non-trivial, so for now it just sets this global.
85 */
86 extern _Atomic bool memorystatus_zone_map_is_exhausted;
87 /*
88 * TODO(jason): We should get rid of this global
89 * and have the memorystatus thread check for compressor space shortages
90 * itself. However, there are 3 async call sites remaining that require more work to get us there:
91 * 2 of them are in vm_swap_defragment. When it's about to swap in a segment, it checks if that
92 * will cause a compressor space shortage & pre-emptively triggers jetsam. vm_compressor_backing_store
93 * needs to keep track of in-flight swapins due to defrag so we can perform those checks
94 * in the memorystatus thread.
95 * The other is in no_paging_space_action. This is only on macOS right now, but will
96 * be needed on iPad when we run out of swap space. This should be a new kill
97 * reason and we need to add a new health check for it.
98 * We need to maintain the macOS behavior though that we kill no more than 1 process
99 * every 5 seconds.
100 */
101 extern _Atomic bool memorystatus_compressor_space_shortage;
102 /*
103 * TODO(jason): We should also get rid of this global
104 * and check for phantom cache pressure from the memorystatus
 * thread. But first we need to fix the synchronization in
106 * vm_phantom_cache_check_pressure
107 */
108 extern _Atomic bool memorystatus_phantom_cache_pressure;
109
110 extern _Atomic bool memorystatus_pageout_starved;
111 /*
112 * The actions that the memorystatus thread can perform
113 * when we're low on memory.
114 * See memorystatus_pick_action to see when each action is deployed.
115 */
OS_CLOSED_ENUM(memorystatus_action, uint32_t,
	MEMORYSTATUS_KILL_HIWATER,             // Kill 1 highwatermark process
	MEMORYSTATUS_KILL_AGGRESSIVE,          // Do aggressive jetsam
	MEMORYSTATUS_KILL_TOP_PROCESS,         // Kill based on jetsam priority
	MEMORYSTATUS_WAKE_SWAPPER,             // Wake up the swap thread
	MEMORYSTATUS_PROCESS_SWAPIN_QUEUE,     // Compact the swapin queue and move segments to the swapout queue
	MEMORYSTATUS_KILL_SUSPENDED_SWAPPABLE, // Kill a suspended swap-eligible process based on jetsam priority
	MEMORYSTATUS_KILL_SWAPPABLE,           // Kill a swap-eligible process (even if it's running) based on jetsam priority
	MEMORYSTATUS_KILL_NONE,                // Do nothing
	);
126
127 /*
128 * Structure to hold state for a jetsam thread.
129 * Typically there should be a single jetsam thread
130 * unless parallel jetsam is enabled.
131 */
typedef struct jetsam_state_s {
	bool                inited;             /* if the thread is initialized */
	bool                limit_to_low_bands; /* limit kills to < JETSAM_PRIORITY_ELEVATED_INACTIVE */
	int                 index;              /* jetsam thread index */
	thread_t            thread;             /* jetsam thread pointer */
	int                 jld_idle_kills;     /* idle jetsam kill counter for this session */
	uint32_t            errors;             /* Error accumulator for the current session */
	bool                sort_flag;          /* Sort the fg band (idle on macOS) before killing? */
	bool                corpse_list_purged; /* Has the corpse list been purged? */
	bool                post_snapshot;      /* Do we need to post a jetsam snapshot after this session? */
	uint64_t            memory_reclaimed;   /* Amount of memory that was just reclaimed */
	uint32_t            hwm_kills;          /* hwm (highwatermark) kill counter for this session */
	sched_cond_atomic_t jt_wakeup_cond;     /* condition var used to synchronize wake/sleep operations for this jetsam thread */
} *jetsam_state_t;
146
147 /*
148 * The memorystatus thread monitors these conditions
149 * and will continue to act until the system is considered
150 * healthy.
151 */
typedef struct memorystatus_system_health {
	/*
	 * Each msh_ flag records one unhealthy condition. The set of flags
	 * is evaluated by memorystatus_is_system_healthy() and a kill cause
	 * is derived from it by memorystatus_pick_kill_cause().
	 */
#if CONFIG_JETSAM
	bool msh_available_pages_below_pressure;
	bool msh_available_pages_below_critical;
	bool msh_compressor_needs_to_swap;
	bool msh_compressor_is_low_on_space;
	bool msh_compressor_is_thrashing;
	bool msh_compressed_pages_nearing_limit;
	bool msh_filecache_is_thrashing;
	bool msh_phantom_cache_pressure;
	bool msh_swappable_compressor_segments_over_limit;
	bool msh_swapin_queue_over_limit;
	bool msh_swap_low_on_space;
	bool msh_swap_out_of_space;
	bool msh_pageout_starved;
#endif /* CONFIG_JETSAM */
	/* Mirrors the memorystatus_zone_map_is_exhausted global. */
	bool msh_zone_map_is_exhausted;
} memorystatus_system_health_t;
170
171 void memorystatus_log_system_health(const memorystatus_system_health_t *health);
172 bool memorystatus_is_system_healthy(const memorystatus_system_health_t *status);
173 /* Picks a kill cause given an unhealthy system status */
174 uint32_t memorystatus_pick_kill_cause(const memorystatus_system_health_t *status);
175
176 #pragma mark Locks
177
178 extern lck_mtx_t memorystatus_jetsam_broadcast_lock;
179
#pragma mark Aggressive jetsam tunables
181
182 extern boolean_t memorystatus_jld_enabled; /* Enable jetsam loop detection */
183 extern uint32_t memorystatus_jld_eval_period_msecs; /* Init pass sets this based on device memory size */
184 extern int memorystatus_jld_max_kill_loops; /* How many times should we try and kill up to the target band */
185 extern unsigned int memorystatus_sysproc_aging_aggr_pages; /* Aggressive jetsam pages threshold for sysproc aging policy */
186 extern unsigned int jld_eval_aggressive_count;
187 extern uint64_t jld_timestamp_msecs;
188 extern int jld_idle_kill_candidates;
189
190
191 /*
192 * VM globals read by the memorystatus subsystem
193 */
194 extern unsigned int vm_page_free_count;
195 extern unsigned int vm_page_active_count;
196 extern unsigned int vm_page_inactive_count;
197 extern unsigned int vm_page_throttled_count;
198 extern unsigned int vm_page_purgeable_count;
199 extern unsigned int vm_page_wire_count;
200 extern unsigned int vm_page_speculative_count;
201 extern uint32_t c_late_swapout_count, c_late_swappedin_count;
202 extern uint32_t c_seg_allocsize;
203 extern bool vm_swapout_thread_running;
204 extern _Atomic bool vm_swapout_wake_pending;
205 #define VM_PAGE_DONATE_DISABLED 0
206 #define VM_PAGE_DONATE_ENABLED 1
207 extern uint32_t vm_page_donate_mode;
208
209 #if CONFIG_JETSAM
210 #define MEMORYSTATUS_LOG_AVAILABLE_PAGES memorystatus_available_pages
211 #else /* CONFIG_JETSAM */
212 #define MEMORYSTATUS_LOG_AVAILABLE_PAGES (vm_page_active_count + vm_page_inactive_count + vm_page_free_count + vm_page_speculative_count)
213 #endif /* CONFIG_JETSAM */
214
215 bool memorystatus_avail_pages_below_pressure(void);
216 bool memorystatus_avail_pages_below_critical(void);
217 #if CONFIG_JETSAM
218 bool memorystatus_swap_over_trigger(uint64_t adjustment_factor);
219 bool memorystatus_swapin_over_trigger(void);
220 #endif /* CONFIG_JETSAM */
221
222 /* Does cause indicate vm or fc thrashing? */
223 bool is_reason_thrashing(unsigned cause);
224 /* Is the zone map almost full? */
225 bool is_reason_zone_map_exhaustion(unsigned cause);
226
227 memorystatus_action_t memorystatus_pick_action(jetsam_state_t state,
228 uint32_t *kill_cause, bool highwater_remaining,
229 bool suspended_swappable_apps_remaining,
230 bool swappable_apps_remaining, int *jld_idle_kills);
231
/*
 * Convert a percentage `p` (0-100) of total physical memory (max_mem) into
 * a page count. `p` is parenthesized so that expression arguments
 * (e.g. MEMSTAT_PERCENT_TOTAL_PAGES(a + b)) evaluate correctly.
 */
#define MEMSTAT_PERCENT_TOTAL_PAGES(p) ((p) * atop_64(max_mem) / 100)
233
234 /*
235 * Take a (redacted) zprint snapshot along with the jetsam snapshot.
236 */
237 #define JETSAM_ZPRINT_SNAPSHOT (CONFIG_MEMORYSTATUS && (DEBUG || DEVELOPMENT))
238
239 #pragma mark Logging Utilities
240
/*
 * Verbosity levels for memorystatus logging. Higher values are more
 * verbose: a message is emitted when memorystatus_log_level is >= the
 * level required by the logging macro (see memorystatus_log_info/debug
 * below).
 */
__enum_decl(memorystatus_log_level_t, unsigned int, {
	MEMORYSTATUS_LOG_LEVEL_DEFAULT = 0, /* Always-on logging */
	MEMORYSTATUS_LOG_LEVEL_INFO = 1,    /* Informational logs; opt-in via sysctl/boot-arg */
	MEMORYSTATUS_LOG_LEVEL_DEBUG = 2,   /* Verbose debug logs; opt-in via sysctl/boot-arg */
});
246
247 extern os_log_t memorystatus_log_handle;
248 extern memorystatus_log_level_t memorystatus_log_level;
249
250 /*
251 * NB: Critical memorystatus logs (e.g. jetsam kills) are load-bearing for OS
252 * performance testing infrastructure. Be careful when modifying the log-level for
253 * important system events.
254 *
255 * Memorystatus logs are interpreted by a wide audience. To avoid logging information
256 * that could lead to false diagnoses, INFO and DEBUG messages are only logged if the
257 * system has been configured to do so via `kern.memorystatus_log_level` (sysctl) or
258 * `memorystatus_log_level` (boot-arg).
259 *
260 * os_log supports a mechanism for configuring these properties dynamically; however,
261 * this mechanism is currently unsupported in XNU.
262 *
263 * TODO (JC) Deprecate sysctl/boot-arg and move to subsystem preferences pending:
264 * - rdar://27006343 (Custom kernel log handles)
265 * - rdar://80958044 (Kernel Logging Configuration)
266 */
#define _memorystatus_log_with_type(type, format, ...) os_log_with_type(memorystatus_log_handle, type, format, ##__VA_ARGS__)
#define memorystatus_log(format, ...) _memorystatus_log_with_type(OS_LOG_TYPE_DEFAULT, format, ##__VA_ARGS__)
/*
 * INFO/DEBUG logs are gated on memorystatus_log_level (see the block comment
 * above). The conditional is wrapped in do { } while (0) so the macros expand
 * to a single statement — a bare `if` expansion would capture a following
 * `else` and could not be used safely in unbraced control bodies.
 */
#define memorystatus_log_info(format, ...) do { if (memorystatus_log_level >= MEMORYSTATUS_LOG_LEVEL_INFO) { _memorystatus_log_with_type(OS_LOG_TYPE_INFO, format, ##__VA_ARGS__); } } while (0)
#define memorystatus_log_debug(format, ...) do { if (memorystatus_log_level >= MEMORYSTATUS_LOG_LEVEL_DEBUG) { _memorystatus_log_with_type(OS_LOG_TYPE_DEBUG, format, ##__VA_ARGS__); } } while (0)
#define memorystatus_log_error(format, ...) _memorystatus_log_with_type(OS_LOG_TYPE_ERROR, format, ##__VA_ARGS__)
#define memorystatus_log_fault(format, ...) _memorystatus_log_with_type(OS_LOG_TYPE_FAULT, format, ##__VA_ARGS__)
273
274 #pragma mark Jetsam Priority Management
275
/*
 * Update p's memorystatus/jetsam priority, applying the behaviors selected
 * in `options`.
 * NOTE(review): the previous comment here described cancelling idle aging
 * and a bool "reschedule needed" return value, which does not match this
 * void-returning declaration — confirm intended semantics against the
 * definition.
 */
280 void memstat_update_priority_locked(proc_t p, int priority,
281 memstat_priority_options_t options);
282
283 static inline bool
_memstat_proc_is_aging(proc_t p)284 _memstat_proc_is_aging(proc_t p)
285 {
286 return p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS;
287 }
288
289 static inline bool
_memstat_proc_is_tracked(proc_t p)290 _memstat_proc_is_tracked(proc_t p)
291 {
292 return p->p_memstat_dirty & P_DIRTY_TRACK;
293 }
294
295 static inline bool
_memstat_proc_is_dirty(proc_t p)296 _memstat_proc_is_dirty(proc_t p)
297 {
298 return p->p_memstat_dirty & P_DIRTY_IS_DIRTY;
299 }
300
301 static inline bool
_memstat_proc_can_idle_exit(proc_t p)302 _memstat_proc_can_idle_exit(proc_t p)
303 {
304 return _memstat_proc_is_tracked(p) &&
305 (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT);
306 }
307
308 static inline bool
_memstat_proc_has_priority_assertion(proc_t p)309 _memstat_proc_has_priority_assertion(proc_t p)
310 {
311 return p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION;
312 }
313
314 static inline bool
_memstat_proc_is_managed(proc_t p)315 _memstat_proc_is_managed(proc_t p)
316 {
317 return p->p_memstat_state & P_MEMSTAT_MANAGED;
318 }
319
320 static inline bool
_memstat_proc_is_frozen(proc_t p)321 _memstat_proc_is_frozen(proc_t p)
322 {
323 return p->p_memstat_state & P_MEMSTAT_FROZEN;
324 }
325
326 static inline bool
_memstat_proc_is_suspended(proc_t p)327 _memstat_proc_is_suspended(proc_t p)
328 {
329 return p->p_memstat_state & P_MEMSTAT_SUSPENDED;
330 }
331
332 static inline void
_memstat_proc_set_suspended(proc_t p)333 _memstat_proc_set_suspended(proc_t p)
334 {
335 LCK_MTX_ASSERT(&proc_list_mlock, LCK_ASSERT_OWNED);
336 if (!_memstat_proc_is_suspended(p)) {
337 p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
338 #if CONFIG_FREEZE
339 if (os_inc_overflow(&memorystatus_suspended_count)) {
340 panic("Overflowed memorystatus_suspended_count");
341 }
342 #endif /* CONFIG_FREEZE */
343 }
344 }
345
346 static inline void
_memstat_proc_set_resumed(proc_t p)347 _memstat_proc_set_resumed(proc_t p)
348 {
349 LCK_MTX_ASSERT(&proc_list_mlock, LCK_ASSERT_OWNED);
350 if (_memstat_proc_is_suspended(p)) {
351 p->p_memstat_state &= ~P_MEMSTAT_SUSPENDED;
352 #if CONFIG_FREEZE
353 if (os_dec_overflow(&memorystatus_suspended_count)) {
354 panic("Underflowed memorystatus_suspended_count");
355 }
356 #endif /* CONFIG_FREEZE */
357 }
358 }
359
360 /*
361 * Return whether the process is to be placed in an elevated band while idle.
362 */
363 static inline bool
_memstat_proc_is_elevated(proc_t p)364 _memstat_proc_is_elevated(proc_t p)
365 {
366 return p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
367 }
368
369 /*
370 * Return whether p's ledger-enforced memlimit is fatal (as last cached by
371 * memorystatus)
372 */
373 static inline bool
_memstat_proc_cached_memlimit_is_fatal(proc_t p)374 _memstat_proc_cached_memlimit_is_fatal(proc_t p)
375 {
376 return p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT;
377 }
378
379 /*
380 * Return whether p's inactive/active memlimit is fatal
381 */
382 static inline bool
_memstat_proc_memlimit_is_fatal(proc_t p,bool is_active)383 _memstat_proc_memlimit_is_fatal(proc_t p, bool is_active)
384 {
385 const uint32_t flag = is_active ?
386 P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL : P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;
387 return p->p_memstat_state & flag;
388 }
389
390 static inline bool
_memstat_proc_active_memlimit_is_fatal(proc_t p)391 _memstat_proc_active_memlimit_is_fatal(proc_t p)
392 {
393 return _memstat_proc_memlimit_is_fatal(p, true);
394 }
395
396 static inline bool
_memstat_proc_inactive_memlimit_is_fatal(proc_t p)397 _memstat_proc_inactive_memlimit_is_fatal(proc_t p)
398 {
399 return _memstat_proc_memlimit_is_fatal(p, false);
400 }
401
402 #pragma mark Freezer
403 #if CONFIG_FREEZE
404 /*
405 * Freezer data types
406 */
407
408 /* An ordered list of freeze or demotion candidates */
struct memorystatus_freezer_candidate_list {
	/* Ordered array of candidate property entries (v1 record layout). */
	memorystatus_properties_freeze_entry_v1 *mfcl_list;
	/* Number of entries in mfcl_list. */
	size_t mfcl_length;
};
413
struct memorystatus_freeze_list_iterator {
	/* When true, restrict iteration to refreeze-eligible processes. */
	bool refreeze_only;
	/* Presumably the most recent proc returned by the iterator — TODO confirm against memorystatus_freeze_pick_process(). */
	proc_t last_p;
	/* Cursor into memorystatus_global_freeze_list (ordered-list mode). */
	size_t global_freeze_list_index;
};
419
420 /*
421 * Freezer globals
422 */
423 extern struct memorystatus_freezer_stats_t memorystatus_freezer_stats;
424 extern int memorystatus_freezer_use_ordered_list;
425 extern struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list;
426 extern struct memorystatus_freezer_candidate_list memorystatus_global_demote_list;
427 extern uint64_t memorystatus_freezer_thread_next_run_ts;
428 bool memorystatus_is_process_eligible_for_freeze(proc_t p);
429 bool memorystatus_freeze_proc_is_refreeze_eligible(proc_t p);
430
431 proc_t memorystatus_freezer_candidate_list_get_proc(
432 struct memorystatus_freezer_candidate_list *list,
433 size_t index,
434 uint64_t *pid_mismatch_counter);
435 /*
436 * Returns the leader of the p's jetsam coalition
437 * and the role of p in that coalition.
438 */
439 proc_t memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition);
440 bool memorystatus_freeze_process_is_recommended(const proc_t p);
441
442 /*
443 * Ordered iterator over all freeze candidates.
444 * The iterator should initially be zeroed out by the caller and
445 * can be zeroed out whenever the caller wishes to start from the beginning
446 * of the list again.
447 * Returns PROC_NULL when all candidates have been iterated over.
448 */
449 proc_t memorystatus_freeze_pick_process(struct memorystatus_freeze_list_iterator *iterator);
450
451 /*
452 * Returns the number of processes that the freezer thread should try to freeze
453 * on this wakeup.
454 */
455 size_t memorystatus_pick_freeze_count_for_wakeup(void);
456
457 /*
458 * Configure the freezer for app-based swap mode.
459 * Should be called at boot.
460 */
461 void memorystatus_freeze_configure_for_swap(void);
462 /*
463 * Undo memorystatus_freeze_configure_for_swap
464 */
465 void memorystatus_freeze_disable_swap(void);
466 #endif /* CONFIG_FREEZE */
467
468 #endif /* BSD_KERNEL_PRIVATE */
469
470 #endif /* _KERN_MEMORYSTATUS_INTERNAL_H_ */
471