1 /*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/kdebug.h>
53 #include <sys/kdebug_kernel.h>
54 #include <sys/proc.h>
55 #include <sys/proc_info.h>
56 #include <sys/reason.h>
57 #include <sys/signal.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/sysproto.h>
61 #include <sys/ubc.h> /* mach_to_bsd_errno */
62 #include <sys/wait.h>
63 #include <sys/tree.h>
64 #include <sys/priv.h>
65 #include <vm/vm_pageout.h>
66 #include <vm/vm_protos.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_compressor_xnu.h>
69 #include <vm/vm_compressor_backing_store_xnu.h>
70 #include <mach/machine/sdt.h>
71 #include <libkern/coreanalytics/coreanalytics.h>
72 #include <libkern/section_keywords.h>
73 #include <stdatomic.h>
74
75 #include <IOKit/IOBSD.h>
76
77 #if CONFIG_FREEZE
78 #include <vm/vm_map_xnu.h>
79 #endif /* CONFIG_FREEZE */
80
81 #include <kern/kern_memorystatus_internal.h>
82 #include <sys/kern_memorystatus.h>
83 #include <sys/kern_memorystatus_freeze.h>
84 #include <sys/kern_memorystatus_notify.h>
85 #include <sys/ubc.h>
86
/*
 * Global tallies of frozen processes: every frozen process, frozen WebContent
 * processes, and frozen XPC services, respectively.
 * These are defined outside of CONFIG_FREEZE; the kern.memorystatus_freeze_count*
 * sysctls further down in this file publish them read-only.
 */
87 unsigned int memorystatus_frozen_count = 0;
88 unsigned int memorystatus_frozen_count_webcontent = 0;
89 unsigned int memorystatus_frozen_count_xpc_service = 0;
90
91 #if CONFIG_FREEZE
92
93 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
94 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
95
96 /* Thresholds */
97 unsigned int memorystatus_freeze_threshold = 0;
98 unsigned int memorystatus_freeze_pages_min = 0;
99 unsigned int memorystatus_freeze_pages_max = 0;
100 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
101 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
102 uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
103 uint64_t memorystatus_freeze_budget_multiplier = 100; /* Multiplies the daily budget by 100/multiplier */
104 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
105 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
106
107 unsigned int memorystatus_max_frozen_demotions_daily = 0;
108 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
109 unsigned int memorystatus_min_thaw_refreeze_threshold;
110
111 #define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT true
112 boolean_t memorystatus_freeze_dynamic_thread_delay_enabled = FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT;
113 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_dynamic_thread_delay_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_dynamic_thread_delay_enabled, 0, "");
114
115 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST 1
116 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW 30
117 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST
118 unsigned int memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT;
119
120 #define MEMORYSTATUS_FREEZE_PREVENT_REFREEZE_OF_RECENTLY_THAWED_DEFAULT true
121
122 #if XNU_TARGET_OS_WATCH
123 #define MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_DEFAULT 1
124 #define MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_PREVENT_REFREEZE_SECONDS_DEFAULT (60 * 15)
125 #else
126 #define MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_DEFAULT 5
127 #define MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_PREVENT_REFREEZE_SECONDS_DEFAULT (60 * 25)
128 #endif
129 #define MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_MAX 10
130
131 TUNABLE_WRITEABLE(boolean_t, memorystatus_freeze_prevent_refreeze_of_recently_thawed,
132 "memorystatus_freeze_prevent_refreeze_of_recently_thawed",
133 MEMORYSTATUS_FREEZE_PREVENT_REFREEZE_OF_RECENTLY_THAWED_DEFAULT);
134 TUNABLE_WRITEABLE(uint32_t, memorystatus_freeze_last_processes_thawed_cache_size,
135 "memorystatus_freeze_last_processes_thawed_cache_size",
136 MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_DEFAULT);
137 TUNABLE_WRITEABLE(uint32_t, memorystatus_freeze_last_processes_thawed_prevent_refreeze_seconds,
138 "memorystatus_freeze_last_processes_thawed_prevent_refreeze_seconds",
139 MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_PREVENT_REFREEZE_SECONDS_DEFAULT);
140 EXPERIMENT_FACTOR_UINT(memorystatus_freeze_prevent_refreeze_of_recently_thawed,
141 &memorystatus_freeze_prevent_refreeze_of_recently_thawed, 0, 1, "");
142 EXPERIMENT_FACTOR_UINT(memorystatus_freeze_last_processes_thawed_cache_size,
143 &memorystatus_freeze_last_processes_thawed_cache_size, 0, MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_MAX, "");
144 EXPERIMENT_FACTOR_UINT(memorystatus_freeze_last_processes_thawed_prevent_refreeze_seconds,
145 &memorystatus_freeze_last_processes_thawed_prevent_refreeze_seconds, 0, UINT32_MAX, "");
146 pid_t memorystatus_freeze_last_processes_thawed_pid[MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_MAX];
147 uint64_t memorystatus_freeze_last_processes_thawed_ts[MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_MAX];
148
149 #if (XNU_TARGET_OS_IOS && !XNU_TARGET_OS_XR) || XNU_TARGET_OS_WATCH
150 #define FREEZE_ENABLED_DEFAULT TRUE
151 #else
152 #define FREEZE_ENABLED_DEFAULT FALSE
153 #endif
154 boolean_t memorystatus_freeze_enabled = FREEZE_ENABLED_DEFAULT;
155
156 int memorystatus_freeze_wakeup = 0;
157
158 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
159
160 unsigned int memorystatus_frozen_processes_max = 0;
161 unsigned int memorystatus_frozen_shared_mb = 0;
162 unsigned int memorystatus_frozen_shared_mb_max = 0;
163 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
164 #if XNU_TARGET_OS_WATCH
165 unsigned int memorystatus_freeze_private_shared_pages_ratio = 1; /* Ratio of private:shared pages for a process to be freezer-eligible. */
166 #else
167 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
168 #endif
169 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
170 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
171 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
172
173 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
174
175 static inline boolean_t memorystatus_can_freeze_processes(void);
176 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
177 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
178 static uint32_t memorystatus_freeze_calculate_new_budget(
179 unsigned int time_since_last_interval_expired_sec,
180 unsigned int burst_multiple,
181 unsigned int interval_duration_min,
182 uint32_t rollover);
183 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
184
185 static void memorystatus_set_freeze_is_enabled(bool enabled);
186 static void memorystatus_disable_freeze(void);
187 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
188
189 /* Stats */
190 static uint64_t memorystatus_freeze_pageouts = 0;
191
192 /* Throttling */
/*
 * Lengths, in minutes, of the two throttle windows: 30 minutes for the
 * degraded window and 24 * 60 minutes (one day) for the normal window.
 */
193 #define DEGRADED_WINDOW_MINS (30)
194 #define NORMAL_WINDOW_MINS (24 * 60)
195
196 /* Protected by the freezer_mutex */
/*
 * One table entry per throttle window.
 * NOTE(review): the first initializer is presumably the window length in
 * minutes; the meaning of the remaining fields depends on the layout of
 * throttle_interval_t, which is not visible here -- confirm against its
 * definition.
 */
197 static throttle_interval_t throttle_intervals[] = {
198 { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
199 { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
200 };
/* Named pointers to the degraded ([0]) and normal ([1]) entries of the table above. */
201 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
202 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
/* Published read-only as the kern.memorystatus_freeze_interval sysctl. */
203 uint32_t memorystatus_freeze_current_interval = 0;
204 static thread_call_t freeze_interval_reset_thread_call;
/* NOTE(review): this repeats the forward declaration already made earlier in this file. */
205 static uint32_t memorystatus_freeze_calculate_new_budget(
206 unsigned int time_since_last_interval_expired_sec,
207 unsigned int burst_multiple,
208 unsigned int interval_duration_min,
209 uint32_t rollover);
210
/*
 * Global candidate lists consulted when picking processes to freeze and to
 * demote. Both are initialized to {NULL, 0}.
 */
211 struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
212 struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
213 /*
214 * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
215 * in order (as opposed to using the older LRU approach).
 * Defaults to 1 when XNU_TARGET_OS_WATCH is set and to 0 otherwise.
216 */
217 #if XNU_TARGET_OS_WATCH
218 #define FREEZER_USE_ORDERED_LIST_DEFAULT 1
219 #else
220 #define FREEZER_USE_ORDERED_LIST_DEFAULT 0
221 #endif
222 int memorystatus_freezer_use_ordered_list = FREEZER_USE_ORDERED_LIST_DEFAULT;
223 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
224 /*
225 * When enabled, demotion candidates are chosen from memorystatus_global_demote_list.
 * Disabled (0) by default.
226 */
227 int memorystatus_freezer_use_demotion_list = 0;
228 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
229
230 extern boolean_t vm_swap_max_budget(uint64_t *);
231
232 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
233 static void memorystatus_demote_frozen_processes(bool urgent_mode);
234
235 static void memorystatus_freeze_handle_error(proc_t p, const freezer_error_code_t freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
236 static void memorystatus_freeze_out_of_slots(void);
237 uint64_t memorystatus_freezer_thread_next_run_ts = 0;
238
239 /* Sysctls needed for aggd stats */
240
241 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
242 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
243 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
244 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
245 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
246 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
247 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
248
249 /*
250 * Force a new interval with the given budget (no rollover).
251 */
252 static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)253 memorystatus_freeze_force_new_interval(uint64_t new_budget)
254 {
255 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
256 mach_timespec_t now_ts;
257 clock_sec_t sec;
258 clock_nsec_t nsec;
259
260 clock_get_system_nanotime(&sec, &nsec);
261 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
262 now_ts.tv_nsec = nsec;
263 memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
264 /* Don't carry over any excess pageouts since we're forcing a new budget */
265 normal_throttle_window->pageouts = 0;
266 memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
267 }
268 #if DEVELOPMENT || DEBUG
269 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
270 {
271 #pragma unused(arg1, arg2, oidp)
272 int error, changed;
273 uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
274
275 lck_mtx_lock(&freezer_mutex);
276
277 error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
278 if (changed) {
279 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
280 lck_mtx_unlock(&freezer_mutex);
281 return ENOTSUP;
282 }
283 memorystatus_freeze_force_new_interval(new_budget);
284 }
285
286 lck_mtx_unlock(&freezer_mutex);
287 return error;
288 }
289
290 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
291 #else /* DEVELOPMENT || DEBUG */
292 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
293 #endif /* DEVELOPMENT || DEBUG */
294 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
295 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
296 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
297 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
298 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
299 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
300 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
301 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
302 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
303 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
304 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
305 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
306 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
307 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
308 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
309 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
310 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
311
312 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
313
/*
 * Calculates the hit rate for the freezer.
 * The hit rate is defined as the percentage of procs that are currently in the
 * freezer which we have thawed.
 * A low hit rate means we're freezing bad candidates since they're not re-used.
 *
 * frozen_count: number of processes that were frozen.
 * thaw_count:   number of those processes that were thawed.
 *
 * Returns an integer percentage in [0, 100], rounded down. Returns 100 when
 * frozen_count is 0, and also when thaw_count >= frozen_count (the callers'
 * counters are read with relaxed atomics and may be momentarily out of sync,
 * so the result is capped rather than allowed to exceed 100).
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0 || thaw_count >= frozen_count) {
		return 100;
	}

	if (thaw_count > UINT64_MAX / 100) {
		/*
		 * 100 * thaw_count would wrap around. Scale both counts down by
		 * 2^7 (> 100) so the product fits in 64 bits. frozen_count is
		 * larger than thaw_count here, so it stays non-zero after the
		 * shift, and the loss of precision is negligible at this
		 * magnitude.
		 */
		thaw_count >>= 7;
		frozen_count >>= 7;
	}

	return (int)(100 * thaw_count / frozen_count);
}
338
339 static int
get_thaw_percentage()340 get_thaw_percentage()
341 {
342 uint64_t processes_frozen, processes_thawed;
343 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
344 processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
345 return calculate_thaw_percentage(processes_frozen, processes_thawed);
346 }
347
348 static int
349 sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
350 {
351 #pragma unused(arg1, arg2)
352 int thaw_percentage = get_thaw_percentage();
353 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
354 }
355 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
356
357 static int
get_thaw_percentage_fg()358 get_thaw_percentage_fg()
359 {
360 uint64_t processes_frozen, processes_thawed_fg;
361 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
362 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
363 return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
364 }
365
366 static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
367 {
368 #pragma unused(arg1, arg2)
369 int thaw_percentage = get_thaw_percentage_fg();
370 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
371 }
372 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
373
374 static int
get_thaw_percentage_webcontent()375 get_thaw_percentage_webcontent()
376 {
377 uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
378 processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
379 processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
380 return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
381 }
382
383 static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
384 {
385 #pragma unused(arg1, arg2)
386 int thaw_percentage = get_thaw_percentage_webcontent();
387 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
388 }
389 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
390
391
392 static int
get_thaw_percentage_bg()393 get_thaw_percentage_bg()
394 {
395 uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
396 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
397 processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
398 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
399 return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
400 }
401
402 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
403 {
404 #pragma unused(arg1, arg2)
405 int thaw_percentage = get_thaw_percentage_bg();
406 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
407 }
408 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
409
410 static int
get_thaw_percentage_fg_non_xpc_service()411 get_thaw_percentage_fg_non_xpc_service()
412 {
413 uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
414 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
415 processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
416 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
417 processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
418 /*
419 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
420 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
421 * Clamp just in case.
422 */
423 processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
424 processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
425 return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
426 }
427
428 static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
429 {
430 #pragma unused(arg1, arg2)
431 int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
432 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
433 }
434
435 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
436
437 #define FREEZER_ERROR_STRING_LENGTH 128
438
439 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
440 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
441 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
442 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
443 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
444 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
445 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
446 static int
447 sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
448 {
449 #pragma unused(arg1, arg2, oidp, req)
450 int error = 0, changed = 0;
451 uint64_t val = memorystatus_freeze_budget_multiplier;
452 unsigned int new_budget;
453 clock_sec_t sec;
454 clock_nsec_t nsec;
455 mach_timespec_t now_ts;
456
457 error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
458 if (error) {
459 return error;
460 }
461 if (changed) {
462 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
463 return ENOTSUP;
464 }
465 lck_mtx_lock(&freezer_mutex);
466
467 memorystatus_freeze_budget_multiplier = val;
468 /* Start a new throttle interval with this budget multiplier */
469 new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
470 clock_get_system_nanotime(&sec, &nsec);
471 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
472 now_ts.tv_nsec = nsec;
473 memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
474 memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
475
476 lck_mtx_unlock(&freezer_mutex);
477 }
478 return 0;
479 }
480 EXPERIMENT_FACTOR_LEGACY_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
481 /*
482 * max. # of frozen process demotions we will allow in our daily cycle.
483 */
484 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
485
486 /*
487 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
488 */
489 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
490
491 /*
492 * min # of global thaws needed for us to consider refreezing these processes.
493 */
494 EXPERIMENT_FACTOR_LEGACY_UINT(_kern, memorystatus_min_thaw_refreeze_threshold, &memorystatus_min_thaw_refreeze_threshold, 0, UINT32_MAX, "");
495
496 #if DEVELOPMENT || DEBUG
497
498 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
499 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
500 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
501 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
502
503 /*
504 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
505 * "0" means no limit.
506 * Default is 10% of system-wide task limit.
507 */
508
509 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
510 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
511
512 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
513
514 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
515 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
516
517 /*
518 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
519 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
520 */
521 boolean_t memorystatus_freeze_to_memory = FALSE;
522 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
523
524 #define VM_PAGES_FOR_ALL_PROCS (2)
525
526 /*
527 * Manual trigger of freeze and thaw for dev / debug kernels only.
528 */
529 static int
530 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
531 {
532 #pragma unused(arg1, arg2)
533 int error, pid = 0;
534 proc_t p;
535 freezer_error_code_t freezer_error_code = 0;
536 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
537 int ntasks = 0;
538 coalition_t coal = COALITION_NULL;
539
540 error = sysctl_handle_int(oidp, &pid, 0, req);
541 if (error || !req->newptr) {
542 return error;
543 }
544
545 if (pid == VM_PAGES_FOR_ALL_PROCS) {
546 error = mach_to_bsd_errno(vm_pageout_anonymous_pages());
547 return error;
548 }
549
550 lck_mtx_lock(&freezer_mutex);
551 if (memorystatus_freeze_enabled == false) {
552 lck_mtx_unlock(&freezer_mutex);
553 memorystatus_log("sysctl_freeze: Freeze is DISABLED\n");
554 return ENOTSUP;
555 }
556
557 again:
558 p = proc_find(pid);
559 if (p != NULL) {
560 memorystatus_freezer_stats.mfs_process_considered_count++;
561 uint32_t purgeable, wired, clean, dirty, shared;
562 uint32_t max_pages = 0, state = 0;
563
564 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
565 /*
566 * Freezer backed by the compressor and swap file(s)
567 * will hold compressed data.
568 *
569 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
570 * being swapped out to disk. Note that this disables freezer swap support globally,
571 * not just for the process being frozen.
572 *
573 *
574 * We don't care about the global freezer budget or the process's (min/max) budget here.
575 * The freeze sysctl is meant to force-freeze a process.
576 *
577 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
578 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
579 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
580 */
581 max_pages = memorystatus_freeze_pages_max;
582 } else {
583 /*
584 * We only have the compressor without any swap.
585 */
586 max_pages = UINT32_MAX - 1;
587 }
588
589 proc_list_lock();
590 state = p->p_memstat_state;
591 proc_list_unlock();
592
593 /*
594 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
595 * We simply ensure that jetsam is not already working on the process and that the process has not
596 * explicitly disabled freezing.
597 */
598 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
599 memorystatus_log_error("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
600 (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
601 (state & P_MEMSTAT_LOCKED) ? " locked" : "",
602 (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
603
604 proc_rele(p);
605 lck_mtx_unlock(&freezer_mutex);
606 return EPERM;
607 }
608
609 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, pid, max_pages);
610 error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
611 if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
612 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
613 }
614 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);
615
616 if (error) {
617 memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
618 if (error == KERN_NO_SPACE) {
619 /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
620 error = ENOSPC;
621 } else {
622 error = EIO;
623 }
624 } else {
625 proc_list_lock();
626 if (!_memstat_proc_is_frozen(p)) {
627 p->p_memstat_state |= P_MEMSTAT_FROZEN;
628 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
629 memorystatus_frozen_count++;
630 os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
631 if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
632 memorystatus_frozen_count_webcontent++;
633 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
634 }
635 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
636 memorystatus_freeze_out_of_slots();
637 }
638 } else {
639 // This was a re-freeze
640 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
641 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
642 memorystatus_freezer_stats.mfs_refreeze_count++;
643 }
644 }
645 p->p_memstat_frozen_count++;
646
647 if (coal != NULL) {
648 /* We just froze an xpc service. Mark it as such for telemetry */
649 p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
650 memorystatus_frozen_count_xpc_service++;
651 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
652 }
653
654
655 proc_list_unlock();
656
657 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
658 /*
659 * We elevate only if we are going to swap out the data.
660 */
661 error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
662 memorystatus_freeze_jetsam_band, TRUE);
663
664 if (error) {
665 memorystatus_log_error("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
666 }
667 }
668 }
669
670 if ((error == 0) && (coal == NULL)) {
671 /*
672 * We froze a process and so we check to see if it was
673 * a coalition leader and if it has XPC services that
674 * might need freezing.
675 * Only one leader can be frozen at a time and so we shouldn't
676 * enter this block more than once per call. Hence the
677 * check that 'coal' has to be NULL. We should make this an
678 * assert() or panic() once we have a much more concrete way
679 * to detect an app vs a daemon.
680 */
681
682 task_t curr_task = NULL;
683
684 curr_task = proc_task(p);
685 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
686 if (coalition_is_leader(curr_task, coal)) {
687 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
688 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
689
690 if (ntasks > MAX_XPC_SERVICE_PIDS) {
691 ntasks = MAX_XPC_SERVICE_PIDS;
692 }
693 }
694 }
695
696 proc_rele(p);
697
698 while (ntasks) {
699 pid = pid_list[--ntasks];
700 goto again;
701 }
702
703 lck_mtx_unlock(&freezer_mutex);
704 return error;
705 } else {
706 memorystatus_log_error("sysctl_freeze: Invalid process\n");
707 }
708
709
710 lck_mtx_unlock(&freezer_mutex);
711 return EINVAL;
712 }
713
714 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
715 0, 0, &sysctl_memorystatus_freeze, "I", "");
716
717 /*
718 * Manual trigger of agressive frozen demotion for dev / debug kernels only.
719 */
720 static int
721 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
722 {
723 #pragma unused(arg1, arg2)
724 int error, val;
725 /*
726 * Only demote on write to prevent demoting during `sysctl -a`.
727 * The actual value written doesn't matter.
728 */
729 error = sysctl_handle_int(oidp, &val, 0, req);
730 if (error || !req->newptr) {
731 return error;
732 }
733 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
734 return ENOTSUP;
735 }
736 lck_mtx_lock(&freezer_mutex);
737 memorystatus_demote_frozen_processes(false);
738 lck_mtx_unlock(&freezer_mutex);
739 return 0;
740 }
741
742 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
743
744 static int
745 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
746 {
747 #pragma unused(arg1, arg2)
748
749 int error, pid = 0;
750 proc_t p;
751
752 if (memorystatus_freeze_enabled == false) {
753 return ENOTSUP;
754 }
755
756 error = sysctl_handle_int(oidp, &pid, 0, req);
757 if (error || !req->newptr) {
758 return error;
759 }
760
761 if (pid == VM_PAGES_FOR_ALL_PROCS) {
762 do_fastwake_warmup_all();
763 return 0;
764 } else {
765 p = proc_find(pid);
766 if (p != NULL) {
767 error = task_thaw(proc_task(p));
768
769 if (error) {
770 error = EIO;
771 } else {
772 /*
773 * task_thaw() succeeded.
774 *
775 * We increment memorystatus_frozen_count on the sysctl freeze path.
776 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
777 * when this process exits.
778 *
779 * proc_list_lock();
780 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
781 * proc_list_unlock();
782 */
783 }
784 proc_rele(p);
785 return error;
786 }
787 }
788
789 return EINVAL;
790 }
791
792 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
793 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
794
795
/*
 * Header of the buffer produced by memorystatus_freezer_get_status().
 * It is followed in the same buffer by an array of proc_freezable_status_t.
 */
typedef struct _global_freezable_status {
	boolean_t       freeze_pages_threshold_crossed;  /* memorystatus_available_pages < memorystatus_freeze_threshold */
	boolean_t       freeze_eligible_procs_available; /* enough candidates remain (ordered list length or suspended-count threshold) */
	boolean_t       freeze_scheduled_in_future;      /* current time is before memorystatus_freezer_thread_next_run_ts */
}global_freezable_status_t;
801
/*
 * Per-process record produced by memorystatus_freezer_get_status().
 * One entry is emitted for every process that was evaluated.
 */
typedef struct _proc_freezable_status {
	boolean_t    freeze_has_memstat_state; /* suspended and not terminated / locked / freeze-disabled / freeze-ignored */
	boolean_t    freeze_has_pages_min;     /* page count >= memorystatus_freeze_pages_min (forced TRUE for XPC services) */
	int        freeze_has_probability;     /* use probability from the global table; -1 when not evaluated */
	int        freeze_leader_eligible;     /* one of FREEZE_PROC_LEADER_FREEZABLE_* */
	boolean_t    freeze_attempted;         /* an eval-only task_freeze() was run for this process */
	uint32_t    p_memstat_state;           /* snapshot of p->p_memstat_state */
	uint32_t    p_pages;                   /* page count from memorystatus_get_task_page_counts(); (uint32_t)-1 for XPC services */
	int        p_freeze_error_code;        /* freezer error code when the evaluation (or the leader's) failed */
	int        p_pid;                      /* pid of the evaluated process */
	int        p_leader_pid;               /* coalition leader pid; 0 when this isn't a coalition driven freeze */
	char        p_name[MAXCOMLEN + 1];     /* process name, truncated to MAXCOMLEN */
}proc_freezable_status_t;
815
816 #define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */
817
/*
 * For coalition based freezing evaluations, we proceed as follows:
 *  - detect that the process is a coalition member and an XPC service
 *  - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be freezable too
 *
 * Once we are done evaluating all processes, we do a quick run through all
 * processes and for a coalition member XPC service we look up the 'freezable'
 * status of its leader and iff:
 *  - the xpc service is freezable i.e. its individual freeze evaluation worked
 *  - and, its leader is also marked freezable
 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */
831
/* Values stored in proc_freezable_status_t.freeze_leader_eligible. */
#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN   (-1) /* XPC service whose leader has not been resolved yet */
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS   (1)  /* app (its own leader), or leader frozen / evaluated freezable */
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE   (2)  /* leader not found or its evaluation failed */
835
836 static int
memorystatus_freezer_get_status(user_addr_t buffer,size_t buffer_size,int32_t * retval)837 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
838 {
839 uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
840 global_freezable_status_t *list_head;
841 proc_freezable_status_t *list_entry, *list_entry_start;
842 size_t list_size = 0, entry_count = 0;
843 proc_t p, leader_proc;
844 memstat_bucket_t *bucket;
845 uint32_t state = 0, pages = 0;
846 boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
847 int error = 0, probability_of_use = 0;
848 pid_t leader_pid = 0;
849 struct memorystatus_freeze_list_iterator iterator;
850
851 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
852 return ENOTSUP;
853 }
854
855 bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
856
857 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
858
859 if (buffer_size < list_size) {
860 return EINVAL;
861 }
862
863 list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
864 if (list_head == NULL) {
865 return ENOMEM;
866 }
867
868 list_size = sizeof(global_freezable_status_t);
869
870 lck_mtx_lock(&freezer_mutex);
871 proc_list_lock();
872
873 uint64_t curr_time = mach_absolute_time();
874
875 list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
876 if (memorystatus_freezer_use_ordered_list) {
877 list_head->freeze_eligible_procs_available = memorystatus_frozen_count < memorystatus_global_freeze_list.mfcl_length;
878 } else {
879 list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
880 }
881 list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
882
883 list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
884 list_entry = list_entry_start;
885
886 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
887
888 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
889
890 if (memorystatus_freezer_use_ordered_list) {
891 while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
892 p = memorystatus_freezer_candidate_list_get_proc(
893 &memorystatus_global_freeze_list,
894 (iterator.global_freeze_list_index)++,
895 NULL);
896 if (p != PROC_NULL) {
897 break;
898 }
899 }
900 } else {
901 p = memorystatus_get_first_proc_locked(&band, FALSE);
902 }
903
904 proc_count++;
905
906 while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
907 (p) &&
908 (list_size < buffer_size)) {
909 if (isSysProc(p)) {
910 /*
911 * Daemon:- We will consider freezing it iff:
912 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
913 * - its role in the coalition is XPC service.
914 *
915 * We skip memory size requirements in this case.
916 */
917
918 coalition_t coal = COALITION_NULL;
919 task_t leader_task = NULL, curr_task = NULL;
920 int task_role_in_coalition = 0;
921
922 curr_task = proc_task(p);
923 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
924
925 if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
926 /*
927 * By default, XPC services without an app
928 * will be the leader of their own single-member
929 * coalition.
930 */
931 goto skip_ineligible_xpc;
932 }
933
934 leader_task = coalition_get_leader(coal);
935 if (leader_task == TASK_NULL) {
936 /*
937 * This jetsam coalition is currently leader-less.
938 * This could happen if the app died, but XPC services
939 * have not yet exited.
940 */
941 goto skip_ineligible_xpc;
942 }
943
944 leader_proc = (proc_t)get_bsdtask_info(leader_task);
945 task_deallocate(leader_task);
946
947 if (leader_proc == PROC_NULL) {
948 /* leader task is exiting */
949 goto skip_ineligible_xpc;
950 }
951
952 task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
953
954 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
955 xpc_skip_size_probability_check = TRUE;
956 leader_pid = proc_getpid(leader_proc);
957 goto continue_eval;
958 }
959
960 skip_ineligible_xpc:
961 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
962 proc_count++;
963 continue;
964 }
965
966 continue_eval:
967 strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
968
969 list_entry->p_pid = proc_getpid(p);
970
971 state = p->p_memstat_state;
972
973 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
974 !(state & P_MEMSTAT_SUSPENDED)) {
975 try_freeze = list_entry->freeze_has_memstat_state = FALSE;
976 } else {
977 try_freeze = list_entry->freeze_has_memstat_state = TRUE;
978 }
979
980 list_entry->p_memstat_state = state;
981
982 if (xpc_skip_size_probability_check == TRUE) {
983 /*
984 * Assuming the coalition leader is freezable
985 * we don't care re. minimum pages and probability
986 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
987 * XPC services have to be explicity opted-out of the disabled
988 * state. And we checked that state above.
989 */
990 list_entry->freeze_has_pages_min = TRUE;
991 list_entry->p_pages = -1;
992 list_entry->freeze_has_probability = -1;
993
994 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
995 list_entry->p_leader_pid = leader_pid;
996
997 xpc_skip_size_probability_check = FALSE;
998 } else {
999 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
1000 list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
1001
1002 memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
1003 if (pages < memorystatus_freeze_pages_min) {
1004 try_freeze = list_entry->freeze_has_pages_min = FALSE;
1005 } else {
1006 list_entry->freeze_has_pages_min = TRUE;
1007 }
1008
1009 list_entry->p_pages = pages;
1010
1011 if (entry_count) {
1012 uint32_t j = 0;
1013 for (j = 0; j < entry_count; j++) {
1014 if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
1015 p->p_name,
1016 MAXCOMLEN) == 0) {
1017 probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
1018 break;
1019 }
1020 }
1021
1022 list_entry->freeze_has_probability = probability_of_use;
1023
1024 try_freeze = ((probability_of_use > 0) && try_freeze);
1025 } else {
1026 list_entry->freeze_has_probability = -1;
1027 }
1028 }
1029
1030 if (try_freeze) {
1031 uint32_t purgeable, wired, clean, dirty, shared;
1032 uint32_t max_pages = 0;
1033 int freezer_error_code = 0;
1034
1035 error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
1036
1037 if (error) {
1038 list_entry->p_freeze_error_code = freezer_error_code;
1039 }
1040
1041 list_entry->freeze_attempted = TRUE;
1042 }
1043
1044 list_entry++;
1045 freeze_eligible_proc_considered++;
1046
1047 list_size += sizeof(proc_freezable_status_t);
1048
1049 if (memorystatus_freezer_use_ordered_list) {
1050 p = PROC_NULL;
1051 while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
1052 p = memorystatus_freezer_candidate_list_get_proc(
1053 &memorystatus_global_freeze_list,
1054 (iterator.global_freeze_list_index)++,
1055 NULL);
1056 if (p != PROC_NULL) {
1057 break;
1058 }
1059 }
1060 } else {
1061 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1062 }
1063
1064 proc_count++;
1065 }
1066
1067 proc_list_unlock();
1068 lck_mtx_unlock(&freezer_mutex);
1069
1070 list_entry = list_entry_start;
1071
1072 for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1073 if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1074 leader_pid = list_entry[xpc_index].p_leader_pid;
1075
1076 leader_proc = proc_find(leader_pid);
1077
1078 if (leader_proc) {
1079 if (_memstat_proc_is_frozen(leader_proc)) {
1080 /*
1081 * Leader has already been frozen.
1082 */
1083 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1084 proc_rele(leader_proc);
1085 continue;
1086 }
1087 proc_rele(leader_proc);
1088 }
1089
1090 for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1091 if (list_entry[leader_index].p_pid == leader_pid) {
1092 if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1093 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1094 } else {
1095 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1096 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1097 }
1098 break;
1099 }
1100 }
1101
1102 /*
1103 * Didn't find the leader entry. This might be likely because
1104 * the leader never made it down to band 0.
1105 */
1106 if (leader_index == freeze_eligible_proc_considered) {
1107 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1108 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1109 }
1110 }
1111 }
1112
1113 buffer_size = MIN(list_size, INT32_MAX);
1114
1115 error = copyout(list_head, buffer, buffer_size);
1116 if (error == 0) {
1117 *retval = (int32_t) buffer_size;
1118 } else {
1119 *retval = 0;
1120 }
1121
1122 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1123 kfree_data(list_head, list_size);
1124
1125 memorystatus_log_debug("memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
1126
1127 return error;
1128 }
1129
1130 #endif /* DEVELOPMENT || DEBUG */
1131
1132 /*
1133 * Get a list of all processes in the freezer band which are currently frozen.
1134 * Used by powerlog to collect analytics on frozen process.
1135 */
1136 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1137 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1138 {
1139 global_frozen_procs_t *frozen_procs = NULL;
1140 uint32_t band = memorystatus_freeze_jetsam_band;
1141 proc_t p;
1142 int error;
1143 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1144 return ENOTSUP;
1145 }
1146 if (buffer_size < sizeof(global_frozen_procs_t)) {
1147 return EINVAL;
1148 }
1149 frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1150 if (frozen_procs == NULL) {
1151 return ENOMEM;
1152 }
1153
1154 proc_list_lock();
1155 p = memorystatus_get_first_proc_locked(&band, FALSE);
1156 while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1157 if (_memstat_proc_is_frozen(p)) {
1158 frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1159 strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1160 p->p_name, sizeof(proc_name_t));
1161 frozen_procs->gfp_num_frozen++;
1162 }
1163 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1164 }
1165 proc_list_unlock();
1166
1167 buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1168 error = copyout(frozen_procs, buffer, buffer_size);
1169 if (error == 0) {
1170 *retval = (int32_t) buffer_size;
1171 } else {
1172 *retval = 0;
1173 }
1174 kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1175
1176 return error;
1177 }
1178
1179 /*
1180 * If dasd is running an experiment that impacts their freezer candidate selection,
1181 * we record that in our telemetry.
1182 */
1183 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1184
1185 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1186 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1187 {
1188 memorystatus_freezer_trial_identifiers_v1 identifiers;
1189 int error = 0;
1190
1191 if (buffer_size != sizeof(identifiers)) {
1192 return EINVAL;
1193 }
1194 error = copyin(buffer, &identifiers, sizeof(identifiers));
1195 if (error != 0) {
1196 return error;
1197 }
1198 if (identifiers.version != 1) {
1199 return EINVAL;
1200 }
1201 dasd_trial_identifiers = identifiers;
1202 *retval = 0;
1203 return error;
1204 }
1205
1206 /*
1207 * Reset the freezer state by wiping out all suspended frozen apps, clearing
1208 * per-process freezer state, and starting a fresh interval.
1209 */
1210 static int
memorystatus_freezer_reset_state(int32_t * retval)1211 memorystatus_freezer_reset_state(int32_t *retval)
1212 {
1213 uint32_t band = JETSAM_PRIORITY_IDLE;
1214 /* Don't kill above the frozen band */
1215 uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
1216 proc_t next_p = PROC_NULL;
1217 uint64_t new_budget;
1218
1219 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1220 return ENOTSUP;
1221 }
1222
1223 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
1224 if (jetsam_reason == OS_REASON_NULL) {
1225 memorystatus_log_error("memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
1226 }
1227 lck_mtx_lock(&freezer_mutex);
1228 kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
1229 proc_list_lock();
1230
1231 /*
1232 * Clear the considered and skip reason flags on all processes
1233 * so we're starting fresh with the new policy.
1234 */
1235 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1236 while (next_p) {
1237 proc_t p = next_p;
1238 uint32_t state = p->p_memstat_state;
1239 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1240
1241 if (p->p_memstat_effectivepriority > kMaxBand) {
1242 break;
1243 }
1244 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1245 continue;
1246 }
1247
1248 p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
1249 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1250 }
1251
1252 proc_list_unlock();
1253
1254 new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1255 memorystatus_freeze_force_new_interval(new_budget);
1256
1257 lck_mtx_unlock(&freezer_mutex);
1258 *retval = 0;
1259 return 0;
1260 }
1261
1262 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1263 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1264 {
1265 int err = ENOTSUP;
1266
1267 #if DEVELOPMENT || DEBUG
1268 if (flags == FREEZER_CONTROL_GET_STATUS) {
1269 err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1270 }
1271 #endif /* DEVELOPMENT || DEBUG */
1272 if (flags == FREEZER_CONTROL_GET_PROCS) {
1273 err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1274 } else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1275 err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1276 } else if (flags == FREEZER_CONTROL_RESET_STATE) {
1277 err = memorystatus_freezer_reset_state(retval);
1278 }
1279
1280 return err;
1281 }
1282
1283 static bool
kill_all_frozen_processes(uint64_t max_band,bool suspended_only,os_reason_t jetsam_reason,uint64_t * memory_reclaimed_out)1284 kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
1285 {
1286 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1287 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1288
1289 unsigned int band = 0;
1290 proc_t p = PROC_NULL, next_p = PROC_NULL;
1291 pid_t pid = 0;
1292 bool retval = false, killed = false;
1293 uint32_t state;
1294 uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
1295 proc_list_lock();
1296
1297 band = JETSAM_PRIORITY_IDLE;
1298 p = PROC_NULL;
1299 next_p = PROC_NULL;
1300
1301 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1302 while (next_p) {
1303 p = next_p;
1304 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1305 state = p->p_memstat_state;
1306
1307 if (p->p_memstat_effectivepriority > max_band) {
1308 break;
1309 }
1310
1311 if (!(state & P_MEMSTAT_FROZEN)) {
1312 continue;
1313 }
1314
1315 if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
1316 continue;
1317 }
1318
1319 if (state & P_MEMSTAT_ERROR) {
1320 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
1321 }
1322
1323 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1324 memorystatus_log("memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
1325 skips++;
1326 continue;
1327 }
1328
1329 footprint = get_task_phys_footprint(proc_task(p));
1330 pid = proc_getpid(p);
1331 proc_list_unlock();
1332
1333 /* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
1334 os_reason_ref(jetsam_reason);
1335 retval = memstat_kill_with_jetsam_reason_sync(pid, jetsam_reason);
1336 if (retval) {
1337 killed = true;
1338 memory_reclaimed += footprint;
1339 }
1340 proc_list_lock();
1341 /*
1342 * The bands might have changed when we dropped the proc list lock.
1343 * So start from the beginning.
1344 * Since we're preventing any further freezing by holding the freezer mutex,
1345 * and we skip anything we've already tried to kill this is guaranteed to terminate.
1346 */
1347 band = 0;
1348 skips = 0;
1349 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1350 }
1351
1352 assert(skips <= memorystatus_frozen_count);
1353 #if MACH_ASSERT
1354 if (!suspended_only && max_band == JETSAM_PRIORITY_MAX) {
1355 /*
1356 * Check that we've killed all frozen processes.
1357 * Note that they may still be exiting (represented by skips).
1358 */
1359 if (memorystatus_frozen_count - skips > 0) {
1360 assert(memorystatus_freeze_enabled == false);
1361
1362 panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
1363 memorystatus_frozen_count);
1364 }
1365 }
1366 #endif /* MACH_ASSERT */
1367 if (memory_reclaimed_out) {
1368 *memory_reclaimed_out = memory_reclaimed;
1369 }
1370 proc_list_unlock();
1371 return killed;
1372 }
1373
1374 /*
1375 * Disables the freezer, jetsams all frozen processes,
1376 * and reclaims the swap space immediately.
1377 */
1378
1379 void
memorystatus_disable_freeze(void)1380 memorystatus_disable_freeze(void)
1381 {
1382 uint64_t memory_reclaimed = 0;
1383 bool killed = false;
1384 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1385 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1386
1387
1388 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1389 memorystatus_available_pages);
1390 memorystatus_log("memorystatus: Disabling freezer. Will kill all frozen processes\n");
1391
1392 /*
1393 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1394 * and all frozen processes will be killed
1395 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1396 * ensures that no new processes will be frozen once we release the mutex.
1397 *
1398 */
1399 memorystatus_freeze_enabled = false;
1400
1401 /*
1402 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1403 */
1404 vm_page_reactivate_all_throttled();
1405 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1406 if (jetsam_reason == OS_REASON_NULL) {
1407 memorystatus_log_error("memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1408 }
1409
1410 killed = kill_all_frozen_processes(JETSAM_PRIORITY_MAX, false, jetsam_reason, &memory_reclaimed);
1411
1412 if (killed) {
1413 memorystatus_log_info("memorystatus: Killed all frozen processes.\n");
1414 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1415
1416 memorystatus_post_snapshot();
1417 } else {
1418 memorystatus_log_info("memorystatus: No frozen processes to kill.\n");
1419 }
1420
1421 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1422 memorystatus_available_pages, memory_reclaimed);
1423
1424 return;
1425 }
1426
1427 static void
memorystatus_set_freeze_is_enabled(bool enabled)1428 memorystatus_set_freeze_is_enabled(bool enabled)
1429 {
1430 lck_mtx_lock(&freezer_mutex);
1431 if (enabled != memorystatus_freeze_enabled) {
1432 if (enabled) {
1433 memorystatus_freeze_enabled = true;
1434 } else {
1435 memorystatus_disable_freeze();
1436 }
1437 }
1438 lck_mtx_unlock(&freezer_mutex);
1439 }
1440
1441
1442 static int
1443 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1444 {
1445 #pragma unused(arg1, arg2)
1446 int error, val = memorystatus_freeze_enabled ? 1 : 0;
1447
1448 error = sysctl_handle_int(oidp, &val, 0, req);
1449 if (error || !req->newptr) {
1450 return error;
1451 }
1452
1453 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1454 memorystatus_log_error("memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1455 return EINVAL;
1456 }
1457
1458 memorystatus_set_freeze_is_enabled(val);
1459
1460 return 0;
1461 }
1462
1463 EXPERIMENT_FACTOR_LEGACY_PROC(_vm, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1464
1465 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1466 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1467 {
1468 uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1469 uint64_t interval_expiration_absolutetime;
1470 nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1471 memorystatus_log_info("memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1472
1473 thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1474 }
1475
1476 extern uuid_string_t trial_treatment_id;
1477 extern uuid_string_t trial_experiment_id;
1478 extern int trial_deployment_id;
1479
/*
 * Core analytics event describing one freezer interval.
 * The fields are filled in by memorystatus_freeze_record_interval_analytics().
 */
CA_EVENT(freezer_interval,
    CA_INT, budget_remaining,
    CA_INT, error_below_min_pages,
    CA_INT, error_excess_shared_memory,
    CA_INT, error_low_private_shared_ratio,
    CA_INT, error_no_compressor_space,
    CA_INT, error_no_swap_space,
    CA_INT, error_low_probability_of_use,
    CA_INT, error_elevated,
    CA_INT, error_other,
    CA_INT, frozen_count,
    CA_INT, pageouts,
    CA_INT, refreeze_average,
    CA_INT, skipped_full,
    CA_INT, skipped_shared_mb_high,
    CA_INT, swapusage,
    CA_INT, thaw_count,
    CA_INT, thaw_percentage,
    CA_INT, thaws_per_gb,
    CA_INT, trial_deployment_id,
    CA_INT, dasd_trial_deployment_id,
    CA_INT, budget_exhaustion_duration_remaining,
    CA_INT, thaw_percentage_webcontent,
    CA_INT, thaw_percentage_fg,
    CA_INT, thaw_percentage_bg,
    CA_INT, thaw_percentage_fg_non_xpc_service,
    CA_INT, fg_resume_count,
    CA_INT, unique_freeze_count,
    CA_INT, unique_thaw_count,
    CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1513
1514
1515 /*
1516 * Record statistics from the expiring interval
1517 * via core analytics.
1518 */
1519 static void
memorystatus_freeze_record_interval_analytics(void)1520 memorystatus_freeze_record_interval_analytics(void)
1521 {
1522 ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
1523 CA_EVENT_TYPE(freezer_interval) * e = event->data;
1524 e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
1525 uint64_t process_considered_count, refrozen_count, below_threshold_count;
1526 memory_object_size_t swap_size;
1527 process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
1528 if (process_considered_count != 0) {
1529 e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
1530 e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
1531 e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
1532 e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
1533 e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
1534 e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
1535 e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
1536 e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
1537 }
1538 e->frozen_count = memorystatus_frozen_count;
1539 e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
1540 refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
1541 if (refrozen_count != 0) {
1542 e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
1543 }
1544 below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
1545 if (below_threshold_count != 0) {
1546 e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
1547 e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
1548 }
1549 if (VM_CONFIG_SWAP_IS_PRESENT) {
1550 swap_size = vm_swap_get_total_space();
1551 if (swap_size) {
1552 e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
1553 }
1554 }
1555 e->thaw_count = memorystatus_thaw_count;
1556 e->thaw_percentage = get_thaw_percentage();
1557 e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
1558 e->thaw_percentage_fg = get_thaw_percentage_fg();
1559 e->thaw_percentage_bg = get_thaw_percentage_bg();
1560 e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();
1561
1562 if (e->pageouts / (1UL << 10) != 0) {
1563 e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
1564 }
1565 e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
1566 e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
1567 e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1568 e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
1569
1570 /*
1571 * Record any xnu or dasd experiment information
1572 */
1573 strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
1574 strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
1575 e->trial_deployment_id = trial_deployment_id;
1576 strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
1577 strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
1578 e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;
1579
1580 CA_EVENT_SEND(event);
1581 }
1582
1583 static void
memorystatus_freeze_reset_interval(void * arg0,void * arg1)1584 memorystatus_freeze_reset_interval(void *arg0, void *arg1)
1585 {
1586 #pragma unused(arg0, arg1)
1587 struct throttle_interval_t *interval = NULL;
1588 clock_sec_t sec;
1589 clock_nsec_t nsec;
1590 mach_timespec_t now_ts;
1591 uint32_t budget_rollover = 0;
1592
1593 clock_get_system_nanotime(&sec, &nsec);
1594 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
1595 now_ts.tv_nsec = nsec;
1596 interval = normal_throttle_window;
1597
1598 /* Record analytics from the old interval before resetting. */
1599 memorystatus_freeze_record_interval_analytics();
1600
1601 lck_mtx_lock(&freezer_mutex);
1602 /* How long has it been since the previous interval expired? */
1603 mach_timespec_t expiration_period_ts = now_ts;
1604 SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
1605 /* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
1606 budget_rollover = interval->pageouts > interval->max_pageouts ?
1607 0 : interval->max_pageouts - interval->pageouts;
1608
1609 memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
1610 expiration_period_ts.tv_sec, interval->burst_multiple,
1611 interval->mins, budget_rollover),
1612 now_ts);
1613 memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;
1614
1615 if (!memorystatus_freezer_use_demotion_list) {
1616 memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
1617 }
1618 lck_mtx_unlock(&freezer_mutex);
1619 }
1620
1621
1622 proc_t
memorystatus_get_coalition_leader_and_role(proc_t p,int * role_in_coalition)1623 memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1624 {
1625 coalition_t coal = COALITION_NULL;
1626 task_t leader_task = NULL, curr_task = NULL;
1627 proc_t leader_proc = PROC_NULL;
1628
1629 curr_task = proc_task(p);
1630 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1631
1632 if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1633 return p;
1634 }
1635
1636 leader_task = coalition_get_leader(coal);
1637 if (leader_task == TASK_NULL) {
1638 /*
1639 * This jetsam coalition is currently leader-less.
1640 * This could happen if the app died, but XPC services
1641 * have not yet exited.
1642 */
1643 return PROC_NULL;
1644 }
1645
1646 leader_proc = (proc_t)get_bsdtask_info(leader_task);
1647 task_deallocate(leader_task);
1648
1649 if (leader_proc == PROC_NULL) {
1650 /* leader task is exiting */
1651 return PROC_NULL;
1652 }
1653
1654 *role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1655
1656 return leader_proc;
1657 }
1658
1659 bool
memorystatus_freeze_process_is_recommended(const proc_t p)1660 memorystatus_freeze_process_is_recommended(const proc_t p)
1661 {
1662 assert(!memorystatus_freezer_use_ordered_list);
1663 int probability_of_use = 0;
1664
1665 size_t entry_count = 0, i = 0;
1666 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1667 if (entry_count == 0) {
1668 /*
1669 * If dasd hasn't supplied a table yet, we default to every app being eligible
1670 * for the freezer.
1671 */
1672 return true;
1673 }
1674 for (i = 0; i < entry_count; i++) {
1675 /*
1676 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1677 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1678 * MAXCOMLEN bytes here since the name in the probabilities table could
1679 * be truncated from the proc_t's p_name.
1680 */
1681 if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1682 p->p_name,
1683 MAXCOMLEN) == 0) {
1684 probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1685 break;
1686 }
1687 }
1688 return probability_of_use > 0;
1689 }
1690
1691 __private_extern__ void
memorystatus_freeze_init(void)1692 memorystatus_freeze_init(void)
1693 {
1694 kern_return_t result;
1695 thread_t thread;
1696
1697 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1698 int32_t memorystatus_freezer_use_ordered_list_bootarg = 0;
1699 if (PE_parse_boot_argn("memorystatus_freezer_use_ordered_list", &memorystatus_freezer_use_ordered_list_bootarg, sizeof(memorystatus_freezer_use_ordered_list_bootarg))) {
1700 memorystatus_freezer_use_ordered_list = (memorystatus_freezer_use_ordered_list_bootarg != 0);
1701 }
1702
1703 int32_t memorystatus_freeze_max_candidate_band_bootarg = 0;
1704 if (PE_parse_boot_argn("memorystatus_freeze_max_candidate_band", &memorystatus_freeze_max_candidate_band_bootarg, sizeof(memorystatus_freeze_max_candidate_band_bootarg))) {
1705 if (memorystatus_freeze_max_candidate_band_bootarg >= 0 && memorystatus_freeze_max_candidate_band_bootarg <= 1000) {
1706 memorystatus_freeze_max_candidate_band = memorystatus_freeze_max_candidate_band_bootarg;
1707 }
1708 }
1709
1710 /*
1711 * This is just the default value if the underlying
1712 * storage device doesn't have any specific budget.
1713 * We check with the storage layer in memorystatus_freeze_update_throttle()
1714 * before we start our freezing the first time.
1715 */
1716 memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1717
1718 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1719 if (result == KERN_SUCCESS) {
1720 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1721 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1722 thread_set_thread_name(thread, "VM_freezer");
1723
1724 thread_deallocate(thread);
1725 } else {
1726 panic("Could not create memorystatus_freeze_thread");
1727 }
1728
1729 freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
1730 /* Start a new interval */
1731
1732 lck_mtx_lock(&freezer_mutex);
1733 uint32_t budget;
1734 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1735 memorystatus_freeze_force_new_interval(budget);
1736 lck_mtx_unlock(&freezer_mutex);
1737 } else {
1738 memorystatus_freeze_budget_pages_remaining = 0;
1739 }
1740 }
1741
1742 void
memorystatus_freeze_configure_for_swap()1743 memorystatus_freeze_configure_for_swap()
1744 {
1745 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1746 return;
1747 }
1748
1749 assert(memorystatus_swap_all_apps);
1750
1751 /*
1752 * We expect both a larger working set and larger individual apps
1753 * in this mode, so tune up the freezer accordingly.
1754 */
1755 memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED_DEFAULT;
1756 memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED_DEFAULT;
1757 memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED_DEFAULT;
1758
1759 /*
1760 * We don't have a budget when running with full app swap.
1761 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1762 * unlimited budget.
1763 */
1764 lck_mtx_lock(&freezer_mutex);
1765 uint32_t budget;
1766 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1767 memorystatus_freeze_force_new_interval(budget);
1768 lck_mtx_unlock(&freezer_mutex);
1769 }
1770
1771 void
memorystatus_freeze_disable_swap()1772 memorystatus_freeze_disable_swap()
1773 {
1774 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1775 return;
1776 }
1777
1778 assert(!memorystatus_swap_all_apps);
1779
1780 memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_DEFAULT;
1781 memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_DEFAULT;
1782 memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_DEFAULT;
1783
1784 /*
1785 * Calculate a new budget now that we're constrained by our daily write budget again.
1786 */
1787 lck_mtx_lock(&freezer_mutex);
1788 uint32_t budget;
1789 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1790 memorystatus_freeze_force_new_interval(budget);
1791 lck_mtx_unlock(&freezer_mutex);
1792 }
1793
/*
 * Freeze (or re-freeze) a single process.
 *
 * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
 * The proc_list_lock is dropped around the call to task_freeze(), and again
 * on the success path while the freeze note is sent and the throttle is updated.
 *
 * p:                     The process to freeze.
 * coal:                  IN / OUT. May be NULL. If non-NULL and *coal is NULL on entry,
 *                        *coal is set to p's jetsam coalition after a successful freeze.
 * coalition_list:        OUT. Only written when coal is non-NULL, *coal was NULL on entry and
 *                        p is the leader of its jetsam coalition: receives the coalition's
 *                        XPC service pids (at most MAX_XPC_SERVICE_PIDS).
 * coalition_list_length: OUT. Written together with coalition_list; clamped to MAX_XPC_SERVICE_PIDS.
 *
 * Returns:
 *   0       p was frozen.
 *   EINVAL  p is not eligible for (re-)freezing, or task_freeze() failed.
 *   EBUSY   p is being re-frozen but is P_MEMSTAT_LOCKED, or proc_ref() failed.
 *   ENOSPC  no frozen slots are left.
 */
static int
memorystatus_freeze_process(
	proc_t p,
	coalition_t *coal, /* IN / OUT */
	pid_t *coalition_list, /* OUT */
	unsigned int *coalition_list_length /* OUT */)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	kern_return_t kr;
	uint32_t purgeable, wired, clean, dirty, shared;
	uint64_t max_pages = 0;
	freezer_error_code_t freezer_error_code = 0;
	bool is_refreeze = false;
	task_t curr_task = TASK_NULL;

	pid_t aPid = proc_getpid(p);

	/* A process that is already frozen can only be re-frozen. */
	is_refreeze = _memstat_proc_is_frozen(p);

	/* Ensure the process is eligible for (re-)freezing */
	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
		return EINVAL;
	}
	if (is_refreeze) {
		/*
		 * Not currently being looked at for something.
		 */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			return EBUSY;
		}

		/*
		 * We are going to try and refreeze and so re-evaluate
		 * the process. We don't want to double count the shared
		 * memory. So deduct the old snapshot here.
		 */
		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
		p->p_memstat_freeze_sharedanon_pages = 0;

		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	} else {
		if (!memorystatus_is_process_eligible_for_freeze(p)) {
			return EINVAL;
		}
		if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
			memorystatus_freeze_handle_error(p, FREEZER_ERROR_NO_SLOTS, is_refreeze, aPid, (coal ? *coal : NULL), "memorystatus_freeze_process");
			return ENOSPC;
		}
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * Freezer backed by the compressor and swap file(s)
		 * will hold compressed data.
		 */

		/* Bounded by both the per-process page limit and the remaining budget. */
		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
	} else {
		/*
		 * We only have the compressor pool.
		 */
		max_pages = UINT32_MAX - 1;
	}

	/* Mark as locked temporarily to avoid kill */
	p->p_memstat_state |= P_MEMSTAT_LOCKED;

	/*
	 * NOTE(review): if proc_ref() fails, p is overwritten with NULL and
	 * P_MEMSTAT_LOCKED is never cleared on the original proc here --
	 * confirm that is acceptable for a proc that can no longer be referenced.
	 */
	p = proc_ref(p, true);
	if (!p) {
		memorystatus_freezer_stats.mfs_error_other_count++;
		return EBUSY;
	}

	/* task_freeze() runs without the proc_list_lock; it is re-taken below. */
	proc_list_unlock();

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, aPid, max_pages);

	/* task_freeze() takes a 32-bit page count. */
	max_pages = MIN(max_pages, UINT32_MAX);
	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
	}

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);

	/* NOTE(review): this message names memorystatus_freeze_top_process rather than this function. */
	memorystatus_log_debug("memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %llu, shared %d",
	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);

	proc_list_lock();

	/* Success? */
	if (KERN_SUCCESS == kr) {
		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

		/* Account for the shared pages reported by task_freeze(). */
		p->p_memstat_freeze_sharedanon_pages += shared;

		memorystatus_frozen_shared_mb += shared;

		if (!is_refreeze) {
			/* First freeze of this process: mark it frozen and bump the frozen counters. */
			p->p_memstat_state |= P_MEMSTAT_FROZEN;
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
			memorystatus_frozen_count++;
			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
				memorystatus_frozen_count_webcontent++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
			}
			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
				memorystatus_freeze_out_of_slots();
			}
		} else {
			// This was a re-freeze
			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
				memorystatus_freezer_stats.mfs_refreeze_count++;
			}
		}

		p->p_memstat_frozen_count++;

		/*
		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
		 * to its higher jetsam band.
		 */
		proc_list_unlock();

		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
#if FREEZE_USE_ELEVATED_INACTIVE_BAND
			int ret;
			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);

			if (ret) {
				memorystatus_log_error("Elevating the frozen process failed with %d\n", ret);
				/* not fatal */
			}
#endif

			/* Update stats */
			/* Charge the pages just written against every throttle interval. */
			for (unsigned int i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}
		}
		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
		memorystatus_log("memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
		    memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);

		proc_list_lock();

		memorystatus_freeze_pageouts += dirty;

		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
			/*
			 * Add some eviction logic here? At some point should we
			 * jetsam a process to get back its swap space so that we
			 * can freeze a more eligible process at this moment in time?
			 */
		}

		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
		if (coal != NULL && *coal == NULL) {
			curr_task = proc_task(p);
			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, *coal)) {
				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);

				/* Never report more pids than the list can hold. */
				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
				}
			}
		} else {
			/*
			 * NOTE(review): this branch is also taken whenever coal is NULL,
			 * so any process frozen by a caller that passes no coalition is
			 * counted as an XPC service -- confirm that is intended.
			 */
			/* We just froze an xpc service. Mark it as such for telemetry */
			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
			memorystatus_frozen_count_xpc_service++;
			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
		}

		/* Done with the process: unlock it, wake any waiters and drop our reference. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);
		return 0;
	} else {
		if (is_refreeze) {
			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
				/*
				 * Keeping this prior-frozen process in this high band when
				 * we failed to re-freeze it due to bad shared memory usage
				 * could cause excessive pressure on the lower bands.
				 * We need to demote it for now. It'll get re-evaluated next
				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
				 * bit.
				 */

				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
				memstat_update_priority_locked(p, JETSAM_PRIORITY_IDLE,
				    MEMSTAT_PRIORITY_INSERT_HEAD | MEMSTAT_PRIORITY_NO_AGING);
			}
		} else {
			/* A failed first freeze marks the process to be ignored by the freezer. */
			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
		}
		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_process");

		/* Unlock the process, wake any waiters and drop our reference. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);

		return EINVAL;
	}
}
2016
2017 /*
2018 * Synchronously freeze the passed proc. Called with a reference to the proc held.
2019 *
2020 * Doesn't deal with:
2021 * - re-freezing because this is called on a specific process and
2022 * not by the freezer thread. If that changes, we'll have to teach it about
2023 * refreezing a frozen process.
2024 *
2025 * - grouped/coalition freezing because we are hoping to deprecate this
2026 * interface as it was used by user-space to freeze particular processes. But
2027 * we have moved away from that approach to having the kernel choose the optimal
2028 * candidates to be frozen.
2029 *
2030 * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
2031 * returns EINVAL or the value returned by task_freeze().
2032 */
2033 int
memorystatus_freeze_process_sync(proc_t p)2034 memorystatus_freeze_process_sync(proc_t p)
2035 {
2036 int ret = EINVAL;
2037 boolean_t memorystatus_freeze_swap_low = FALSE;
2038
2039 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2040 return ENOTSUP;
2041 }
2042
2043 lck_mtx_lock(&freezer_mutex);
2044
2045 if (p == NULL) {
2046 memorystatus_log_error("memorystatus_freeze_process_sync: Invalid process\n");
2047 goto exit;
2048 }
2049
2050 if (memorystatus_freeze_enabled == false) {
2051 memorystatus_log_error("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
2052 goto exit;
2053 }
2054
2055 if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2056 memorystatus_log_info("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
2057 goto exit;
2058 }
2059
2060 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2061 if (!memorystatus_freeze_budget_pages_remaining) {
2062 memorystatus_log_info("memorystatus_freeze_process_sync: exit with NO available budget\n");
2063 goto exit;
2064 }
2065
2066 proc_list_lock();
2067
2068 ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2069
2070 exit:
2071 lck_mtx_unlock(&freezer_mutex);
2072
2073 return ret;
2074 }
2075
2076 proc_t
memorystatus_freezer_candidate_list_get_proc(struct memorystatus_freezer_candidate_list * list,size_t index,uint64_t * pid_mismatch_counter)2077 memorystatus_freezer_candidate_list_get_proc(
2078 struct memorystatus_freezer_candidate_list *list,
2079 size_t index,
2080 uint64_t *pid_mismatch_counter)
2081 {
2082 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2083 if (list->mfcl_list == NULL || list->mfcl_length <= index) {
2084 return NULL;
2085 }
2086 memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
2087 if (entry->pid == NO_PID) {
2088 /* Entry has been removed. */
2089 return NULL;
2090 }
2091
2092 proc_t p = proc_find_locked(entry->pid);
2093 if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2094 /*
2095 * We grab a reference when we are about to freeze the process. So drop
2096 * the reference that proc_find_locked() grabbed for us.
2097 * We also have the proc_list_lock so this process is stable.
2098 */
2099 proc_rele(p);
2100 return p;
2101 } else {
2102 if (p) {
2103 /* pid rollover. */
2104 proc_rele(p);
2105 }
2106 /*
2107 * The proc has exited since we received this list.
2108 * It may have re-launched with a new pid, so we go looking for it.
2109 */
2110 unsigned int band = JETSAM_PRIORITY_IDLE;
2111 p = memorystatus_get_first_proc_locked(&band, TRUE);
2112 while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
2113 if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2114 if (pid_mismatch_counter != NULL) {
2115 (*pid_mismatch_counter)++;
2116 }
2117 /* Stash the pid for faster lookup next time. */
2118 entry->pid = proc_getpid(p);
2119 return p;
2120 }
2121 p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2122 }
2123 /* No match. */
2124 return NULL;
2125 }
2126 }
2127
2128 static size_t
memorystatus_freeze_pid_list(pid_t * pid_list,unsigned int num_pids)2129 memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2130 {
2131 int ret = 0;
2132 size_t num_frozen = 0;
2133 while (num_pids > 0 &&
2134 memorystatus_frozen_count < memorystatus_frozen_processes_max) {
2135 pid_t pid = pid_list[--num_pids];
2136 proc_t p = proc_find_locked(pid);
2137 if (p) {
2138 proc_rele(p);
2139 ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2140 if (ret != 0) {
2141 break;
2142 }
2143 num_frozen++;
2144 }
2145 }
2146 return num_frozen;
2147 }
2148
2149 /*
2150 * Attempt to freeze the best candidate process.
2151 * Keep trying until we freeze something or run out of candidates.
2152 * Returns the number of processes frozen (including coalition members).
2153 */
2154 static size_t
memorystatus_freeze_top_process(void)2155 memorystatus_freeze_top_process(void)
2156 {
2157 int freeze_ret;
2158 size_t num_frozen = 0;
2159 coalition_t coal = COALITION_NULL;
2160 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
2161 unsigned int ntasks = 0;
2162 struct memorystatus_freeze_list_iterator iterator;
2163 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2164
2165 bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
2166 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages);
2167
2168 proc_list_lock();
2169 while (true) {
2170 proc_t p = memorystatus_freeze_pick_process(&iterator);
2171 if (p == PROC_NULL) {
2172 /* Nothing left to freeze */
2173 break;
2174 }
2175 freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
2176 if (freeze_ret == 0) {
2177 num_frozen = 1;
2178 /*
2179 * We froze a process successfully.
2180 * If it's a coalition head, freeze the coalition.
2181 * Then we're done for now.
2182 */
2183 if (coal != NULL) {
2184 num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
2185 }
2186 break;
2187 } else {
2188 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
2189 break;
2190 }
2191 /*
2192 * Freeze failed but we're not out of space.
2193 * Keep trying to find a good candidate,
2194 * memorystatus_freeze_pick_process will not return this proc again until
2195 * we reset the iterator.
2196 */
2197 }
2198 }
2199 proc_list_unlock();
2200
2201 KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages);
2202
2203 return num_frozen;
2204 }
2205
#if DEVELOPMENT || DEBUG
/*
 * For testing memorystatus_freeze_top_process.
 *
 * Returns the sysctl_handle_int() error if any, ENOTSUP when freezer swap
 * is not active, ESRCH when nothing was frozen, and 0 otherwise.
 */
static int
sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int val;

	/*
	 * Only freeze on write to prevent freezing during `sysctl -a`.
	 * The actual value written doesn't matter.
	 */
	int error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	lck_mtx_lock(&freezer_mutex);
	size_t num_frozen = memorystatus_freeze_top_process();
	lck_mtx_unlock(&freezer_mutex);

	return (num_frozen == 0) ? ESRCH : 0;
}
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
#endif /* DEVELOPMENT || DEBUG */
2239
2240 static inline boolean_t
memorystatus_can_freeze_processes(void)2241 memorystatus_can_freeze_processes(void)
2242 {
2243 boolean_t ret;
2244
2245 proc_list_lock();
2246
2247 if (memorystatus_suspended_count) {
2248 memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2249
2250 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2251 ret = TRUE;
2252 } else {
2253 ret = FALSE;
2254 }
2255 } else {
2256 ret = FALSE;
2257 }
2258
2259 proc_list_unlock();
2260
2261 return ret;
2262 }
2263
2264 static boolean_t
memorystatus_can_freeze(boolean_t * memorystatus_freeze_swap_low)2265 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2266 {
2267 boolean_t can_freeze = TRUE;
2268
2269 /* Only freeze if we're sufficiently low on memory; this holds off freeze right
2270 * after boot, and is generally is a no-op once we've reached steady state. */
2271 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2272 return FALSE;
2273 }
2274
2275 /* Check minimum suspended process threshold. */
2276 if (!memorystatus_can_freeze_processes()) {
2277 return FALSE;
2278 }
2279 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2280
2281 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2282 /*
2283 * In-core compressor used for freezing WITHOUT on-disk swap support.
2284 */
2285 if (vm_compressor_low_on_space()) {
2286 if (*memorystatus_freeze_swap_low) {
2287 *memorystatus_freeze_swap_low = TRUE;
2288 }
2289
2290 can_freeze = FALSE;
2291 } else {
2292 if (*memorystatus_freeze_swap_low) {
2293 *memorystatus_freeze_swap_low = FALSE;
2294 }
2295
2296 can_freeze = TRUE;
2297 }
2298 } else {
2299 /*
2300 * Freezing WITH on-disk swap support.
2301 *
2302 * In-core compressor fronts the swap.
2303 */
2304 if (vm_swap_low_on_space()) {
2305 if (*memorystatus_freeze_swap_low) {
2306 *memorystatus_freeze_swap_low = TRUE;
2307 }
2308
2309 can_freeze = FALSE;
2310 }
2311 }
2312
2313 return can_freeze;
2314 }
2315
2316 /*
2317 * Demote the given frozen process.
2318 * Caller must hold the proc_list_lock & it will be held on return.
2319 */
2320 static void
memorystatus_demote_frozen_process(proc_t p,bool urgent_mode __unused)2321 memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
2322 {
2323 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2324
2325 /* We demote to IDLE unless someone has asserted a higher priority on this process. */
2326 int priority = JETSAM_PRIORITY_IDLE;
2327 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2328 if (_memstat_proc_has_priority_assertion(p)) {
2329 priority = MAX(p->p_memstat_assertionpriority, priority);
2330 }
2331 if (_memstat_proc_is_tracked(p) && _memstat_proc_is_dirty(p)) {
2332 priority = MAX(p->p_memstat_requestedpriority, priority);
2333 }
2334 memstat_update_priority_locked(p, priority, MEMSTAT_PRIORITY_NO_AGING);
2335 #if DEVELOPMENT || DEBUG
2336 memorystatus_log("memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
2337 (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2338 #endif /* DEVELOPMENT || DEBUG */
2339
2340 /*
2341 * The freezer thread will consider this a normal app to be frozen
2342 * because it is in the IDLE band. So we don't need the
2343 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2344 * we'll correctly count it as eligible for re-freeze again.
2345 *
2346 * We don't drop the frozen count because this process still has
2347 * state on disk. So there's a chance it gets resumed and then it
2348 * should land in the higher jetsam band. For that it needs to
2349 * remain marked frozen.
2350 */
2351 if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2352 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2353 memorystatus_refreeze_eligible_count--;
2354 }
2355 }
2356
2357 static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)2358 memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
2359 {
2360 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
2361 unsigned int demoted_proc_count = 0;
2362 proc_t p = PROC_NULL, next_p = PROC_NULL;
2363 proc_list_lock();
2364
2365 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2366 while (next_p) {
2367 p = next_p;
2368 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2369
2370 if (!_memstat_proc_is_frozen(p)) {
2371 continue;
2372 }
2373
2374 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2375 continue;
2376 }
2377
2378 if (urgent_mode) {
2379 if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2380 /*
2381 * This process hasn't been thawed recently and so most of
2382 * its state sits on NAND and so we skip it -- jetsamming it
2383 * won't help with memory pressure.
2384 */
2385 continue;
2386 }
2387 } else {
2388 if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2389 /*
2390 * This process has met / exceeded our thaw count demotion threshold
2391 * and so we let it live in the higher bands.
2392 */
2393 continue;
2394 }
2395 }
2396
2397 memorystatus_demote_frozen_process(p, urgent_mode);
2398 demoted_proc_count++;
2399 if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2400 break;
2401 }
2402 }
2403
2404 proc_list_unlock();
2405 return demoted_proc_count;
2406 }
2407
2408 static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)2409 memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2410 {
2411 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2412 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2413 assert(memorystatus_freezer_use_demotion_list);
2414 unsigned int demoted_proc_count = 0;
2415
2416 proc_list_lock();
2417 for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2418 proc_t p = memorystatus_freezer_candidate_list_get_proc(
2419 &memorystatus_global_demote_list,
2420 i,
2421 &memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2422 if (p != NULL) {
2423 memorystatus_demote_frozen_process(p, urgent_mode);
2424 /* Remove this entry now that it's been demoted. */
2425 memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2426 demoted_proc_count++;
2427 }
2428 }
2429
2430 proc_list_unlock();
2431 return demoted_proc_count;
2432 }
2433
2434 /*
2435 * This function evaluates if the currently frozen processes deserve
2436 * to stay in the higher jetsam band. There are 2 modes:
2437 * - 'force one == TRUE': (urgent mode)
2438 * We are out of budget and can't refreeze a process. The process's
2439 * state, if it was resumed, will stay in compressed memory. If we let it
2440 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
2441 * the lower bands. So we force-demote the least-recently-used-and-thawed
2442 * process.
2443 *
2444 * - 'force_one == FALSE': (normal mode)
2445 * If the # of thaws of a process is below our threshold, then we
2446 * will demote that process into the IDLE band.
2447 * We don't immediately kill the process here because it already has
2448 * state on disk and so it might be worth giving it another shot at
2449 * getting thawed/resumed and used.
2450 */
2451 static void
memorystatus_demote_frozen_processes(bool urgent_mode)2452 memorystatus_demote_frozen_processes(bool urgent_mode)
2453 {
2454 unsigned int demoted_proc_count = 0;
2455
2456 if (memorystatus_freeze_enabled == false) {
2457 /*
2458 * Freeze has been disabled likely to
2459 * reclaim swap space. So don't change
2460 * any state on the frozen processes.
2461 */
2462 return;
2463 }
2464
2465 /*
2466 * We have two demotion policies which can be toggled by userspace.
2467 * In non-urgent mode, the ordered list policy will
2468 * choose a demotion candidate using the list provided by dasd.
2469 * The thaw count policy will demote the oldest process that hasn't been
2470 * thawed more than memorystatus_thaw_count_demotion_threshold times.
2471 *
2472 * If urgent_mode is set, both policies will only consider demoting
2473 * processes that are re-freeze eligible. But the ordering is different.
2474 * The ordered list policy will scan in the order given by dasd.
2475 * The thaw count policy will scan through the frozen band.
2476 */
2477 if (memorystatus_freezer_use_demotion_list) {
2478 demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);
2479
2480 if (demoted_proc_count == 0 && urgent_mode) {
2481 /*
2482 * We're out of budget and the demotion list doesn't contain any valid
2483 * candidates. We still need to demote something. Fall back to scanning
2484 * the frozen band.
2485 */
2486 memorystatus_demote_frozen_processes_using_thaw_count(true);
2487 }
2488 } else {
2489 demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
2490 }
2491 }
2492
2493 /*
2494 * Calculate a new freezer budget.
2495 * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2496 * @param burst_multiple The burst_multiple for the new period
2497 * @param interval_duration_min How many minutes will the new interval be?
2498 * @param rollover The amount to rollover from the previous budget.
2499 *
2500 * @return A budget for the new interval.
2501 */
2502 static uint32_t
memorystatus_freeze_calculate_new_budget(unsigned int time_since_last_interval_expired_sec,unsigned int burst_multiple,unsigned int interval_duration_min,uint32_t rollover)2503 memorystatus_freeze_calculate_new_budget(
2504 unsigned int time_since_last_interval_expired_sec,
2505 unsigned int burst_multiple,
2506 unsigned int interval_duration_min,
2507 uint32_t rollover)
2508 {
2509 uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2510 const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2511 /* Precision factor for days_missed. 2 decimal points. */
2512 const static unsigned int kFixedPointFactor = 100;
2513 unsigned int days_missed;
2514
2515 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2516 return 0;
2517 }
2518 if (memorystatus_swap_all_apps) {
2519 /*
2520 * We effectively have an unlimited budget when app swap is enabled.
2521 */
2522 memorystatus_freeze_daily_mb_max = UINT32_MAX;
2523 return UINT32_MAX;
2524 }
2525
2526 /* Get the daily budget from the storage layer */
2527 if (vm_swap_max_budget(&freeze_daily_budget)) {
2528 freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2529 assert(freeze_daily_budget_mb <= UINT32_MAX);
2530 memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2531 memorystatus_log_info("memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2532 }
2533 /* Calculate the daily pageout budget */
2534 freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2535 /* Multiply by memorystatus_freeze_budget_multiplier */
2536 freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;
2537
2538 daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2539
2540 /*
2541 * Add additional budget for time since the interval expired.
2542 * For example, if the interval expired n days ago, we should get an additional n days
2543 * of budget since we didn't use any budget during those n days.
2544 */
2545 days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2546 budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2547 new_budget = rollover + daily_budget_pageouts + budget_missed;
2548 return (uint32_t) MIN(new_budget, UINT32_MAX);
2549 }
2550
2551 /*
2552 * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2553 * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2554 * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2555 * it gets set for new processes.
2556 * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2557 */
2558 static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason,bool locked)2559 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2560 {
2561 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2562 LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2563 unsigned int band = JETSAM_PRIORITY_IDLE;
2564 proc_t p;
2565
2566 if (!locked) {
2567 proc_list_lock();
2568 }
2569 p = memorystatus_get_first_proc_locked(&band, FALSE);
2570 while (p) {
2571 assert(p->p_memstat_effectivepriority == (int32_t) band);
2572 if (!_memstat_proc_is_frozen(p) &&
2573 memorystatus_is_process_eligible_for_freeze(p)) {
2574 assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2575 p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2576 }
2577 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2578 }
2579 if (!locked) {
2580 proc_list_unlock();
2581 }
2582 }
2583
2584 /*
2585 * Called after we fail to freeze a process.
2586 * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2587 */
2588 static void
memorystatus_freeze_handle_error(proc_t p,const freezer_error_code_t freezer_error_code,bool was_refreeze,pid_t pid,const coalition_t coalition,const char * log_prefix)2589 memorystatus_freeze_handle_error(
2590 proc_t p,
2591 const freezer_error_code_t freezer_error_code,
2592 bool was_refreeze,
2593 pid_t pid,
2594 const coalition_t coalition,
2595 const char* log_prefix)
2596 {
2597 const char *reason;
2598 memorystatus_freeze_skip_reason_t skip_reason;
2599
2600 switch (freezer_error_code) {
2601 case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2602 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2603 reason = "too much shared memory";
2604 skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2605 break;
2606 case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2607 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2608 reason = "private-shared pages ratio";
2609 skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2610 break;
2611 case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2612 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2613 reason = "no compressor space";
2614 skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2615 break;
2616 case FREEZER_ERROR_NO_SWAP_SPACE:
2617 memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2618 reason = "no swap space";
2619 skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2620 break;
2621 case FREEZER_ERROR_NO_SLOTS:
2622 memorystatus_freezer_stats.mfs_skipped_full_count++;
2623 reason = "no slots";
2624 skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2625 break;
2626 default:
2627 reason = "unknown error";
2628 skip_reason = kMemorystatusFreezeSkipReasonOther;
2629 }
2630
2631 p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2632
2633 memorystatus_log("%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2634 log_prefix, was_refreeze ? "re" : "",
2635 (coalition == NULL ? "general" : "coalition-driven"), pid,
2636 ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2637 }
2638
2639 /*
2640 * Start a new normal throttle interval with the given budget.
2641 * Caller must hold the freezer mutex
2642 */
2643 static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget,mach_timespec_t start_ts)2644 memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2645 {
2646 unsigned int band;
2647 proc_t p, next_p;
2648 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2649 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2650
2651 normal_throttle_window->max_pageouts = new_budget;
2652 normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2653 normal_throttle_window->ts.tv_nsec = 0;
2654 ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2655 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2656 if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2657 normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2658 } else {
2659 normal_throttle_window->pageouts = 0;
2660 }
2661 /* Ensure the normal window is now active. */
2662 memorystatus_freeze_degradation = FALSE;
2663
2664 /*
2665 * Reset interval statistics.
2666 */
2667 memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2668 memorystatus_freezer_stats.mfs_process_considered_count = 0;
2669 memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
2670 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
2671 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
2672 memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
2673 memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
2674 memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
2675 memorystatus_freezer_stats.mfs_error_elevated_count = 0;
2676 memorystatus_freezer_stats.mfs_error_other_count = 0;
2677 memorystatus_freezer_stats.mfs_refreeze_count = 0;
2678 memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
2679 memorystatus_freezer_stats.mfs_below_threshold_count = 0;
2680 memorystatus_freezer_stats.mfs_skipped_full_count = 0;
2681 memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
2682 memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
2683 memorystatus_thaw_count = 0;
2684 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
2685 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
2686 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
2687 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
2688 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
2689 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
2690 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
2691 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
2692 os_atomic_inc(&memorystatus_freeze_current_interval, release);
2693
2694 /* Clear the focal thaw bit */
2695 proc_list_lock();
2696 band = JETSAM_PRIORITY_IDLE;
2697 p = PROC_NULL;
2698 next_p = PROC_NULL;
2699
2700 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
2701 while (next_p) {
2702 p = next_p;
2703 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2704
2705 if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
2706 break;
2707 }
2708 p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
2709 }
2710 proc_list_unlock();
2711
2712 schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
2713 }
2714
#if DEVELOPMENT || DEBUG

/*
 * Test-only sysctl (vm.memorystatus_freeze_calculate_new_budget).
 * The value written is interpreted as the number of seconds since the last
 * interval expired. The budget that memorystatus_freeze_calculate_new_budget
 * computes for one normal window (burst multiple 1, no rollover) is copied
 * out to the caller's old-value buffer.
 *
 * Returns ENOTSUP when freezer swap is not active.
 */
static int
sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error = 0;
	unsigned int time_since_last_interval_expired_sec = 0;
	unsigned int new_budget;

	error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}
	new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
	/*
	 * Bound the copy by the length of the caller's buffer (req->oldlen),
	 * not by the size of the oldlen field itself.
	 */
	return copyout(&new_budget, req->oldptr, MIN(req->oldlen, sizeof(new_budget)));
}

SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");

#endif /* DEVELOPMENT || DEBUG */
2741
2742 /*
2743 * Called when we first run out of budget in an interval.
2744 * Marks idle processes as not frozen due to lack of budget.
2745 * NB: It might be worth having a CA event here.
2746 */
2747 static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t * interval)2748 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2749 {
2750 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2751 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2752
2753 mach_timespec_t time_left = {0, 0};
2754 mach_timespec_t now_ts;
2755 clock_sec_t sec;
2756 clock_nsec_t nsec;
2757
2758 time_left.tv_sec = interval->ts.tv_sec;
2759 time_left.tv_nsec = 0;
2760 clock_get_system_nanotime(&sec, &nsec);
2761 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2762 now_ts.tv_nsec = nsec;
2763
2764 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2765 memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2766 memorystatus_log(
2767 "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2768 time_left.tv_sec / 60, memorystatus_frozen_count);
2769
2770 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2771 }
2772
2773 /*
2774 * Called when we cross over the threshold of maximum frozen processes allowed.
2775 * Marks remaining idle processes as not frozen due to lack of slots.
2776 */
2777 static void
memorystatus_freeze_out_of_slots(void)2778 memorystatus_freeze_out_of_slots(void)
2779 {
2780 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2781 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2782 assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
2783
2784 memorystatus_log(
2785 "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
2786 memorystatus_frozen_count);
2787
2788 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
2789 }
2790
2791 /*
2792 * This function will do 4 things:
2793 *
2794 * 1) check to see if we are currently in a degraded freezer mode, and if so:
2795 * - check to see if our window has expired and we should exit this mode, OR,
2796 * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2797 *
2798 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2799 *
2800 * 3) check what the current normal window allows for a budget.
2801 *
2802 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2803 * what we would normally expect, then we are running low on our daily budget and need to enter
2804 * degraded perf. mode.
2805 *
2806 * Caller must hold the freezer mutex
2807 * Caller must not hold the proc_list lock
2808 */
2809
2810 static void
memorystatus_freeze_update_throttle(uint64_t * budget_pages_allowed)2811 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2812 {
2813 clock_sec_t sec;
2814 clock_nsec_t nsec;
2815 mach_timespec_t now_ts;
2816 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2817 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2818
2819 unsigned int freeze_daily_pageouts_max = 0;
2820 bool started_with_budget = (*budget_pages_allowed > 0);
2821
2822 #if DEVELOPMENT || DEBUG
2823 if (!memorystatus_freeze_throttle_enabled) {
2824 /*
2825 * No throttling...we can use the full budget everytime.
2826 */
2827 *budget_pages_allowed = UINT64_MAX;
2828 return;
2829 }
2830 #endif
2831
2832 clock_get_system_nanotime(&sec, &nsec);
2833 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2834 now_ts.tv_nsec = nsec;
2835
2836 struct throttle_interval_t *interval = NULL;
2837
2838 if (memorystatus_freeze_degradation == TRUE) {
2839 interval = degraded_throttle_window;
2840
2841 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2842 interval->pageouts = 0;
2843 interval->max_pageouts = 0;
2844 } else {
2845 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2846 }
2847 }
2848
2849 interval = normal_throttle_window;
2850
2851 /*
2852 * Current throttle window.
2853 * Deny freezing if we have no budget left.
2854 * Try graceful degradation if we are within 25% of:
2855 * - the daily budget, and
2856 * - the current budget left is below our normal budget expectations.
2857 */
2858
2859 if (memorystatus_freeze_degradation == FALSE) {
2860 if (interval->pageouts >= interval->max_pageouts) {
2861 *budget_pages_allowed = 0;
2862 if (started_with_budget) {
2863 memorystatus_freeze_out_of_budget(interval);
2864 }
2865 } else {
2866 int budget_left = interval->max_pageouts - interval->pageouts;
2867 int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2868
2869 mach_timespec_t time_left = {0, 0};
2870
2871 time_left.tv_sec = interval->ts.tv_sec;
2872 time_left.tv_nsec = 0;
2873
2874 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2875
2876 if (budget_left <= budget_threshold) {
2877 /*
2878 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2879 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2880 * daily pageout budget.
2881 */
2882
2883 unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2884 unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2885
2886 /*
2887 * The current rate of pageouts is below what we would expect for
2888 * the normal rate i.e. we have below normal budget left and so...
2889 */
2890
2891 if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2892 memorystatus_freeze_degradation = TRUE;
2893 degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2894 degraded_throttle_window->pageouts = 0;
2895
2896 /*
2897 * Switch over to the degraded throttle window so the budget
2898 * doled out is based on that window.
2899 */
2900 interval = degraded_throttle_window;
2901 }
2902 }
2903
2904 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2905 }
2906 }
2907
2908 memorystatus_log_debug(
2909 "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining\n",
2910 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60);
2911 }
2912
/* Read/write sysctl (kern.memorystatus_freeze_apps_idle_delay_multiplier) backed by the variable of the same name. */
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_apps_idle_delay_multiplier, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_apps_idle_delay_multiplier, 0, "");
2914
2915 bool memorystatus_freeze_thread_init = false;
2916 static void
memorystatus_freeze_thread(void * param __unused,wait_result_t wr __unused)2917 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2918 {
2919 static boolean_t memorystatus_freeze_swap_low = FALSE;
2920 size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;
2921
2922 if (!memorystatus_freeze_thread_init) {
2923 #if CONFIG_THREAD_GROUPS
2924 thread_group_vm_add();
2925 #endif
2926 memorystatus_freeze_thread_init = true;
2927 }
2928
2929 max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();
2930
2931 lck_mtx_lock(&freezer_mutex);
2932 if (memorystatus_freeze_enabled) {
2933 if (memorystatus_freezer_use_demotion_list) {
2934 memorystatus_demote_frozen_processes(false); /* Normal mode. Demote all in list. */
2935 }
2936 while (num_frozen < max_to_freeze &&
2937 memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
2938 ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2939 (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold))) {
2940 /* Only freeze if we've not exceeded our pageout budgets.*/
2941 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2942
2943 if (memorystatus_freeze_budget_pages_remaining) {
2944 num_frozen_this_iteration = memorystatus_freeze_top_process();
2945 if (num_frozen_this_iteration == 0) {
2946 /* Nothing left to freeze. */
2947 break;
2948 }
2949 num_frozen += num_frozen_this_iteration;
2950 } else {
2951 memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
2952 break;
2953 }
2954 }
2955 }
2956
2957 /*
2958 * Give applications currently in the aging band a chance to age out into the idle band before
2959 * running the freezer again.
2960 */
2961 if (memorystatus_freeze_dynamic_thread_delay_enabled) {
2962 if ((num_frozen > 0) || (memorystatus_frozen_count == 0)) {
2963 memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST;
2964 } else {
2965 memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW;
2966 }
2967 }
2968 memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + (memorystatus_apps_idle_delay_time * memorystatus_freeze_apps_idle_delay_multiplier);
2969
2970 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2971 lck_mtx_unlock(&freezer_mutex);
2972
2973 thread_block((thread_continue_t) memorystatus_freeze_thread);
2974 }
2975
2976 int
memorystatus_get_process_is_freezable(pid_t pid,int * is_freezable)2977 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2978 {
2979 proc_t p = PROC_NULL;
2980
2981 if (pid == 0) {
2982 return EINVAL;
2983 }
2984
2985 p = proc_find(pid);
2986 if (!p) {
2987 return ESRCH;
2988 }
2989
2990 /*
2991 * Only allow this on the current proc for now.
2992 * We can check for privileges and allow targeting another process in the future.
2993 */
2994 if (p != current_proc()) {
2995 proc_rele(p);
2996 return EPERM;
2997 }
2998
2999 proc_list_lock();
3000 *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
3001 proc_rele(p);
3002 proc_list_unlock();
3003
3004 return 0;
3005 }
3006
3007 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)3008 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
3009 {
3010 proc_t p = PROC_NULL;
3011
3012 if (pid == 0) {
3013 return EINVAL;
3014 }
3015
3016 /*
3017 * Only allow this on the current proc for now.
3018 * We can check for privileges and allow targeting another process in the future.
3019 */
3020 p = current_proc();
3021 if (proc_getpid(p) != pid) {
3022 return EPERM;
3023 }
3024
3025 proc_list_lock();
3026 *is_frozen = _memstat_proc_is_frozen(p);
3027 proc_list_unlock();
3028
3029 return 0;
3030 }
3031
3032 int
memorystatus_set_process_is_freezable(pid_t pid,boolean_t is_freezable)3033 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
3034 {
3035 proc_t p = PROC_NULL;
3036
3037 if (pid == 0) {
3038 return EINVAL;
3039 }
3040
3041 /*
3042 * To enable freezable status, you need to be root or an entitlement.
3043 */
3044 if (is_freezable &&
3045 !kauth_cred_issuser(kauth_cred_get()) &&
3046 !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3047 return EPERM;
3048 }
3049
3050 p = proc_find(pid);
3051 if (!p) {
3052 return ESRCH;
3053 }
3054
3055 /*
3056 * A process can change its own status. A coalition leader can
3057 * change the status of coalition members.
3058 * An entitled process (or root) can change anyone's status.
3059 */
3060 if (p != current_proc() &&
3061 !kauth_cred_issuser(kauth_cred_get()) &&
3062 !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3063 coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
3064 if (!coalition_is_leader(proc_task(current_proc()), coal)) {
3065 proc_rele(p);
3066 return EPERM;
3067 }
3068 }
3069
3070 proc_list_lock();
3071 if (is_freezable == FALSE) {
3072 /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
3073 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
3074 memorystatus_log_info("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
3075 proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3076 } else {
3077 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
3078 memorystatus_log_info("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
3079 proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3080 }
3081 proc_rele(p);
3082 proc_list_unlock();
3083
3084 return 0;
3085 }
3086
3087 /*
3088 * Called when process is created before it is added to a memorystatus bucket.
3089 */
3090 void
memorystatus_freeze_init_proc(proc_t p)3091 memorystatus_freeze_init_proc(proc_t p)
3092 {
3093 /* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3094 if (memorystatus_freeze_budget_pages_remaining == 0) {
3095 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3096 } else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3097 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3098 } else {
3099 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3100 }
3101 }
3102
3103 static int
3104 sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS
3105 {
3106 if (!req->newptr) {
3107 return EINVAL;
3108 }
3109
3110 /* Need to be root or have entitlement */
3111 if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3112 return EPERM;
3113 }
3114
3115 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3116 return ENOTSUP;
3117 }
3118
3119 if (!memorystatus_freeze_enabled && !memorystatus_swap_all_apps) {
3120 /* Nothing to do. Swap is not enabled on this system. */
3121 assert3u(vm_compressor_get_swapped_segment_count(), ==, 0);
3122 memorystatus_log("memorystatus: swap is disabled, bypassing fast-wake warmup");
3123 return 0;
3124 }
3125
3126 memorystatus_log("memorystatus: swapping-in all swapped-out compressor "
3127 "segments\n");
3128
3129 do_fastwake_warmup_all();
3130
3131 return 0;
3132 }
3133
3134 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all,
3135 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3136 0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I",
3137 "Swap-in any compressed data that resides in swapfiles");
3138
3139 /*
3140 * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3141 * Takes ownership over the original value of list.
3142 * Assumes that list is protected by the freezer_mutex.
3143 * The caller should not hold any locks.
3144 */
3145 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3146 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3147 {
3148 errno_t error = 0;
3149 memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3150 size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3151
3152 /* Validate the user provided list. */
3153 if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3154 memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3155 return EINVAL;
3156 }
3157
3158 if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3159 memorystatus_log_error(
3160 "memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3161 return EINVAL;
3162 }
3163
3164 entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3165 entries_size = buffer_size;
3166 entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3167 if (entries == NULL) {
3168 return ENOMEM;
3169 }
3170
3171 error = copyin(buffer, entries, buffer_size);
3172 if (error != 0) {
3173 goto out;
3174 }
3175
3176 #if MACH_ASSERT
3177 for (size_t i = 0; i < entry_count; i++) {
3178 memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3179 if (entry->version != 1) {
3180 memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3181 error = EINVAL;
3182 goto out;
3183 }
3184 if (i > 0 && entry->priority >= entries[i - 1].priority) {
3185 memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3186 error = EINVAL;
3187 goto out;
3188 }
3189 }
3190 #endif /* MACH_ASSERT */
3191
3192 lck_mtx_lock(&freezer_mutex);
3193
3194 tmp_entries = list->mfcl_list;
3195 tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3196 list->mfcl_list = entries;
3197 list->mfcl_length = entry_count;
3198
3199 lck_mtx_unlock(&freezer_mutex);
3200
3201 entries = tmp_entries;
3202 entries_size = tmp_size;
3203
3204 out:
3205 kfree_data(entries, entries_size);
3206 return error;
3207 }
3208
3209 errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer,size_t buffer_size)3210 memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
3211 {
3212 return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
3213 }
3214
3215 errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer,size_t buffer_size)3216 memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
3217 {
3218 errno_t ret = set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
3219 if (ret == 0) {
3220 thread_wakeup((event_t)&memorystatus_freeze_wakeup);
3221 }
3222 return ret;
3223 }
3224
3225 void
memorystatus_freezer_mark_ui_transition(proc_t p)3226 memorystatus_freezer_mark_ui_transition(proc_t p)
3227 {
3228 bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3229 proc_list_lock();
3230
3231 if (isSysProc(p)) {
3232 goto out;
3233 }
3234
3235 frozen = _memstat_proc_is_frozen(p);
3236 previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3237 xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3238 suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3239 if (!suspended) {
3240 if (frozen) {
3241 if (!previous_focal_thaw) {
3242 p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3243 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3244 if (xpc_service) {
3245 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3246 }
3247 }
3248 }
3249 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3250 }
3251
3252 out:
3253 proc_list_unlock();
3254 }
3255
3256 /*
3257 * Cache of pids of most-recently thawed processes.
3258 * Used to reduce excessive rapid thaw/refreeze cycles.
3259 *
3260 * There is a very small chance of false-positive mis-identification
3261 * of a process due to eventual pid reuse, but for that to happen,
3262 * a pid would have to be reused within the timeout period (e.g. 15 minutes),
3263 * and even then the only consequence might be that that one process is
3264 * temporarily passed over for refreezing until the timeout expires.
3265 *
3266 * FIXME (rdar://161250797) switch from PIDs to unique process IDs
3267 */
3268
3269 void
memorystatus_freeze_record_process_thawed(proc_t p)3270 memorystatus_freeze_record_process_thawed(proc_t p)
3271 {
3272 unsigned int slot = 0;
3273 pid_t procpid = proc_getpid(p);
3274
3275 assert(memorystatus_freeze_last_processes_thawed_cache_size <= MEMORYSTATUS_FREEZE_LAST_PROCESSES_THAWED_CACHE_SIZE_MAX);
3276
3277 for (slot = 0; slot < memorystatus_freeze_last_processes_thawed_cache_size; slot++) {
3278 if (memorystatus_freeze_last_processes_thawed_pid[slot] == procpid) {
3279 // Found existing table entry for this pid
3280 break;
3281 }
3282 }
3283
3284 if (slot == memorystatus_freeze_last_processes_thawed_cache_size) {
3285 // Did not find an existing table entry for this pid; select the oldest to evict
3286 int oldest_slot = 0;
3287 uint64_t oldest_ts = memorystatus_freeze_last_processes_thawed_ts[oldest_slot];
3288 for (slot = 1; slot < memorystatus_freeze_last_processes_thawed_cache_size; slot++) {
3289 if (memorystatus_freeze_last_processes_thawed_ts[slot] < oldest_ts) {
3290 oldest_ts = memorystatus_freeze_last_processes_thawed_ts[slot];
3291 oldest_slot = slot;
3292 }
3293 }
3294 slot = oldest_slot;
3295 }
3296
3297 memorystatus_freeze_last_processes_thawed_pid[slot] = proc_getpid(p);
3298 memorystatus_freeze_last_processes_thawed_ts[slot] = mach_absolute_time();
3299 }
3300
3301 bool
memorystatus_freeze_was_process_recently_thawed(proc_t p)3302 memorystatus_freeze_was_process_recently_thawed(proc_t p)
3303 {
3304 unsigned int slot;
3305 bool recent_thaw = false;
3306 pid_t procpid = proc_getpid(p);
3307
3308 for (slot = 0; slot < memorystatus_freeze_last_processes_thawed_cache_size; slot++) {
3309 if (memorystatus_freeze_last_processes_thawed_pid[slot] == procpid) {
3310 // Found existing table entry for this pid
3311 uint64_t timeout_delta_abs;
3312 nanoseconds_to_absolutetime(memorystatus_freeze_last_processes_thawed_prevent_refreeze_seconds * NSEC_PER_SEC, &timeout_delta_abs);
3313 if (mach_absolute_time() < (memorystatus_freeze_last_processes_thawed_ts[slot] + timeout_delta_abs)) {
3314 recent_thaw = true;
3315 }
3316 break;
3317 }
3318 }
3319
3320 return recent_thaw;
3321 }
3322
3323 #endif /* CONFIG_FREEZE */
3324