1 /*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/proc.h>
53 #include <sys/proc_info.h>
54 #include <sys/reason.h>
55 #include <sys/signal.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysproto.h>
59 #include <sys/wait.h>
60 #include <sys/tree.h>
61 #include <sys/priv.h>
62 #include <vm/vm_pageout.h>
63 #include <vm/vm_protos.h>
64 #include <mach/machine/sdt.h>
65 #include <libkern/coreanalytics/coreanalytics.h>
66 #include <libkern/section_keywords.h>
67 #include <stdatomic.h>
68
69 #include <IOKit/IOBSD.h>
70
71 #if CONFIG_FREEZE
72 #include <vm/vm_map.h>
73 #endif /* CONFIG_FREEZE */
74
75 #include <kern/kern_memorystatus_internal.h>
76 #include <sys/kern_memorystatus.h>
77 #include <sys/kern_memorystatus_freeze.h>
78 #include <sys/kern_memorystatus_notify.h>
79
80 #if CONFIG_JETSAM
81
82 extern unsigned int memorystatus_available_pages;
83 extern unsigned int memorystatus_available_pages_pressure;
84 extern unsigned int memorystatus_available_pages_critical;
85 extern unsigned int memorystatus_available_pages_critical_base;
86 extern unsigned int memorystatus_available_pages_critical_idle_offset;
87
88 #else /* CONFIG_JETSAM */
89
90 extern uint64_t memorystatus_available_pages;
91 extern uint64_t memorystatus_available_pages_pressure;
92 extern uint64_t memorystatus_available_pages_critical;
93
94 #endif /* CONFIG_JETSAM */
95
96 unsigned int memorystatus_frozen_count = 0;
97 unsigned int memorystatus_frozen_count_webcontent = 0;
98 unsigned int memorystatus_frozen_count_xpc_service = 0;
99 unsigned int memorystatus_suspended_count = 0;
100 unsigned long freeze_threshold_percentage = 50;
101
102 #if CONFIG_FREEZE
103
104 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
105 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
106
107 /* Thresholds */
108 unsigned int memorystatus_freeze_threshold = 0;
109 unsigned int memorystatus_freeze_pages_min = 0;
110 unsigned int memorystatus_freeze_pages_max = 0;
111 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
112 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
113 uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
114 uint64_t memorystatus_freeze_budget_multiplier = 100; /* Multiplies the daily budget by 100/multiplier */
115 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
116 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
117
118 unsigned int memorystatus_max_frozen_demotions_daily = 0;
119 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
120
121 boolean_t memorystatus_freeze_enabled = FALSE;
122 int memorystatus_freeze_wakeup = 0;
123 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
124
125 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
126
127 #ifdef XNU_KERNEL_PRIVATE
128
129 unsigned int memorystatus_frozen_processes_max = 0;
130 unsigned int memorystatus_frozen_shared_mb = 0;
131 unsigned int memorystatus_frozen_shared_mb_max = 0;
132 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
133 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
134 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
135 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
136 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
137
138 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
139
140 #endif /* XNU_KERNEL_PRIVATE */
141
142 static inline boolean_t memorystatus_can_freeze_processes(void);
143 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
144 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
145 static uint32_t memorystatus_freeze_calculate_new_budget(
146 unsigned int time_since_last_interval_expired_sec,
147 unsigned int burst_multiple,
148 unsigned int interval_duration_min,
149 uint32_t rollover);
150 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
151
152 static void memorystatus_set_freeze_is_enabled(bool enabled);
153 static void memorystatus_disable_freeze(void);
154 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
155
156 /* Stats */
157 static uint64_t memorystatus_freeze_pageouts = 0;
158
159 /* Throttling */
160 #define DEGRADED_WINDOW_MINS (30)
161 #define NORMAL_WINDOW_MINS (24 * 60)
162
163 /* Protected by the freezer_mutex */
164 static throttle_interval_t throttle_intervals[] = {
165 { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
166 { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
167 };
168 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
169 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
170 uint32_t memorystatus_freeze_current_interval = 0;
171 static thread_call_t freeze_interval_reset_thread_call;
172 static uint32_t memorystatus_freeze_calculate_new_budget(
173 unsigned int time_since_last_interval_expired_sec,
174 unsigned int burst_multiple,
175 unsigned int interval_duration_min,
176 uint32_t rollover);
177
178 struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
179 struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
180 /*
181 * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
182 * in order (as opposed to using the older LRU approach).
183 */
184 int memorystatus_freezer_use_ordered_list = 0;
185 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
/*
 * When enabled, demotion candidates are chosen from memorystatus_global_demote_list.
 */
189 int memorystatus_freezer_use_demotion_list = 0;
190 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
191
192 extern uint64_t vm_swap_get_free_space(void);
193 extern boolean_t vm_swap_max_budget(uint64_t *);
194
195 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
196 static void memorystatus_demote_frozen_processes(bool urgent_mode);
197
198 static void memorystatus_freeze_handle_error(proc_t p, const int freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
199 static void memorystatus_freeze_out_of_slots(void);
200 uint64_t memorystatus_freezer_thread_next_run_ts = 0;
201
202 /* Sysctls needed for aggd stats */
203
204 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
205 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
206 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
207 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
208 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
209 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
210 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
211
/*
 * Force a new interval with the given budget (no rollover).
 *
 * Starts a fresh normal throttle interval anchored at the current system
 * time and sets the remaining budget to exactly `new_budget` pages.
 * Caller must hold the freezer_mutex.
 */
static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	mach_timespec_t now_ts;
	clock_sec_t sec;
	clock_nsec_t nsec;

	clock_get_system_nanotime(&sec, &nsec);
	/* mach_timespec_t.tv_sec is 32-bit; clamp to avoid truncation. */
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;
	/* Budget is also clamped to 32 bits by the interval machinery. */
	memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
	/* Don't carry over any excess pageouts since we're forcing a new budget */
	normal_throttle_window->pageouts = 0;
	memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
}
231 #if DEVELOPMENT || DEBUG
232 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
233 {
234 #pragma unused(arg1, arg2, oidp)
235 int error, changed;
236 uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
237
238 lck_mtx_lock(&freezer_mutex);
239
240 error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
241 if (changed) {
242 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
243 lck_mtx_unlock(&freezer_mutex);
244 return ENOTSUP;
245 }
246 memorystatus_freeze_force_new_interval(new_budget);
247 }
248
249 lck_mtx_unlock(&freezer_mutex);
250 return error;
251 }
252
253 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
254 #else /* DEVELOPMENT || DEBUG */
255 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
256 #endif /* DEVELOPMENT || DEBUG */
257 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
258 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
259 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
260 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
261 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
262 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
263 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
264 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
265 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
266 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
267 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
268 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
269 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
270 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
271 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
272 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
273 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
274
275 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
276
/*
 * Calculates the hit rate for the freezer.
 * The hit rate is defined as the percentage of procs that are currently in the
 * freezer which we have thawed.
 * A low hit rate means we're freezing bad candidates since they're not re-used.
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0) {
		/* Nothing frozen: report a perfect hit rate by convention. */
		return 100;
	}
	if (thaw_count > frozen_count) {
		/*
		 * Both counts are maintained with relaxed atomics and can be
		 * momentarily out of sync; clamp rather than report > 100%.
		 */
		return 100;
	}
	return (int)((100 * thaw_count) / frozen_count);
}
301
302 static int
get_thaw_percentage()303 get_thaw_percentage()
304 {
305 uint64_t processes_frozen, processes_thawed;
306 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
307 processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
308 return calculate_thaw_percentage(processes_frozen, processes_thawed);
309 }
310
311 static int
312 sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
313 {
314 #pragma unused(arg1, arg2)
315 int thaw_percentage = get_thaw_percentage();
316 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
317 }
318 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
319
320 static int
get_thaw_percentage_fg()321 get_thaw_percentage_fg()
322 {
323 uint64_t processes_frozen, processes_thawed_fg;
324 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
325 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
326 return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
327 }
328
329 static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 int thaw_percentage = get_thaw_percentage_fg();
333 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
334 }
335 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
336
337 static int
get_thaw_percentage_webcontent()338 get_thaw_percentage_webcontent()
339 {
340 uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
341 processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
342 processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
343 return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
344 }
345
346 static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
347 {
348 #pragma unused(arg1, arg2)
349 int thaw_percentage = get_thaw_percentage_webcontent();
350 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
351 }
352 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
353
354
355 static int
get_thaw_percentage_bg()356 get_thaw_percentage_bg()
357 {
358 uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
359 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
360 processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
361 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
362 return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
363 }
364
365 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
366 {
367 #pragma unused(arg1, arg2)
368 int thaw_percentage = get_thaw_percentage_bg();
369 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
370 }
371 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
372
373 static int
get_thaw_percentage_fg_non_xpc_service()374 get_thaw_percentage_fg_non_xpc_service()
375 {
376 uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
377 processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
378 processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
379 processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
380 processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
381 /*
382 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
383 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
384 * Clamp just in case.
385 */
386 processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
387 processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
388 return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
389 }
390
391 static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
392 {
393 #pragma unused(arg1, arg2)
394 int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
395 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
396 }
397
398 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
399
400 #define FREEZER_ERROR_STRING_LENGTH 128
401
402 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
403 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
404 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
405 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
406 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
407 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
408 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
409 static int
410 sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
411 {
412 #pragma unused(arg1, arg2, oidp, req)
413 int error = 0, changed = 0;
414 uint64_t val = memorystatus_freeze_budget_multiplier;
415 unsigned int new_budget;
416 clock_sec_t sec;
417 clock_nsec_t nsec;
418 mach_timespec_t now_ts;
419
420 error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
421 if (error) {
422 return error;
423 }
424 if (changed) {
425 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
426 return ENOTSUP;
427 }
428 #if !(DEVELOPMENT || DEBUG)
429 if (val > 100) {
430 /* Can not increase budget on release. */
431 return EINVAL;
432 }
433 #endif
434 lck_mtx_lock(&freezer_mutex);
435
436 memorystatus_freeze_budget_multiplier = val;
437 /* Start a new throttle interval with this budget multiplier */
438 new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
439 clock_get_system_nanotime(&sec, &nsec);
440 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
441 now_ts.tv_nsec = nsec;
442 memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
443 memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
444
445 lck_mtx_unlock(&freezer_mutex);
446 }
447 return 0;
448 }
449 EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
450 /*
451 * max. # of frozen process demotions we will allow in our daily cycle.
452 */
453 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
454
455 /*
456 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
457 */
458 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
459
460 #if DEVELOPMENT || DEBUG
461
462 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
463 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
464 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
465 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
466
467 /*
468 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
469 * "0" means no limit.
470 * Default is 10% of system-wide task limit.
471 */
472
473 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
474 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
475
476 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
477
478 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
479 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
480
481 /*
482 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
483 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
484 */
485 boolean_t memorystatus_freeze_to_memory = FALSE;
486 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
487
488 #define VM_PAGES_FOR_ALL_PROCS (2)
489
490 /*
491 * Manual trigger of freeze and thaw for dev / debug kernels only.
492 */
/*
 * Handler for the kern.memorystatus_freeze sysctl (dev/debug only).
 * Writing a pid force-freezes that process, bypassing the usual budget and
 * candidate checks. Writing VM_PAGES_FOR_ALL_PROCS instead compresses
 * anonymous pages system-wide without freezing anything. If the frozen
 * process leads a jetsam coalition, up to MAX_XPC_SERVICE_PIDS of its XPC
 * services are frozen as well (via pid_list and the 'again' label).
 * Returns 0 on success; ENOTSUP/EPERM/ENOSPC/EIO/EINVAL on failure.
 */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error, pid = 0;
	proc_t p;
	int freezer_error_code = 0;
	pid_t pid_list[MAX_XPC_SERVICE_PIDS];  /* XPC services of a frozen leader, frozen next */
	int ntasks = 0;
	coalition_t coal = COALITION_NULL;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr) {
		/* Read, or copyin failure: nothing to do. */
		return error;
	}

	if (pid == VM_PAGES_FOR_ALL_PROCS) {
		/* Special token: page out anonymous memory globally, no freeze. */
		vm_pageout_anonymous_pages();

		return 0;
	}

	lck_mtx_lock(&freezer_mutex);
	if (memorystatus_freeze_enabled == FALSE) {
		lck_mtx_unlock(&freezer_mutex);
		printf("sysctl_freeze: Freeze is DISABLED\n");
		return ENOTSUP;
	}

	/*
	 * Freeze-loop entry: 'pid' is the user-supplied pid on the first pass,
	 * then each XPC service pid popped off pid_list (see the while at the
	 * bottom). The freezer_mutex is held across all iterations.
	 */
again:
	p = proc_find(pid);
	if (p != NULL) {
		memorystatus_freezer_stats.mfs_process_considered_count++;
		uint32_t purgeable, wired, clean, dirty, shared;
		uint32_t max_pages = 0, state = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * Freezer backed by the compressor and swap file(s)
			 * will hold compressed data.
			 *
			 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
			 * being swapped out to disk. Note that this disables freezer swap support globally,
			 * not just for the process being frozen.
			 *
			 *
			 * We don't care about the global freezer budget or the process's (min/max) budget here.
			 * The freeze sysctl is meant to force-freeze a process.
			 *
			 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
			 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
			 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
			 */
			max_pages = memorystatus_freeze_pages_max;
		} else {
			/*
			 * We only have the compressor without any swap.
			 */
			max_pages = UINT32_MAX - 1;
		}

		/* Snapshot the memstat state under the proc list lock. */
		proc_list_lock();
		state = p->p_memstat_state;
		proc_list_unlock();

		/*
		 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
		 * We simply ensure that jetsam is not already working on the process and that the process has not
		 * explicitly disabled freezing.
		 */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
			printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
			    (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
			    (state & P_MEMSTAT_LOCKED) ? " locked" : "",
			    (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");

			proc_rele(p);
			lck_mtx_unlock(&freezer_mutex);
			return EPERM;
		}

		error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
		if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
			memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
		}

		if (error) {
			memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
			if (error == KERN_NO_SPACE) {
				/* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
				error = ENOSPC;
			} else {
				error = EIO;
			}
		} else {
			proc_list_lock();
			if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
				/* First freeze of this process: take a frozen slot and count it. */
				p->p_memstat_state |= P_MEMSTAT_FROZEN;
				p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
				memorystatus_frozen_count++;
				os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
				if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
					memorystatus_frozen_count_webcontent++;
					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
				}
				if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
					/* We just consumed the last frozen slot. */
					memorystatus_freeze_out_of_slots();
				}
			} else {
				// This was a re-freeze
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
					memorystatus_freezer_stats.mfs_refreeze_count++;
				}
			}
			p->p_memstat_frozen_count++;

			/* 'coal' is non-NULL only on the second and later passes (XPC services). */
			if (coal != NULL) {
				/* We just froze an xpc service. Mark it as such for telemetry */
				p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
				memorystatus_frozen_count_xpc_service++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
			}


			proc_list_unlock();

			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				/*
				 * We elevate only if we are going to swap out the data.
				 */
				error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
				    memorystatus_freeze_jetsam_band, TRUE);

				if (error) {
					printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
				}
			}
		}

		if ((error == 0) && (coal == NULL)) {
			/*
			 * We froze a process and so we check to see if it was
			 * a coalition leader and if it has XPC services that
			 * might need freezing.
			 * Only one leader can be frozen at a time and so we shouldn't
			 * enter this block more than once per call. Hence the
			 * check that 'coal' has to be NULL. We should make this an
			 * assert() or panic() once we have a much more concrete way
			 * to detect an app vs a daemon.
			 */

			task_t curr_task = NULL;

			curr_task = proc_task(p);
			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, coal)) {
				ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);

				if (ntasks > MAX_XPC_SERVICE_PIDS) {
					/* More XPC services than we can track; freeze only the first MAX_XPC_SERVICE_PIDS. */
					ntasks = MAX_XPC_SERVICE_PIDS;
				}
			}
		}

		proc_rele(p);

		/*
		 * Drain pid_list one pid at a time: pop the next XPC service pid
		 * and jump back to 'again'. Each iteration of the frozen leader's
		 * services falls back through to this while, so the loop exits
		 * once ntasks reaches zero.
		 */
		while (ntasks) {
			pid = pid_list[--ntasks];
			goto again;
		}

		lck_mtx_unlock(&freezer_mutex);
		return error;
	} else {
		printf("sysctl_freeze: Invalid process\n");
	}


	lck_mtx_unlock(&freezer_mutex);
	return EINVAL;
}
676
/* kern.memorystatus_freeze: write-only knob routed to sysctl_memorystatus_freeze above. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");
679
680 /*
681 * Manual trigger of agressive frozen demotion for dev / debug kernels only.
682 */
683 static int
684 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
685 {
686 #pragma unused(arg1, arg2)
687 int error, val;
688 /*
689 * Only demote on write to prevent demoting during `sysctl -a`.
690 * The actual value written doesn't matter.
691 */
692 error = sysctl_handle_int(oidp, &val, 0, req);
693 if (error || !req->newptr) {
694 return error;
695 }
696 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
697 return ENOTSUP;
698 }
699 lck_mtx_lock(&freezer_mutex);
700 memorystatus_demote_frozen_processes(false);
701 lck_mtx_unlock(&freezer_mutex);
702 return 0;
703 }
704
705 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
706
707 static int
708 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
709 {
710 #pragma unused(arg1, arg2)
711
712 int error, pid = 0;
713 proc_t p;
714
715 if (memorystatus_freeze_enabled == FALSE) {
716 return ENOTSUP;
717 }
718
719 error = sysctl_handle_int(oidp, &pid, 0, req);
720 if (error || !req->newptr) {
721 return error;
722 }
723
724 if (pid == VM_PAGES_FOR_ALL_PROCS) {
725 do_fastwake_warmup_all();
726 return 0;
727 } else {
728 p = proc_find(pid);
729 if (p != NULL) {
730 error = task_thaw(proc_task(p));
731
732 if (error) {
733 error = EIO;
734 } else {
735 /*
736 * task_thaw() succeeded.
737 *
738 * We increment memorystatus_frozen_count on the sysctl freeze path.
739 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
740 * when this process exits.
741 *
742 * proc_list_lock();
743 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
744 * proc_list_unlock();
745 */
746 }
747 proc_rele(p);
748 return error;
749 }
750 }
751
752 return EINVAL;
753 }
754
/* kern.memorystatus_thaw: write a pid (or VM_PAGES_FOR_ALL_PROCS) to the handler above. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
757
758
/*
 * Header of the FREEZER_CONTROL_GET_STATUS out-buffer: global freezer
 * preconditions sampled at evaluation time.
 */
typedef struct _global_freezable_status {
	boolean_t freeze_pages_threshold_crossed;  /* available pages fell below the freeze threshold */
	boolean_t freeze_eligible_procs_available; /* suspended-but-unfrozen count exceeds the suspended threshold */
	boolean_t freeze_scheduled_in_future;      /* freezer thread has a run scheduled after "now" */
}global_freezable_status_t;
764
/*
 * Per-process entry of the FREEZER_CONTROL_GET_STATUS out-buffer: the outcome
 * of one process's (dry-run) freezability evaluation.
 */
typedef struct _proc_freezable_status {
	boolean_t freeze_has_memstat_state; /* memstat flags permit freezing (suspended, not disabled/terminated/locked) */
	boolean_t freeze_has_pages_min;     /* page count meets the minimum (always TRUE for XPC services) */
	int freeze_has_probability;         /* probability-of-use from the global table; -1 if not applicable */
	int freeze_leader_eligible;         /* FREEZE_PROC_LEADER_FREEZABLE_* resolution for coalition members */
	boolean_t freeze_attempted;         /* task_freeze() was invoked (eval-only mode) */
	uint32_t p_memstat_state;           /* raw p_memstat_state snapshot */
	uint32_t p_pages;                   /* page count; -1 (wrapped) for XPC services */
	int p_freeze_error_code;            /* freezer error code if the eval attempt failed */
	int p_pid;
	int p_leader_pid;                   /* coalition leader pid; 0 when not a coalition-driven evaluation */
	char p_name[MAXCOMLEN + 1];
}proc_freezable_status_t;
778
779 #define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */
780
781 /*
782 * For coalition based freezing evaluations, we proceed as follows:
783 * - detect that the process is a coalition member and a XPC service
784 * - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
785 * - continue its freezability evaluation assuming its leader will be freezable too
786 *
787 * Once we are done evaluating all processes, we do a quick run thru all
788 * processes and for a coalition member XPC service we look up the 'freezable'
789 * status of its leader and iff:
790 * - the xpc service is freezable i.e. its individual freeze evaluation worked
791 * - and, its leader is also marked freezable
792 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
793 */
794
795 #define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN (-1)
796 #define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS (1)
797 #define FREEZE_PROC_LEADER_FREEZABLE_FAILURE (2)
798
/*
 * DEVELOPMENT || DEBUG only: evaluate every process in the idle band for
 * freezability without actually freezing anything (task_freeze() is called
 * in eval-only mode) and copy the results out to user space.
 *
 * Out-buffer layout: one global_freezable_status_t header followed by up to
 * MAX_FREEZABLE_PROCESSES proc_freezable_status_t entries.
 *
 * buffer/buffer_size: user buffer; must fit the worst-case layout.
 * retval: on success, set to the number of bytes copied out.
 * Returns 0 on success, else ENOTSUP/EINVAL/ENOMEM or a copyout error.
 */
static int
memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
{
	uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
	global_freezable_status_t *list_head;
	proc_freezable_status_t *list_entry, *list_entry_start;
	size_t list_size = 0, entry_count = 0;
	proc_t p, leader_proc;
	memstat_bucket_t *bucket;
	uint32_t state = 0, pages = 0;
	boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
	int error = 0, probability_of_use = 0;
	pid_t leader_pid = 0;


	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
		return ENOTSUP;
	}

	/* Worst-case output size: header + one entry per evaluated process. */
	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);

	if (buffer_size < list_size) {
		return EINVAL;
	}

	list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
	if (list_head == NULL) {
		return ENOMEM;
	}

	/* From here on, list_size tracks how many bytes we have actually filled. */
	list_size = sizeof(global_freezable_status_t);

	proc_list_lock();

	uint64_t curr_time = mach_absolute_time();

	/* Global freezer preconditions, sampled under the proc list lock. */
	list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
	list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
	list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);

	/* Entries start immediately after the header. */
	list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
	list_entry = list_entry_start;

	/* NOTE(review): 'bucket' is assigned but never read below — looks vestigial. */
	bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];

	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));

	p = memorystatus_get_first_proc_locked(&band, FALSE);
	proc_count++;

	while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
	    (p) &&
	    (list_size < buffer_size)) {
		if (isSysProc(p)) {
			/*
			 * Daemon:- We will consider freezing it iff:
			 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
			 * - its role in the coalition is XPC service.
			 *
			 * We skip memory size requirements in this case.
			 */

			coalition_t coal = COALITION_NULL;
			task_t leader_task = NULL, curr_task = NULL;
			int task_role_in_coalition = 0;

			curr_task = proc_task(p);
			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);

			if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
				/*
				 * By default, XPC services without an app
				 * will be the leader of their own single-member
				 * coalition.
				 */
				goto skip_ineligible_xpc;
			}

			leader_task = coalition_get_leader(coal);
			if (leader_task == TASK_NULL) {
				/*
				 * This jetsam coalition is currently leader-less.
				 * This could happen if the app died, but XPC services
				 * have not yet exited.
				 */
				goto skip_ineligible_xpc;
			}

			leader_proc = (proc_t)get_bsdtask_info(leader_task);
			task_deallocate(leader_task);

			if (leader_proc == PROC_NULL) {
				/* leader task is exiting */
				goto skip_ineligible_xpc;
			}

			task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);

			if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
				/* XPC service with a live leader: evaluate it, deferring the leader check. */
				xpc_skip_size_probability_check = TRUE;
				leader_pid = proc_getpid(leader_proc);
				goto continue_eval;
			}

skip_ineligible_xpc:
			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
			proc_count++;
			continue;
		}

continue_eval:
		strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);

		list_entry->p_pid = proc_getpid(p);

		state = p->p_memstat_state;

		/* Must be suspended and not terminated/locked/freeze-disabled/ignored. */
		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
		    !(state & P_MEMSTAT_SUSPENDED)) {
			try_freeze = list_entry->freeze_has_memstat_state = FALSE;
		} else {
			try_freeze = list_entry->freeze_has_memstat_state = TRUE;
		}

		list_entry->p_memstat_state = state;

		if (xpc_skip_size_probability_check == TRUE) {
			/*
			 * Assuming the coalition leader is freezable
			 * we don't care re. minimum pages and probability
			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
			 * XPC services have to be explicity opted-out of the disabled
			 * state. And we checked that state above.
			 */
			list_entry->freeze_has_pages_min = TRUE;
			list_entry->p_pages = -1;
			list_entry->freeze_has_probability = -1;

			/* Leader eligibility is resolved in the second pass below. */
			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
			list_entry->p_leader_pid = leader_pid;

			xpc_skip_size_probability_check = FALSE;
		} else {
			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
			list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */

			memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
			if (pages < memorystatus_freeze_pages_min) {
				try_freeze = list_entry->freeze_has_pages_min = FALSE;
			} else {
				list_entry->freeze_has_pages_min = TRUE;
			}

			list_entry->p_pages = pages;

			if (entry_count) {
				uint32_t j = 0;
				/*
				 * NOTE(review): probability_of_use is not reset between loop
				 * iterations; a process with no table entry inherits the
				 * previous match's value — verify this is intentional.
				 */
				for (j = 0; j < entry_count; j++) {
					if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
					    p->p_name,
					    MAXCOMLEN) == 0) {
						probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
						break;
					}
				}

				list_entry->freeze_has_probability = probability_of_use;

				try_freeze = ((probability_of_use > 0) && try_freeze);
			} else {
				list_entry->freeze_has_probability = -1;
			}
		}

		if (try_freeze) {
			uint32_t purgeable, wired, clean, dirty, shared;
			uint32_t max_pages = 0;
			int freezer_error_code = 0;

			/* Dry run: eval-only mode reports errors without freezing anything. */
			error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);

			if (error) {
				list_entry->p_freeze_error_code = freezer_error_code;
			}

			list_entry->freeze_attempted = TRUE;
		}

		list_entry++;
		freeze_eligible_proc_considered++;

		list_size += sizeof(proc_freezable_status_t);

		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
		proc_count++;
	}

	proc_list_unlock();

	list_entry = list_entry_start;

	/*
	 * Second pass (proc list lock dropped): resolve the deferred
	 * leader-eligibility of XPC service entries from their leader's outcome.
	 */
	for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
		if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
			leader_pid = list_entry[xpc_index].p_leader_pid;

			leader_proc = proc_find(leader_pid);

			if (leader_proc) {
				if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
					/*
					 * Leader has already been frozen.
					 */
					list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
					proc_rele(leader_proc);
					continue;
				}
				proc_rele(leader_proc);
			}

			/* Otherwise look for the leader's own entry in this buffer. */
			for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
				if (list_entry[leader_index].p_pid == leader_pid) {
					if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
					} else {
						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
						list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
					}
					break;
				}
			}

			/*
			 * Didn't find the leader entry. This might be likely because
			 * the leader never made it down to band 0.
			 */
			if (leader_index == freeze_eligible_proc_considered) {
				list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
				list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
			}
		}
	}

	buffer_size = MIN(list_size, INT32_MAX);

	error = copyout(list_head, buffer, buffer_size);
	if (error == 0) {
		*retval = (int32_t) buffer_size;
	} else {
		*retval = 0;
	}

	/* Free with the allocation-time size, not the filled size. */
	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
	kfree_data(list_head, list_size);

	memorystatus_log_debug("memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);

	return error;
}
1057
1058 #endif /* DEVELOPMENT || DEBUG */
1059
1060 /*
1061 * Get a list of all processes in the freezer band which are currently frozen.
1062 * Used by powerlog to collect analytics on frozen process.
1063 */
1064 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1065 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1066 {
1067 global_frozen_procs_t *frozen_procs = NULL;
1068 uint32_t band = memorystatus_freeze_jetsam_band;
1069 proc_t p;
1070 uint32_t state;
1071 int error;
1072 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1073 return ENOTSUP;
1074 }
1075 if (buffer_size < sizeof(global_frozen_procs_t)) {
1076 return EINVAL;
1077 }
1078 frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1079 if (frozen_procs == NULL) {
1080 return ENOMEM;
1081 }
1082
1083 proc_list_lock();
1084 p = memorystatus_get_first_proc_locked(&band, FALSE);
1085 while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1086 state = p->p_memstat_state;
1087 if (state & P_MEMSTAT_FROZEN) {
1088 frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1089 strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1090 p->p_name, sizeof(proc_name_t));
1091 frozen_procs->gfp_num_frozen++;
1092 }
1093 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1094 }
1095 proc_list_unlock();
1096
1097 buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1098 error = copyout(frozen_procs, buffer, buffer_size);
1099 if (error == 0) {
1100 *retval = (int32_t) buffer_size;
1101 } else {
1102 *retval = 0;
1103 }
1104 kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1105
1106 return error;
1107 }
1108
1109 /*
1110 * If dasd is running an experiment that impacts their freezer candidate selection,
1111 * we record that in our telemetry.
1112 */
1113 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1114
1115 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1116 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1117 {
1118 memorystatus_freezer_trial_identifiers_v1 identifiers;
1119 int error = 0;
1120
1121 if (buffer_size != sizeof(identifiers)) {
1122 return EINVAL;
1123 }
1124 error = copyin(buffer, &identifiers, sizeof(identifiers));
1125 if (error != 0) {
1126 return error;
1127 }
1128 if (identifiers.version != 1) {
1129 return EINVAL;
1130 }
1131 dasd_trial_identifiers = identifiers;
1132 *retval = 0;
1133 return error;
1134 }
1135
1136 /*
1137 * Reset the freezer state by wiping out all suspended frozen apps, clearing
1138 * per-process freezer state, and starting a fresh interval.
1139 */
static int
memorystatus_freezer_reset_state(int32_t *retval)
{
	uint32_t band = JETSAM_PRIORITY_IDLE;
	/* Don't kill above the frozen band */
	uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
	proc_t next_p = PROC_NULL;
	uint64_t new_budget;

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
	if (jetsam_reason == OS_REASON_NULL) {
		/* NOTE(review): we log and proceed with a NULL reason — confirm callees tolerate it. */
		memorystatus_log_error("memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
	}
	lck_mtx_lock(&freezer_mutex);
	/* Jetsam every *suspended* frozen process up to (and including) the freezer band. */
	kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
	proc_list_lock();

	/*
	 * Clear the considered and skip reason flags on all processes
	 * so we're starting fresh with the new policy.
	 */
	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		proc_t p = next_p;
		uint32_t state = p->p_memstat_state;
		/* Advance before mutating p, while its list linkage is still valid. */
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);

		/* Bands are walked in ascending priority; stop once past the freezer band. */
		if (p->p_memstat_effectivepriority > kMaxBand) {
			break;
		}
		/* Skip processes that are exiting or transiently locked. */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
			continue;
		}

		p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
	}

	proc_list_unlock();

	/* Begin a brand-new throttle interval with a fresh budget (no expired time, no rollover). */
	new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
	memorystatus_freeze_force_new_interval(new_budget);

	lck_mtx_unlock(&freezer_mutex);
	*retval = 0;
	return 0;
}
1191
1192 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1193 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1194 {
1195 int err = ENOTSUP;
1196
1197 #if DEVELOPMENT || DEBUG
1198 if (flags == FREEZER_CONTROL_GET_STATUS) {
1199 err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1200 }
1201 #endif /* DEVELOPMENT || DEBUG */
1202 if (flags == FREEZER_CONTROL_GET_PROCS) {
1203 err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1204 } else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1205 err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1206 } else if (flags == FREEZER_CONTROL_RESET_STATE) {
1207 err = memorystatus_freezer_reset_state(retval);
1208 }
1209
1210 return err;
1211 }
1212
1213 extern void vm_swap_consider_defragmenting(int);
1214 extern void vm_page_reactivate_all_throttled(void);
1215
/*
 * Jetsam every frozen process at or below max_band (optionally only those
 * that are also suspended), charging each kill to jetsam_reason.
 *
 * Must be called with the freezer mutex held (blocks new freezes while we
 * work) and without the proc list lock (taken and dropped internally).
 *
 * memory_reclaimed_out: if non-NULL, receives the summed physical footprint
 *   of the processes successfully killed.
 * Returns true if at least one process was killed.
 */
static bool
kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	unsigned int band = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	pid_t pid = 0;
	bool retval = false, killed = false;
	uint32_t state;
	uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
	proc_list_lock();

	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
		state = p->p_memstat_state;

		/* Bands are walked in ascending priority; past max_band we're done. */
		if (p->p_memstat_effectivepriority > max_band) {
			break;
		}

		if (!(state & P_MEMSTAT_FROZEN)) {
			continue;
		}

		if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
			continue;
		}

		/* Clear a stale error flag before attempting the kill. */
		if (state & P_MEMSTAT_ERROR) {
			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
		}

		/* Already exiting: count it so the DEBUG check below can account for it. */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
			memorystatus_log("memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
			skips++;
			continue;
		}

		/* Sample the footprint before killing so reclaimed memory can be credited. */
		footprint = get_task_phys_footprint(proc_task(p));
		pid = proc_getpid(p);
		proc_list_unlock();

		/* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
		os_reason_ref(jetsam_reason);
		retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
		if (retval) {
			killed = true;
			memory_reclaimed += footprint;
		}
		proc_list_lock();
		/*
		 * The bands might have changed when we dropped the proc list lock.
		 * So start from the beginning.
		 * Since we're preventing any further freezing by holding the freezer mutex,
		 * and we skip anything we've already tried to kill this is guaranteed to terminate.
		 */
		band = 0;
		skips = 0;
		next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	}

	assert(skips <= memorystatus_frozen_count);
#if DEVELOPMENT || DEBUG
	if (!suspended_only && max_band >= JETSAM_PRIORITY_FOREGROUND) {
		/*
		 * Check that we've killed all frozen processes.
		 * Note that they may still be exiting (represented by skips).
		 */
		if (memorystatus_frozen_count - skips > 0) {
			assert(memorystatus_freeze_enabled == FALSE);

			panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
			    memorystatus_frozen_count);
		}
	}
#endif /* DEVELOPMENT || DEBUG */
	if (memory_reclaimed_out) {
		*memory_reclaimed_out = memory_reclaimed;
	}
	proc_list_unlock();
	return killed;
}
1306
1307 /*
1308 * Disables the freezer, jetsams all frozen processes,
1309 * and reclaims the swap space immediately.
1310 */
1311
1312 void
memorystatus_disable_freeze(void)1313 memorystatus_disable_freeze(void)
1314 {
1315 uint64_t memory_reclaimed = 0;
1316 bool killed = false;
1317 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1318 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1319
1320
1321 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1322 memorystatus_available_pages, 0, 0, 0, 0);
1323 memorystatus_log("memorystatus: Disabling freezer. Will kill all frozen processes\n");
1324
1325 /*
1326 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1327 * and all frozen processes will be killed
1328 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1329 * ensures that no new processes will be frozen once we release the mutex.
1330 *
1331 */
1332 memorystatus_freeze_enabled = FALSE;
1333
1334 /*
1335 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1336 */
1337 vm_page_reactivate_all_throttled();
1338 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1339 if (jetsam_reason == OS_REASON_NULL) {
1340 memorystatus_log_error("memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1341 }
1342
1343 killed = kill_all_frozen_processes(JETSAM_PRIORITY_FOREGROUND, false, jetsam_reason, &memory_reclaimed);
1344
1345 if (killed) {
1346 memorystatus_log_info("memorystatus: Killed all frozen processes.\n");
1347 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1348
1349 proc_list_lock();
1350 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1351 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1352 uint64_t timestamp_now = mach_absolute_time();
1353 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1354 memorystatus_jetsam_snapshot->js_gencount++;
1355 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1356 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1357 proc_list_unlock();
1358 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1359 if (!ret) {
1360 proc_list_lock();
1361 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1362 }
1363 }
1364 proc_list_unlock();
1365 } else {
1366 memorystatus_log_info("memorystatus: No frozen processes to kill.\n");
1367 }
1368
1369 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1370 memorystatus_available_pages, memory_reclaimed, 0, 0, 0);
1371
1372 return;
1373 }
1374
1375 static void
memorystatus_set_freeze_is_enabled(bool enabled)1376 memorystatus_set_freeze_is_enabled(bool enabled)
1377 {
1378 lck_mtx_lock(&freezer_mutex);
1379 if (enabled != memorystatus_freeze_enabled) {
1380 if (enabled) {
1381 memorystatus_freeze_enabled = true;
1382 } else {
1383 memorystatus_disable_freeze();
1384 }
1385 }
1386 lck_mtx_unlock(&freezer_mutex);
1387 }
1388
1389
1390 static int
1391 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1392 {
1393 #pragma unused(arg1, arg2)
1394 int error, val = memorystatus_freeze_enabled ? 1 : 0;
1395
1396 error = sysctl_handle_int(oidp, &val, 0, req);
1397 if (error || !req->newptr) {
1398 return error;
1399 }
1400
1401 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1402 memorystatus_log_error("memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1403 return EINVAL;
1404 }
1405
1406 memorystatus_set_freeze_is_enabled(val);
1407
1408 return 0;
1409 }
1410
1411 SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1412
1413 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1414 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1415 {
1416 uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1417 uint64_t interval_expiration_absolutetime;
1418 nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1419 memorystatus_log_info("memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1420
1421 thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1422 }
1423
1424 extern uuid_string_t trial_treatment_id;
1425 extern uuid_string_t trial_experiment_id;
1426 extern int trial_deployment_id;
1427
/*
 * Core Analytics schema for the per-interval freezer telemetry event.
 * The fields are populated in memorystatus_freeze_record_interval_analytics()
 * each time a throttle interval expires.
 */
CA_EVENT(freezer_interval,
    CA_INT, budget_remaining,
    CA_INT, error_below_min_pages,
    CA_INT, error_excess_shared_memory,
    CA_INT, error_low_private_shared_ratio,
    CA_INT, error_no_compressor_space,
    CA_INT, error_no_swap_space,
    CA_INT, error_low_probability_of_use,
    CA_INT, error_elevated,
    CA_INT, error_other,
    CA_INT, frozen_count,
    CA_INT, pageouts,
    CA_INT, refreeze_average,
    CA_INT, skipped_full,
    CA_INT, skipped_shared_mb_high,
    CA_INT, swapusage,
    CA_INT, thaw_count,
    CA_INT, thaw_percentage,
    CA_INT, thaws_per_gb,
    CA_INT, trial_deployment_id,
    CA_INT, dasd_trial_deployment_id,
    CA_INT, budget_exhaustion_duration_remaining,
    CA_INT, thaw_percentage_webcontent,
    CA_INT, thaw_percentage_fg,
    CA_INT, thaw_percentage_bg,
    CA_INT, thaw_percentage_fg_non_xpc_service,
    CA_INT, fg_resume_count,
    CA_INT, unique_freeze_count,
    CA_INT, unique_thaw_count,
    CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1461
1462 extern uint64_t vm_swap_get_total_space(void);
1463 extern uint64_t vm_swap_get_free_space(void);
1464
1465 /*
1466 * Record statistics from the expiring interval
1467 * via core analytics.
1468 */
static void
memorystatus_freeze_record_interval_analytics(void)
{
	ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
	CA_EVENT_TYPE(freezer_interval) * e = event->data;
	/* Budget remaining, converted from pages to MB. */
	e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
	uint64_t process_considered_count, refrozen_count, below_threshold_count;
	memory_object_size_t swap_size;
	process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
	if (process_considered_count != 0) {
		/* Each error field is a percentage of the processes considered this interval. */
		e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
		e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
		e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
		e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
		e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
		e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
		e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
		e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
	}
	e->frozen_count = memorystatus_frozen_count;
	/* Pageouts in MB for this interval. */
	e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
	refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
	if (refrozen_count != 0) {
		/* Average MB written back per refreeze. */
		e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
	}
	below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
	if (below_threshold_count != 0) {
		/* Skip reasons as a percentage of below-threshold wakeups. */
		e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
		e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
	}
	if (VM_CONFIG_SWAP_IS_PRESENT) {
		swap_size = vm_swap_get_total_space();
		if (swap_size) {
			/*
			 * NOTE(review): this records the percentage of swap that is
			 * *free*, despite the field being named 'swapusage' — verify
			 * downstream consumers expect that.
			 */
			e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
		}
	}
	e->thaw_count = memorystatus_thaw_count;
	e->thaw_percentage = get_thaw_percentage();
	e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
	e->thaw_percentage_fg = get_thaw_percentage_fg();
	e->thaw_percentage_bg = get_thaw_percentage_bg();
	e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();

	/* pageouts is in MB here, so /1024 yields GB; guard the division. */
	if (e->pageouts / (1UL << 10) != 0) {
		e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
	}
	e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
	e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
	e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
	e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);

	/*
	 * Record any xnu or dasd experiment information
	 */
	strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
	strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
	e->trial_deployment_id = trial_deployment_id;
	strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
	strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
	e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;

	CA_EVENT_SEND(event);
}
1532
/*
 * Thread-call target fired when the current throttle interval expires:
 * records the old interval's analytics, then starts a new interval whose
 * budget accounts for how long ago the old one expired and any unused
 * (rolled-over) budget.
 */
static void
memorystatus_freeze_reset_interval(void *arg0, void *arg1)
{
#pragma unused(arg0, arg1)
	struct throttle_interval_t *interval = NULL;
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t now_ts;
	uint32_t budget_rollover = 0;

	clock_get_system_nanotime(&sec, &nsec);
	/* Clamp seconds into the 32-bit mach_timespec field. */
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;
	interval = normal_throttle_window;

	/* Record analytics from the old interval before resetting. */
	memorystatus_freeze_record_interval_analytics();

	lck_mtx_lock(&freezer_mutex);
	/* How long has it been since the previous interval expired? */
	mach_timespec_t expiration_period_ts = now_ts;
	SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
	/* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
	budget_rollover = interval->pageouts > interval->max_pageouts ?
	    0 : interval->max_pageouts - interval->pageouts;

	memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
		    expiration_period_ts.tv_sec, interval->burst_multiple,
		    interval->mins, budget_rollover),
	    now_ts);
	/* The new interval's full budget is immediately available. */
	memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;

	if (!memorystatus_freezer_use_demotion_list) {
		memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
	}
	lck_mtx_unlock(&freezer_mutex);
}
1570
1571
1572 proc_t
memorystatus_get_coalition_leader_and_role(proc_t p,int * role_in_coalition)1573 memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1574 {
1575 coalition_t coal = COALITION_NULL;
1576 task_t leader_task = NULL, curr_task = NULL;
1577 proc_t leader_proc = PROC_NULL;
1578
1579 curr_task = proc_task(p);
1580 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1581
1582 if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1583 return p;
1584 }
1585
1586 leader_task = coalition_get_leader(coal);
1587 if (leader_task == TASK_NULL) {
1588 /*
1589 * This jetsam coalition is currently leader-less.
1590 * This could happen if the app died, but XPC services
1591 * have not yet exited.
1592 */
1593 return PROC_NULL;
1594 }
1595
1596 leader_proc = (proc_t)get_bsdtask_info(leader_task);
1597 task_deallocate(leader_task);
1598
1599 if (leader_proc == PROC_NULL) {
1600 /* leader task is exiting */
1601 return PROC_NULL;
1602 }
1603
1604 *role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1605
1606 return leader_proc;
1607 }
1608
1609 bool
memorystatus_freeze_process_is_recommended(const proc_t p)1610 memorystatus_freeze_process_is_recommended(const proc_t p)
1611 {
1612 assert(!memorystatus_freezer_use_ordered_list);
1613 int probability_of_use = 0;
1614
1615 size_t entry_count = 0, i = 0;
1616 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1617 if (entry_count == 0) {
1618 /*
1619 * If dasd hasn't supplied a table yet, we default to every app being eligible
1620 * for the freezer.
1621 */
1622 return true;
1623 }
1624 for (i = 0; i < entry_count; i++) {
1625 /*
1626 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1627 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1628 * MAXCOMLEN bytes here since the name in the probabilities table could
1629 * be truncated from the proc_t's p_name.
1630 */
1631 if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1632 p->p_name,
1633 MAXCOMLEN) == 0) {
1634 probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1635 break;
1636 }
1637 }
1638 return probability_of_use > 0;
1639 }
1640
1641 __private_extern__ void
memorystatus_freeze_init(void)1642 memorystatus_freeze_init(void)
1643 {
1644 kern_return_t result;
1645 thread_t thread;
1646
1647 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1648 /*
1649 * This is just the default value if the underlying
1650 * storage device doesn't have any specific budget.
1651 * We check with the storage layer in memorystatus_freeze_update_throttle()
1652 * before we start our freezing the first time.
1653 */
1654 memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1655
1656 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1657 if (result == KERN_SUCCESS) {
1658 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1659 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1660 thread_set_thread_name(thread, "VM_freezer");
1661
1662 thread_deallocate(thread);
1663 } else {
1664 panic("Could not create memorystatus_freeze_thread");
1665 }
1666
1667 freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
1668 /* Start a new interval */
1669
1670 lck_mtx_lock(&freezer_mutex);
1671 uint32_t budget;
1672 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1673 memorystatus_freeze_force_new_interval(budget);
1674 lck_mtx_unlock(&freezer_mutex);
1675 } else {
1676 memorystatus_freeze_budget_pages_remaining = 0;
1677 }
1678 }
1679
1680 void
memorystatus_freeze_configure_for_swap()1681 memorystatus_freeze_configure_for_swap()
1682 {
1683 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1684 return;
1685 }
1686
1687 assert(memorystatus_swap_all_apps);
1688
1689 /*
1690 * We expect both a larger working set and larger individual apps
1691 * in this mode, so tune up the freezer accordingly.
1692 */
1693 memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED;
1694 memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED;
1695 memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED;
1696
1697 /*
1698 * We don't have a budget when running with full app swap.
1699 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1700 * unlimited budget.
1701 */
1702 lck_mtx_lock(&freezer_mutex);
1703 uint32_t budget;
1704 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1705 memorystatus_freeze_force_new_interval(budget);
1706 lck_mtx_unlock(&freezer_mutex);
1707 }
1708
1709 void
memorystatus_freeze_disable_swap()1710 memorystatus_freeze_disable_swap()
1711 {
1712 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1713 return;
1714 }
1715
1716 assert(!memorystatus_swap_all_apps);
1717
1718 memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX;
1719 memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS;
1720 memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
1721
1722 /*
1723 * Calculate a new budget now that we're constrained by our daily write budget again.
1724 */
1725 lck_mtx_lock(&freezer_mutex);
1726 uint32_t budget;
1727 budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1728 memorystatus_freeze_force_new_interval(budget);
1729 lck_mtx_unlock(&freezer_mutex);
1730 }
1731
1732 /*
1733 * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
1734 */
static int
memorystatus_freeze_process(
	proc_t p,
	coalition_t *coal, /* IN / OUT */
	pid_t *coalition_list, /* OUT */
	unsigned int *coalition_list_length /* OUT */)
{
	/*
	 * Attempt to freeze (or re-freeze) one process via task_freeze().
	 *
	 * p: candidate, stable under the caller-held proc_list lock; we take
	 *    our own reference before dropping that lock around task_freeze().
	 * coal: NULL for a standalone freeze. If non-NULL and *coal == NULL,
	 *    then on success — when p leads a jetsam coalition — *coal,
	 *    coalition_list and *coalition_list_length are filled with the
	 *    coalition and its XPC-service pids to freeze next.
	 *
	 * Returns 0 on success, EBUSY if the proc is busy/exiting, EINVAL if
	 * ineligible or task_freeze() failed. The proc_list lock is dropped
	 * and re-taken internally but is held again on return.
	 */
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	kern_return_t kr;
	uint32_t purgeable, wired, clean, dirty, shared;
	uint64_t max_pages = 0;
	int freezer_error_code = 0;
	bool is_refreeze = false;
	task_t curr_task = TASK_NULL;

	pid_t aPid = proc_getpid(p);

	is_refreeze = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;

	/* Ensure the process is eligible for (re-)freezing */
	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
		return EINVAL;
	}
	if (is_refreeze) {
		/*
		 * Not currently being looked at for something.
		 */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			return EBUSY;
		}

		/*
		 * We are going to try and refreeze and so re-evaluate
		 * the process. We don't want to double count the shared
		 * memory. So deduct the old snapshot here.
		 */
		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
		p->p_memstat_freeze_sharedanon_pages = 0;

		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	} else {
		if (!memorystatus_is_process_eligible_for_freeze(p)) {
			return EINVAL;
		}
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * Freezer backed by the compressor and swap file(s)
		 * will hold compressed data.
		 */

		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
	} else {
		/*
		 * We only have the compressor pool.
		 */
		max_pages = UINT32_MAX - 1;
	}

	/* Mark as locked temporarily to avoid kill */
	p->p_memstat_state |= P_MEMSTAT_LOCKED;

	p = proc_ref(p, true);
	if (!p) {
		memorystatus_freezer_stats.mfs_error_other_count++;
		return EBUSY;
	}

	/* Safe to drop: we hold our own proc ref and P_MEMSTAT_LOCKED is set. */
	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
	    memorystatus_available_pages, 0, 0, 0, 0);

	max_pages = MIN(max_pages, UINT32_MAX);
	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
	    memorystatus_available_pages, aPid, 0, 0, 0);

	memorystatus_log_debug("memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %llu, shared %d\n",
	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);

	/* Re-acquire before touching p_memstat_* state again. */
	proc_list_lock();

	/* Success? */
	if (KERN_SUCCESS == kr) {
		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

		p->p_memstat_freeze_sharedanon_pages += shared;

		memorystatus_frozen_shared_mb += shared;

		if (!is_refreeze) {
			/* First-time freeze: update the frozen bookkeeping. */
			p->p_memstat_state |= P_MEMSTAT_FROZEN;
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
			memorystatus_frozen_count++;
			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
				memorystatus_frozen_count_webcontent++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
			}
			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
				memorystatus_freeze_out_of_slots();
			}
		} else {
			// This was a re-freeze
			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
				memorystatus_freezer_stats.mfs_refreeze_count++;
			}
		}

		p->p_memstat_frozen_count++;

		/*
		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
		 * to its higher jetsam band.
		 */
		proc_list_unlock();

		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			int ret;
			unsigned int i;
			/* Elevate the frozen process to the dedicated frozen jetsam band. */
			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);

			if (ret) {
				printf("Elevating the frozen process failed with %d\n", ret);
				/* not fatal */
			}

			/* Update stats */
			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}
		}
		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
		memorystatus_log("memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
		    memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);

		proc_list_lock();

		memorystatus_freeze_pageouts += dirty;

		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
			/*
			 * Add some eviction logic here? At some point should we
			 * jetsam a process to get back its swap space so that we
			 * can freeze a more eligible process at this moment in time?
			 */
		}

		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
		if (coal != NULL && *coal == NULL) {
			curr_task = proc_task(p);
			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, *coal)) {
				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);

				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
				}
			}
		} else {
			/*
			 * We just froze an xpc service. Mark it as such for telemetry.
			 * NOTE(review): this branch also runs when coal == NULL (the
			 * standalone-freeze path, e.g. memorystatus_freeze_process_sync),
			 * which would tag a non-XPC proc — confirm that is intended.
			 */
			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
			memorystatus_frozen_count_xpc_service++;
			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
		}

		/* Done elevating: release the busy bit and wake any waiters. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);
		return 0;
	} else {
		if (is_refreeze) {
			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
				/*
				 * Keeping this prior-frozen process in this high band when
				 * we failed to re-freeze it due to bad shared memory usage
				 * could cause excessive pressure on the lower bands.
				 * We need to demote it for now. It'll get re-evaluated next
				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
				 * bit.
				 */

				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
				memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
			}
		} else {
			/* Don't reconsider this proc for first-time freezing. */
			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
		}
		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_top_process");

		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);

		return EINVAL;
	}
}
1951
1952 /*
1953 * Synchronously freeze the passed proc. Called with a reference to the proc held.
1954 *
1955 * Doesn't deal with:
1956 * - re-freezing because this is called on a specific process and
1957 * not by the freezer thread. If that changes, we'll have to teach it about
1958 * refreezing a frozen process.
1959 *
1960 * - grouped/coalition freezing because we are hoping to deprecate this
1961 * interface as it was used by user-space to freeze particular processes. But
1962 * we have moved away from that approach to having the kernel choose the optimal
1963 * candidates to be frozen.
1964 *
1965 * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
1966 * returns EINVAL or the value returned by task_freeze().
1967 */
1968 int
memorystatus_freeze_process_sync(proc_t p)1969 memorystatus_freeze_process_sync(proc_t p)
1970 {
1971 int ret = EINVAL;
1972 boolean_t memorystatus_freeze_swap_low = FALSE;
1973
1974 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1975 return ENOTSUP;
1976 }
1977
1978 lck_mtx_lock(&freezer_mutex);
1979
1980 if (p == NULL) {
1981 printf("memorystatus_freeze_process_sync: Invalid process\n");
1982 goto exit;
1983 }
1984
1985 if (memorystatus_freeze_enabled == FALSE) {
1986 printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
1987 goto exit;
1988 }
1989
1990 if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
1991 printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
1992 goto exit;
1993 }
1994
1995 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1996 if (!memorystatus_freeze_budget_pages_remaining) {
1997 printf("memorystatus_freeze_process_sync: exit with NO available budget\n");
1998 goto exit;
1999 }
2000
2001 proc_list_lock();
2002
2003 ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2004
2005 exit:
2006 lck_mtx_unlock(&freezer_mutex);
2007
2008 return ret;
2009 }
2010
proc_t
memorystatus_freezer_candidate_list_get_proc(
	struct memorystatus_freezer_candidate_list *list,
	size_t index,
	uint64_t *pid_mismatch_counter)
{
	/*
	 * Map entry `index` of a userspace-supplied candidate list to a live
	 * proc_t.
	 *
	 * Returns a BORROWED pointer (no reference held) — valid only while
	 * the caller keeps the proc_list lock — or NULL when the entry is
	 * absent, removed, or no longer matches a running process.
	 * If the pid is stale but a proc with the same name is found by
	 * scanning the bands, *pid_mismatch_counter is bumped and the entry's
	 * pid is updated in place for faster lookup next time.
	 */
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	if (list->mfcl_list == NULL || list->mfcl_length <= index) {
		return NULL;
	}
	memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
	if (entry->pid == NO_PID) {
		/* Entry has been removed. */
		return NULL;
	}

	proc_t p = proc_find_locked(entry->pid);
	if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
		/*
		 * We grab a reference when we are about to freeze the process. So drop
		 * the reference that proc_find_locked() grabbed for us.
		 * We also have the proc_list_lock so this process is stable.
		 */
		proc_rele(p);
		return p;
	} else {
		if (p) {
			/* pid rollover. */
			proc_rele(p);
		}
		/*
		 * The proc has exited since we received this list.
		 * It may have re-launched with a new pid, so we go looking for it.
		 */
		unsigned int band = JETSAM_PRIORITY_IDLE;
		p = memorystatus_get_first_proc_locked(&band, TRUE);
		while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
			if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
				(*pid_mismatch_counter)++;
				/* Stash the pid for faster lookup next time. */
				entry->pid = proc_getpid(p);
				return p;
			}
			p = memorystatus_get_next_proc_locked(&band, p, TRUE);
		}
		/* No match. */
		return NULL;
	}
}
2060
2061 static size_t
memorystatus_freeze_pid_list(pid_t * pid_list,unsigned int num_pids)2062 memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2063 {
2064 int ret = 0;
2065 size_t num_frozen = 0;
2066 while (num_pids > 0) {
2067 pid_t pid = pid_list[--num_pids];
2068 proc_t p = proc_find_locked(pid);
2069 if (p) {
2070 proc_rele(p);
2071 ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2072 if (ret != 0) {
2073 break;
2074 }
2075 num_frozen++;
2076 }
2077 }
2078 return num_frozen;
2079 }
2080
2081 /*
2082 * Attempt to freeze the best candidate process.
2083 * Keep trying until we freeze something or run out of candidates.
2084 * Returns the number of processes frozen (including coalition members).
2085 */
static size_t
memorystatus_freeze_top_process(void)
{
	/*
	 * Pick and freeze the best candidate, retrying with the next
	 * candidate after a non-fatal failure. If the frozen process leads a
	 * jetsam coalition, its XPC services are frozen too. Returns the
	 * total number of processes frozen.
	 */
	int freeze_ret;
	size_t num_frozen = 0;
	coalition_t coal = COALITION_NULL;
	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
	unsigned int ntasks = 0;
	struct memorystatus_freeze_list_iterator iterator;
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);

	/* Fresh iterator: start the candidate scan from the top. */
	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();
	while (true) {
		proc_t p = memorystatus_freeze_pick_process(&iterator);
		if (p == PROC_NULL) {
			/* Nothing left to freeze */
			break;
		}
		freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
		if (freeze_ret == 0) {
			num_frozen = 1;
			/*
			 * We froze a process successfully.
			 * If it's a coalition head, freeze the coalition.
			 * Then we're done for now.
			 */
			if (coal != NULL) {
				num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
			}
			break;
		} else {
			/* Out of compressor/swap space: no point retrying. */
			if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
				break;
			}
			/*
			 * Freeze failed but we're not out of space.
			 * Keep trying to find a good candidate,
			 * memorystatus_freeze_pick_process will not return this proc again until
			 * we reset the iterator.
			 */
		}
	}
	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, 0, 0, 0, 0);

	return num_frozen;
}
2137
2138 #if DEVELOPMENT || DEBUG
2139 /* For testing memorystatus_freeze_top_process */
2140 static int
2141 sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
2142 {
2143 #pragma unused(arg1, arg2)
2144 int error, val, ret = 0;
2145 size_t num_frozen;
2146 /*
2147 * Only freeze on write to prevent freezing during `sysctl -a`.
2148 * The actual value written doesn't matter.
2149 */
2150 error = sysctl_handle_int(oidp, &val, 0, req);
2151 if (error || !req->newptr) {
2152 return error;
2153 }
2154
2155 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2156 return ENOTSUP;
2157 }
2158
2159 lck_mtx_lock(&freezer_mutex);
2160 num_frozen = memorystatus_freeze_top_process();
2161 lck_mtx_unlock(&freezer_mutex);
2162
2163 if (num_frozen == 0) {
2164 ret = ESRCH;
2165 }
2166 return ret;
2167 }
2168 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2169 0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
2170 #endif /* DEVELOPMENT || DEBUG */
2171
2172 static inline boolean_t
memorystatus_can_freeze_processes(void)2173 memorystatus_can_freeze_processes(void)
2174 {
2175 boolean_t ret;
2176
2177 proc_list_lock();
2178
2179 if (memorystatus_suspended_count) {
2180 memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2181
2182 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2183 ret = TRUE;
2184 } else {
2185 ret = FALSE;
2186 }
2187 } else {
2188 ret = FALSE;
2189 }
2190
2191 proc_list_unlock();
2192
2193 return ret;
2194 }
2195
static boolean_t
memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
{
	/*
	 * Decide whether global conditions permit freezing right now:
	 * sufficiently low on memory, enough suspended candidates, and
	 * compressor/swap space available. May clear *memorystatus_freeze_swap_low.
	 */
	boolean_t can_freeze = TRUE;

	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
	 * after boot, and is generally is a no-op once we've reached steady state. */
	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
		return FALSE;
	}

	/* Check minimum suspended process threshold. */
	if (!memorystatus_can_freeze_processes()) {
		return FALSE;
	}
	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * In-core compressor used for freezing WITHOUT on-disk swap support.
		 */
		if (vm_compressor_low_on_space()) {
			/*
			 * NOTE(review): this guard dereferences the flag, so the
			 * store below runs only when the flag is already TRUE —
			 * a no-op. It looks like a NULL-pointer check on the
			 * memorystatus_freeze_swap_low pointer was intended
			 * (as in the else branch below) — confirm before changing.
			 */
			if (*memorystatus_freeze_swap_low) {
				*memorystatus_freeze_swap_low = TRUE;
			}

			can_freeze = FALSE;
		} else {
			/* Space recovered: clear a previously-set low flag. */
			if (*memorystatus_freeze_swap_low) {
				*memorystatus_freeze_swap_low = FALSE;
			}

			can_freeze = TRUE;
		}
	} else {
		/*
		 * Freezing WITH on-disk swap support.
		 *
		 * In-core compressor fronts the swap.
		 */
		if (vm_swap_low_on_space()) {
			/* NOTE(review): same no-op store pattern as above — confirm intent. */
			if (*memorystatus_freeze_swap_low) {
				*memorystatus_freeze_swap_low = TRUE;
			}

			can_freeze = FALSE;
		}
	}

	return can_freeze;
}
2247
2248 /*
2249 * Demote the given frozen process.
2250 * Caller must hold the proc_list_lock & it will be held on return.
2251 */
static void
memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
{
	/*
	 * Move the frozen process p out of the elevated frozen band, down to
	 * IDLE (or the highest asserted priority, if any). urgent_mode is
	 * only used for logging on DEVELOPMENT/DEBUG builds.
	 */
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
	int maxpriority = JETSAM_PRIORITY_IDLE;
	p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
	memorystatus_invalidate_idle_demotion_locked(p, TRUE);

	maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
	memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
#if DEVELOPMENT || DEBUG
	memorystatus_log("memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
	    (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * The freezer thread will consider this a normal app to be frozen
	 * because it is in the IDLE band. So we don't need the
	 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
	 * we'll correctly count it as eligible for re-freeze again.
	 *
	 * We don't drop the frozen count because this process still has
	 * state on disk. So there's a chance it gets resumed and then it
	 * should land in the higher jetsam band. For that it needs to
	 * remain marked frozen.
	 */
	if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	}
}
2285
static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
{
	/*
	 * Scan the frozen jetsam band and demote frozen processes.
	 * urgent_mode: demote exactly one re-freeze-eligible process.
	 * normal mode: demote processes under the thaw-count threshold, up to
	 * memorystatus_max_frozen_demotions_daily per pass.
	 * Returns the number demoted.
	 */
	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
	unsigned int demoted_proc_count = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	proc_list_lock();

	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
	while (next_p) {
		/* Grab the successor first: demotion moves p out of this band. */
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);

		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
			continue;
		}

		/* Skip procs another thread is currently operating on. */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			continue;
		}

		if (urgent_mode) {
			if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
				/*
				 * This process hasn't been thawed recently and so most of
				 * its state sits on NAND and so we skip it -- jetsamming it
				 * won't help with memory pressure.
				 */
				continue;
			}
		} else {
			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
				/*
				 * This process has met / exceeded our thaw count demotion threshold
				 * and so we let it live in the higher bands.
				 */
				continue;
			}
		}

		memorystatus_demote_frozen_process(p, urgent_mode);
		demoted_proc_count++;
		/* Urgent mode demotes exactly one; normal mode honors the daily cap. */
		if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
			break;
		}
	}

	proc_list_unlock();
	return demoted_proc_count;
}
2336
2337 static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)2338 memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2339 {
2340 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2341 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2342 assert(memorystatus_freezer_use_demotion_list);
2343 unsigned int demoted_proc_count = 0;
2344
2345 proc_list_lock();
2346 for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2347 proc_t p = memorystatus_freezer_candidate_list_get_proc(
2348 &memorystatus_global_demote_list,
2349 i,
2350 &memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2351 if (p != NULL && memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2352 memorystatus_demote_frozen_process(p, urgent_mode);
2353 /* Remove this entry now that it's been demoted. */
2354 memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2355 demoted_proc_count++;
2356 /*
2357 * We only demote one proc at a time in this mode.
2358 * This gives jetsam a chance to kill the recently demoted processes.
2359 */
2360 break;
2361 }
2362 }
2363
2364 proc_list_unlock();
2365 return demoted_proc_count;
2366 }
2367
2368 /*
2369 * This function evaluates if the currently frozen processes deserve
2370 * to stay in the higher jetsam band. There are 2 modes:
2371 * - 'force one == TRUE': (urgent mode)
2372 * We are out of budget and can't refreeze a process. The process's
2373 * state, if it was resumed, will stay in compressed memory. If we let it
2374 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
2375 * the lower bands. So we force-demote the least-recently-used-and-thawed
2376 * process.
2377 *
2378 * - 'force_one == FALSE': (normal mode)
2379 * If the # of thaws of a process is below our threshold, then we
2380 * will demote that process into the IDLE band.
2381 * We don't immediately kill the process here because it already has
2382 * state on disk and so it might be worth giving it another shot at
2383 * getting thawed/resumed and used.
2384 */
2385 static void
memorystatus_demote_frozen_processes(bool urgent_mode)2386 memorystatus_demote_frozen_processes(bool urgent_mode)
2387 {
2388 unsigned int demoted_proc_count = 0;
2389
2390 if (memorystatus_freeze_enabled == FALSE) {
2391 /*
2392 * Freeze has been disabled likely to
2393 * reclaim swap space. So don't change
2394 * any state on the frozen processes.
2395 */
2396 return;
2397 }
2398
2399 /*
2400 * We have two demotion policies which can be toggled by userspace.
2401 * In non-urgent mode, the ordered list policy will
2402 * choose a demotion candidate using the list provided by dasd.
2403 * The thaw count policy will demote the oldest process that hasn't been
2404 * thawed more than memorystatus_thaw_count_demotion_threshold times.
2405 *
2406 * If urgent_mode is set, both policies will only consider demoting
2407 * processes that are re-freeze eligible. But the ordering is different.
2408 * The ordered list policy will scan in the order given by dasd.
2409 * The thaw count policy will scan through the frozen band.
2410 */
2411 if (memorystatus_freezer_use_demotion_list) {
2412 demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);
2413
2414 if (demoted_proc_count == 0 && urgent_mode) {
2415 /*
2416 * We're out of budget and the demotion list doesn't contain any valid
2417 * candidates. We still need to demote something. Fall back to scanning
2418 * the frozen band.
2419 */
2420 memorystatus_demote_frozen_processes_using_thaw_count(true);
2421 }
2422 } else {
2423 demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
2424 }
2425 }
2426
2427 /*
2428 * Calculate a new freezer budget.
2429 * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2430 * @param burst_multiple The burst_multiple for the new period
2431 * @param interval_duration_min How many minutes will the new interval be?
2432 * @param rollover The amount to rollover from the previous budget.
2433 *
2434 * @return A budget for the new interval.
2435 */
2436 static uint32_t
memorystatus_freeze_calculate_new_budget(unsigned int time_since_last_interval_expired_sec,unsigned int burst_multiple,unsigned int interval_duration_min,uint32_t rollover)2437 memorystatus_freeze_calculate_new_budget(
2438 unsigned int time_since_last_interval_expired_sec,
2439 unsigned int burst_multiple,
2440 unsigned int interval_duration_min,
2441 uint32_t rollover)
2442 {
2443 uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2444 const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2445 /* Precision factor for days_missed. 2 decimal points. */
2446 const static unsigned int kFixedPointFactor = 100;
2447 unsigned int days_missed;
2448
2449 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2450 return 0;
2451 }
2452 if (memorystatus_swap_all_apps) {
2453 /*
2454 * We effectively have an unlimited budget when app swap is enabled.
2455 */
2456 memorystatus_freeze_daily_mb_max = UINT32_MAX;
2457 return UINT32_MAX;
2458 }
2459
2460 /* Get the daily budget from the storage layer */
2461 if (vm_swap_max_budget(&freeze_daily_budget)) {
2462 freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2463 assert(freeze_daily_budget_mb <= UINT32_MAX);
2464 memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2465 memorystatus_log_info("memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2466 }
2467 /* Calculate the daily pageout budget */
2468 freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2469 /* Multiply by memorystatus_freeze_budget_multiplier */
2470 freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;
2471
2472 daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2473
2474 /*
2475 * Add additional budget for time since the interval expired.
2476 * For example, if the interval expired n days ago, we should get an additional n days
2477 * of budget since we didn't use any budget during those n days.
2478 */
2479 days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2480 budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2481 new_budget = rollover + daily_budget_pageouts + budget_missed;
2482 return (uint32_t) MIN(new_budget, UINT32_MAX);
2483 }
2484
2485 /*
2486 * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2487 * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2488 * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2489 * it gets set for new processes.
2490 * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2491 */
2492 static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason,bool locked)2493 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2494 {
2495 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2496 LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2497 unsigned int band = JETSAM_PRIORITY_IDLE;
2498 proc_t p;
2499
2500 if (!locked) {
2501 proc_list_lock();
2502 }
2503 p = memorystatus_get_first_proc_locked(&band, FALSE);
2504 while (p) {
2505 assert(p->p_memstat_effectivepriority == (int32_t) band);
2506 if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
2507 assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2508 p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2509 }
2510 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2511 }
2512 if (!locked) {
2513 proc_list_unlock();
2514 }
2515 }
2516
2517 /*
2518 * Called after we fail to freeze a process.
2519 * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2520 */
2521 static void
memorystatus_freeze_handle_error(proc_t p,const int freezer_error_code,bool was_refreeze,pid_t pid,const coalition_t coalition,const char * log_prefix)2522 memorystatus_freeze_handle_error(
2523 proc_t p,
2524 const int freezer_error_code,
2525 bool was_refreeze,
2526 pid_t pid,
2527 const coalition_t coalition,
2528 const char* log_prefix)
2529 {
2530 const char *reason;
2531 memorystatus_freeze_skip_reason_t skip_reason;
2532
2533 switch (freezer_error_code) {
2534 case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2535 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2536 reason = "too much shared memory";
2537 skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2538 break;
2539 case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2540 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2541 reason = "private-shared pages ratio";
2542 skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2543 break;
2544 case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2545 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2546 reason = "no compressor space";
2547 skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2548 break;
2549 case FREEZER_ERROR_NO_SWAP_SPACE:
2550 memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2551 reason = "no swap space";
2552 skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2553 break;
2554 default:
2555 reason = "unknown error";
2556 skip_reason = kMemorystatusFreezeSkipReasonOther;
2557 }
2558
2559 p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2560
2561 memorystatus_log("%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2562 log_prefix, was_refreeze ? "re" : "",
2563 (coalition == NULL ? "general" : "coalition-driven"), pid,
2564 ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2565 }
2566
2567 /*
2568 * Start a new normal throttle interval with the given budget.
2569 * Caller must hold the freezer mutex
2570 */
static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
{
	unsigned int band;
	proc_t p, next_p;
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	/* Arm the window: budget plus an expiry timestamp of start_ts + window length. */
	normal_throttle_window->max_pageouts = new_budget;
	normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
	normal_throttle_window->ts.tv_nsec = 0;
	ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
	/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
	if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
		normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
	} else {
		normal_throttle_window->pageouts = 0;
	}
	/* Ensure the normal window is now active. */
	memorystatus_freeze_degradation = FALSE;

	/*
	 * Reset interval statistics.
	 */
	memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
	memorystatus_freezer_stats.mfs_process_considered_count = 0;
	memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
	memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
	memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
	memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
	memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
	memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
	memorystatus_freezer_stats.mfs_error_elevated_count = 0;
	memorystatus_freezer_stats.mfs_error_other_count = 0;
	memorystatus_freezer_stats.mfs_refreeze_count = 0;
	memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
	memorystatus_freezer_stats.mfs_below_threshold_count = 0;
	memorystatus_freezer_stats.mfs_skipped_full_count = 0;
	memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
	memorystatus_thaw_count = 0;
	/* Thaw counters restart at 0; frozen-process counters are re-seeded from the live counts. */
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
	os_atomic_inc(&memorystatus_freeze_current_interval, release);

	/* Clear the focal thaw bit */
	proc_list_lock();
	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);

		/* Bands above FOREGROUND are not tracked for focal thaws; stop there. */
		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
			break;
		}
		p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
	}
	proc_list_unlock();

	/* Re-arm the thread call that will roll this interval over when it expires. */
	schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
}
2642
2643 #if DEVELOPMENT || DEBUG
2644
2645 static int
2646 sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
2647 {
2648 #pragma unused(arg1, arg2)
2649 int error = 0;
2650 unsigned int time_since_last_interval_expired_sec = 0;
2651 unsigned int new_budget;
2652
2653 error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
2654 if (error || !req->newptr) {
2655 return error;
2656 }
2657
2658 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2659 return ENOTSUP;
2660 }
2661 new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
2662 return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
2663 }
2664
/* Debug-only sysctl (vm.memorystatus_freeze_calculate_new_budget): write seconds-since-expiry, read back the budget. */
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
2667
2668 #endif /* DEVELOPMENT || DEBUG */
2669
2670 /*
2671 * Called when we first run out of budget in an interval.
2672 * Marks idle processes as not frozen due to lack of budget.
2673 * NB: It might be worth having a CA event here.
2674 */
2675 static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t * interval)2676 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2677 {
2678 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2679 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2680
2681 mach_timespec_t time_left = {0, 0};
2682 mach_timespec_t now_ts;
2683 clock_sec_t sec;
2684 clock_nsec_t nsec;
2685
2686 time_left.tv_sec = interval->ts.tv_sec;
2687 time_left.tv_nsec = 0;
2688 clock_get_system_nanotime(&sec, &nsec);
2689 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2690 now_ts.tv_nsec = nsec;
2691
2692 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2693 memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2694 os_log(OS_LOG_DEFAULT,
2695 "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2696 time_left.tv_sec / 60, memorystatus_frozen_count);
2697
2698 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2699 }
2700
2701 /*
2702 * Called when we cross over the threshold of maximum frozen processes allowed.
2703 * Marks remaining idle processes as not frozen due to lack of slots.
2704 */
static void
memorystatus_freeze_out_of_slots(void)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	/* Caller invokes this exactly when the last freezer slot fills up. */
	assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);

	os_log(OS_LOG_DEFAULT,
	    "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
	    memorystatus_frozen_count);

	/* proc_list lock is already held here: pass locked=true. */
	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
}
2718
2719 /*
2720 * This function will do 4 things:
2721 *
2722 * 1) check to see if we are currently in a degraded freezer mode, and if so:
2723 * - check to see if our window has expired and we should exit this mode, OR,
2724 * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2725 *
2726 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2727 *
2728 * 3) check what the current normal window allows for a budget.
2729 *
2730 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2731 * what we would normally expect, then we are running low on our daily budget and need to enter
2732 * degraded perf. mode.
2733 *
2734 * Caller must hold the freezer mutex
2735 * Caller must not hold the proc_list lock
2736 */
2737
2738 static void
memorystatus_freeze_update_throttle(uint64_t * budget_pages_allowed)2739 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2740 {
2741 clock_sec_t sec;
2742 clock_nsec_t nsec;
2743 mach_timespec_t now_ts;
2744 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2745 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2746
2747 unsigned int freeze_daily_pageouts_max = 0;
2748 bool started_with_budget = (*budget_pages_allowed > 0);
2749
2750 #if DEVELOPMENT || DEBUG
2751 if (!memorystatus_freeze_throttle_enabled) {
2752 /*
2753 * No throttling...we can use the full budget everytime.
2754 */
2755 *budget_pages_allowed = UINT64_MAX;
2756 return;
2757 }
2758 #endif
2759
2760 clock_get_system_nanotime(&sec, &nsec);
2761 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2762 now_ts.tv_nsec = nsec;
2763
2764 struct throttle_interval_t *interval = NULL;
2765
2766 if (memorystatus_freeze_degradation == TRUE) {
2767 interval = degraded_throttle_window;
2768
2769 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2770 interval->pageouts = 0;
2771 interval->max_pageouts = 0;
2772 } else {
2773 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2774 }
2775 }
2776
2777 interval = normal_throttle_window;
2778
2779 /*
2780 * Current throttle window.
2781 * Deny freezing if we have no budget left.
2782 * Try graceful degradation if we are within 25% of:
2783 * - the daily budget, and
2784 * - the current budget left is below our normal budget expectations.
2785 */
2786
2787 if (memorystatus_freeze_degradation == FALSE) {
2788 if (interval->pageouts >= interval->max_pageouts) {
2789 *budget_pages_allowed = 0;
2790 if (started_with_budget) {
2791 memorystatus_freeze_out_of_budget(interval);
2792 }
2793 } else {
2794 int budget_left = interval->max_pageouts - interval->pageouts;
2795 int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2796
2797 mach_timespec_t time_left = {0, 0};
2798
2799 time_left.tv_sec = interval->ts.tv_sec;
2800 time_left.tv_nsec = 0;
2801
2802 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2803
2804 if (budget_left <= budget_threshold) {
2805 /*
2806 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2807 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2808 * daily pageout budget.
2809 */
2810
2811 unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2812 unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2813
2814 /*
2815 * The current rate of pageouts is below what we would expect for
2816 * the normal rate i.e. we have below normal budget left and so...
2817 */
2818
2819 if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2820 memorystatus_freeze_degradation = TRUE;
2821 degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2822 degraded_throttle_window->pageouts = 0;
2823
2824 /*
2825 * Switch over to the degraded throttle window so the budget
2826 * doled out is based on that window.
2827 */
2828 interval = degraded_throttle_window;
2829 }
2830 }
2831
2832 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2833 }
2834 }
2835
2836 memorystatus_log_debug(
2837 "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining\n",
2838 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60);
2839 }
2840
/* Set once the freezer thread has done its one-time setup (first wakeup). */
bool memorystatus_freeze_thread_init = false;
/*
 * Freezer thread body. Runs as a continuation: each wakeup freezes up to
 * memorystatus_pick_freeze_count_for_wakeup() processes (budget permitting),
 * then blocks on memorystatus_freeze_wakeup and re-enters itself.
 */
static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
	/* Persists across continuations; tracks whether swap space ran low. */
	static boolean_t memorystatus_freeze_swap_low = FALSE;
	size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;

	if (!memorystatus_freeze_thread_init) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif
		memorystatus_freeze_thread_init = true;
	}

	max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();

	lck_mtx_lock(&freezer_mutex);
	if (memorystatus_freeze_enabled) {
		/*
		 * NOTE(review): normal-mode demotion runs here only under the
		 * ordered-list policy — confirm the intended polarity of this check
		 * against how the thaw-count policy schedules its demotions.
		 */
		if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
			memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
		}
		/* Keep freezing while there's quota, swap headroom, and either free slots or enough refreeze candidates. */
		while (num_frozen < max_to_freeze &&
		    memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
		    ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
		    (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD))) {
			/* Only freeze if we've not exceeded our pageout budgets.*/
			memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);

			if (memorystatus_freeze_budget_pages_remaining) {
				num_frozen_this_iteration = memorystatus_freeze_top_process();
				if (num_frozen_this_iteration == 0) {
					/* Nothing left to freeze. */
					break;
				}
				num_frozen += num_frozen_this_iteration;
			} else {
				memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
				break;
			}
		}
	}

	/*
	 * Give applications currently in the aging band a chance to age out into the idle band before
	 * running the freezer again.
	 */
	memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;

	/* Queue the wait before dropping the mutex so no wakeup is lost in between. */
	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
	lck_mtx_unlock(&freezer_mutex);

	thread_block((thread_continue_t) memorystatus_freeze_thread);
}
2894
2895 int
memorystatus_get_process_is_freezable(pid_t pid,int * is_freezable)2896 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2897 {
2898 proc_t p = PROC_NULL;
2899
2900 if (pid == 0) {
2901 return EINVAL;
2902 }
2903
2904 p = proc_find(pid);
2905 if (!p) {
2906 return ESRCH;
2907 }
2908
2909 /*
2910 * Only allow this on the current proc for now.
2911 * We can check for privileges and allow targeting another process in the future.
2912 */
2913 if (p != current_proc()) {
2914 proc_rele(p);
2915 return EPERM;
2916 }
2917
2918 proc_list_lock();
2919 *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
2920 proc_rele(p);
2921 proc_list_unlock();
2922
2923 return 0;
2924 }
2925
2926 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)2927 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
2928 {
2929 proc_t p = PROC_NULL;
2930
2931 if (pid == 0) {
2932 return EINVAL;
2933 }
2934
2935 /*
2936 * Only allow this on the current proc for now.
2937 * We can check for privileges and allow targeting another process in the future.
2938 */
2939 p = current_proc();
2940 if (proc_getpid(p) != pid) {
2941 return EPERM;
2942 }
2943
2944 proc_list_lock();
2945 *is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
2946 proc_list_unlock();
2947
2948 return 0;
2949 }
2950
2951 int
memorystatus_set_process_is_freezable(pid_t pid,boolean_t is_freezable)2952 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
2953 {
2954 proc_t p = PROC_NULL;
2955
2956 if (pid == 0) {
2957 return EINVAL;
2958 }
2959
2960 /*
2961 * To enable freezable status, you need to be root or an entitlement.
2962 */
2963 if (is_freezable &&
2964 !kauth_cred_issuser(kauth_cred_get()) &&
2965 !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
2966 return EPERM;
2967 }
2968
2969 p = proc_find(pid);
2970 if (!p) {
2971 return ESRCH;
2972 }
2973
2974 /*
2975 * A process can change its own status. A coalition leader can
2976 * change the status of coalition members.
2977 * An entitled process (or root) can change anyone's status.
2978 */
2979 if (p != current_proc() &&
2980 !kauth_cred_issuser(kauth_cred_get()) &&
2981 !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
2982 coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
2983 if (!coalition_is_leader(proc_task(current_proc()), coal)) {
2984 proc_rele(p);
2985 return EPERM;
2986 }
2987 }
2988
2989 proc_list_lock();
2990 if (is_freezable == FALSE) {
2991 /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
2992 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
2993 os_log(OS_LOG_DEFAULT, "memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
2994 proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
2995 } else {
2996 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
2997 os_log(OS_LOG_DEFAULT, "memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
2998 proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
2999 }
3000 proc_rele(p);
3001 proc_list_unlock();
3002
3003 return 0;
3004 }
3005
3006 /*
3007 * Called when process is created before it is added to a memorystatus bucket.
3008 */
3009 void
memorystatus_freeze_init_proc(proc_t p)3010 memorystatus_freeze_init_proc(proc_t p)
3011 {
3012 /* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3013 if (memorystatus_freeze_budget_pages_remaining == 0) {
3014 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3015 } else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3016 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3017 } else {
3018 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3019 }
3020 }
3021
3022
3023 static int
3024 sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS
3025 {
3026 #pragma unused(oidp, arg1, arg2)
3027
3028 if (!req->newptr) {
3029 return EINVAL;
3030 }
3031
3032 /* Need to be root or have entitlement */
3033 if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
3034 return EPERM;
3035 }
3036
3037 if (memorystatus_freeze_enabled == FALSE) {
3038 return ENOTSUP;
3039 }
3040
3041 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3042 return ENOTSUP;
3043 }
3044
3045 do_fastwake_warmup_all();
3046
3047 return 0;
3048 }
3049
/* Write-only sysctl (kern.memorystatus_do_fastwake_warmup_all): any write triggers fastwake warmup. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3052
3053 /*
3054 * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3055 * Takes ownership over the original value of list.
3056 * Assumes that list is protected by the freezer_mutex.
3057 * The caller should not hold any locks.
3058 */
3059 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3060 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3061 {
3062 errno_t error = 0;
3063 memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3064 size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3065
3066 /* Validate the user provided list. */
3067 if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3068 memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3069 return EINVAL;
3070 }
3071
3072 if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3073 memorystatus_log_error(
3074 "memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3075 return EINVAL;
3076 }
3077
3078 entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3079 entries_size = buffer_size;
3080 entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3081 if (entries == NULL) {
3082 return ENOMEM;
3083 }
3084
3085 error = copyin(buffer, entries, buffer_size);
3086 if (error != 0) {
3087 goto out;
3088 }
3089
3090 #if MACH_ASSERT
3091 for (size_t i = 0; i < entry_count; i++) {
3092 memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3093 if (entry->version != 1) {
3094 os_log(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3095 error = EINVAL;
3096 goto out;
3097 }
3098 if (i > 0 && entry->priority >= entries[i - 1].priority) {
3099 os_log(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3100 error = EINVAL;
3101 goto out;
3102 }
3103 }
3104 #endif /* MACH_ASSERT */
3105
3106 lck_mtx_lock(&freezer_mutex);
3107
3108 tmp_entries = list->mfcl_list;
3109 tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3110 list->mfcl_list = entries;
3111 list->mfcl_length = entry_count;
3112
3113 lck_mtx_unlock(&freezer_mutex);
3114
3115 entries = tmp_entries;
3116 entries_size = tmp_size;
3117
3118 out:
3119 kfree_data(entries, entries_size);
3120 return error;
3121 }
3122
errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
{
	/* Install the userspace-provided freeze candidate ordering (validated by set_freezer_candidate_list). */
	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
}
3128
errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
{
	/* Install the userspace-provided demotion candidate ordering (validated by set_freezer_candidate_list). */
	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
}
3134
3135 void
memorystatus_freezer_mark_ui_transition(proc_t p)3136 memorystatus_freezer_mark_ui_transition(proc_t p)
3137 {
3138 bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3139 proc_list_lock();
3140
3141 if (isSysProc(p)) {
3142 goto out;
3143 }
3144
3145 frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3146 previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3147 xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3148 suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3149 if (!suspended) {
3150 if (frozen) {
3151 if (!previous_focal_thaw) {
3152 p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3153 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3154 if (xpc_service) {
3155 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3156 }
3157 }
3158 }
3159 os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3160 }
3161
3162 out:
3163 proc_list_unlock();
3164 }
3165
3166 #endif /* CONFIG_FREEZE */
3167