xref: /xnu-11215.81.4/bsd/kern/kern_memorystatus_freeze.c (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1 /*
2  * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41 
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/kdebug.h>
53 #include <sys/kdebug_kernel.h>
54 #include <sys/proc.h>
55 #include <sys/proc_info.h>
56 #include <sys/reason.h>
57 #include <sys/signal.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/sysproto.h>
61 #include <sys/ubc.h> /* mach_to_bsd_errno */
62 #include <sys/wait.h>
63 #include <sys/tree.h>
64 #include <sys/priv.h>
65 #include <vm/vm_pageout.h>
66 #include <vm/vm_protos.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_compressor_xnu.h>
69 #include <vm/vm_compressor_backing_store_xnu.h>
70 #include <mach/machine/sdt.h>
71 #include <libkern/coreanalytics/coreanalytics.h>
72 #include <libkern/section_keywords.h>
73 #include <stdatomic.h>
74 
75 #include <IOKit/IOBSD.h>
76 
77 #if CONFIG_FREEZE
78 #include <vm/vm_map_xnu.h>
79 #endif /* CONFIG_FREEZE */
80 
81 #include <kern/kern_memorystatus_internal.h>
82 #include <sys/kern_memorystatus.h>
83 #include <sys/kern_memorystatus_freeze.h>
84 #include <sys/kern_memorystatus_notify.h>
85 #include <sys/ubc.h>
86 
/*
 * Frozen-process counters. These are defined ahead of the CONFIG_FREEZE
 * conditional, so they exist whether or not the freezer is compiled in.
 */
unsigned int memorystatus_frozen_count = 0;
unsigned int memorystatus_frozen_count_webcontent = 0;
unsigned int memorystatus_frozen_count_xpc_service = 0;
90 
91 #if CONFIG_FREEZE
92 
93 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
94 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
95 
96 /* Thresholds */
97 unsigned int memorystatus_freeze_threshold = 0;
98 unsigned int memorystatus_freeze_pages_min = 0;
99 unsigned int memorystatus_freeze_pages_max = 0;
100 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
101 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
102 uint64_t     memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
103 uint64_t     memorystatus_freeze_budget_multiplier = 100; /* Multiplies the daily budget by 100/multiplier */
104 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
105 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
106 
107 unsigned int memorystatus_max_frozen_demotions_daily = 0;
108 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
109 unsigned int memorystatus_min_thaw_refreeze_threshold;
110 
111 #if XNU_TARGET_OS_WATCH
112 #define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT true
113 #else
114 #define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT false
115 #endif
116 boolean_t memorystatus_freeze_dynamic_thread_delay_enabled = FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT;
117 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_dynamic_thread_delay_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_dynamic_thread_delay_enabled, 0, "");
118 
119 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST 1
120 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW 30
121 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST
122 unsigned int memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT;
123 
124 #if (XNU_TARGET_OS_IOS && !XNU_TARGET_OS_XR) || XNU_TARGET_OS_WATCH
125 #define FREEZE_ENABLED_DEFAULT TRUE
126 #else
127 #define FREEZE_ENABLED_DEFAULT FALSE
128 #endif
129 boolean_t memorystatus_freeze_enabled = FREEZE_ENABLED_DEFAULT;
130 
131 int memorystatus_freeze_wakeup = 0;
132 
133 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
134 
135 unsigned int memorystatus_frozen_processes_max = 0;
136 unsigned int memorystatus_frozen_shared_mb = 0;
137 unsigned int memorystatus_frozen_shared_mb_max = 0;
138 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
139 #if XNU_TARGET_OS_WATCH
140 unsigned int memorystatus_freeze_private_shared_pages_ratio = 1; /* Ratio of private:shared pages for a process to be freezer-eligible. */
141 #else
142 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
143 #endif
144 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
145 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
146 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
147 
148 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
149 
150 static inline boolean_t memorystatus_can_freeze_processes(void);
151 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
152 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
153 static uint32_t memorystatus_freeze_calculate_new_budget(
154 	unsigned int time_since_last_interval_expired_sec,
155 	unsigned int burst_multiple,
156 	unsigned int interval_duration_min,
157 	uint32_t rollover);
158 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
159 
160 static void memorystatus_set_freeze_is_enabled(bool enabled);
161 static void memorystatus_disable_freeze(void);
162 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
163 
164 /* Stats */
165 static uint64_t memorystatus_freeze_pageouts = 0;
166 
167 /* Throttling */
168 #define DEGRADED_WINDOW_MINS    (30)
169 #define NORMAL_WINDOW_MINS      (24 * 60)
170 
171 /* Protected by the freezer_mutex */
172 static throttle_interval_t throttle_intervals[] = {
173 	{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
174 	{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
175 };
176 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
177 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
178 uint32_t memorystatus_freeze_current_interval = 0;
179 static thread_call_t freeze_interval_reset_thread_call;
180 static uint32_t memorystatus_freeze_calculate_new_budget(
181 	unsigned int time_since_last_interval_expired_sec,
182 	unsigned int burst_multiple,
183 	unsigned int interval_duration_min,
184 	uint32_t rollover);
185 
186 struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
187 struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
188 /*
189  * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
190  * in order (as opposed to using the older LRU approach).
191  */
192 #if XNU_TARGET_OS_WATCH
193 #define FREEZER_USE_ORDERED_LIST_DEFAULT 1
194 #else
195 #define FREEZER_USE_ORDERED_LIST_DEFAULT 0
196 #endif
197 int memorystatus_freezer_use_ordered_list = FREEZER_USE_ORDERED_LIST_DEFAULT;
198 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
199 /*
200  * When enabled, demotion candidates are chosen from memorystatus_global_demotion_list
201  */
202 int memorystatus_freezer_use_demotion_list = 0;
203 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
204 
205 extern boolean_t vm_swap_max_budget(uint64_t *);
206 
207 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
208 static void memorystatus_demote_frozen_processes(bool urgent_mode);
209 
210 static void memorystatus_freeze_handle_error(proc_t p, const freezer_error_code_t freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
211 static void memorystatus_freeze_out_of_slots(void);
212 uint64_t memorystatus_freezer_thread_next_run_ts = 0;
213 
214 /* Sysctls needed for aggd stats */
215 
216 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
217 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
218 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
219 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
220 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
221 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
222 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
223 
224 /*
225  * Force a new interval with the given budget (no rollover).
226  */
227 static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)228 memorystatus_freeze_force_new_interval(uint64_t new_budget)
229 {
230 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
231 	mach_timespec_t now_ts;
232 	clock_sec_t sec;
233 	clock_nsec_t nsec;
234 
235 	clock_get_system_nanotime(&sec, &nsec);
236 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
237 	now_ts.tv_nsec = nsec;
238 	memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
239 	/* Don't carry over any excess pageouts since we're forcing a new budget */
240 	normal_throttle_window->pageouts = 0;
241 	memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
242 }
243 #if DEVELOPMENT || DEBUG
244 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
245 {
246 	#pragma unused(arg1, arg2, oidp)
247 	int error, changed;
248 	uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
249 
250 	lck_mtx_lock(&freezer_mutex);
251 
252 	error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
253 	if (changed) {
254 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
255 			lck_mtx_unlock(&freezer_mutex);
256 			return ENOTSUP;
257 		}
258 		memorystatus_freeze_force_new_interval(new_budget);
259 	}
260 
261 	lck_mtx_unlock(&freezer_mutex);
262 	return error;
263 }
264 
265 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
266 #else /* DEVELOPMENT || DEBUG */
267 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
268 #endif /* DEVELOPMENT || DEBUG */
269 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
270 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
271 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
272 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
273 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
274 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
275 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
276 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
277 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
278 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
279 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
280 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
281 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
282 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
283 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
284 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
285 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
286 
287 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
288 
/*
 * Calculates the hit rate for the freezer.
 * The hit rate is defined as the percentage of procs that are currently in the
 * freezer which we have thawed.
 * A low hit rate means we're freezing bad candidates since they're not re-used.
 *
 * Returns a value in [0, 100]:
 *  - 100 when frozen_count is 0 (nothing frozen, so nothing was missed);
 *  - 100 when thaw_count >= frozen_count (the counts are read with relaxed
 *    atomics and may be momentarily out of sync);
 *  - floor(100 * thaw_count / frozen_count) otherwise.
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0 || thaw_count >= frozen_count) {
		return 100;
	}

	if (thaw_count <= UINT64_MAX / 100) {
		/* Common case: the product fits in 64 bits. */
		return (int)(100 * thaw_count / frozen_count);
	}

	/*
	 * 100 * thaw_count would wrap. Instead, add thaw_count 100 times modulo
	 * frozen_count and count how often the running sum crosses frozen_count;
	 * that count is exactly floor(100 * thaw_count / frozen_count).
	 * thaw_count < frozen_count here, so neither branch can overflow.
	 */
	const uint64_t headroom = frozen_count - thaw_count;
	uint64_t remainder = 0;
	int percentage = 0;

	for (int step = 0; step < 100; step++) {
		if (remainder >= headroom) {
			/* remainder + thaw_count - frozen_count, without the intermediate sum */
			remainder -= headroom;
			percentage++;
		} else {
			remainder += thaw_count;
		}
	}
	return percentage;
}
313 
314 static int
get_thaw_percentage()315 get_thaw_percentage()
316 {
317 	uint64_t processes_frozen, processes_thawed;
318 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
319 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
320 	return calculate_thaw_percentage(processes_frozen, processes_thawed);
321 }
322 
323 static int
324 sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
325 {
326 #pragma unused(arg1, arg2)
327 	int thaw_percentage = get_thaw_percentage();
328 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
329 }
330 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
331 
332 static int
get_thaw_percentage_fg()333 get_thaw_percentage_fg()
334 {
335 	uint64_t processes_frozen, processes_thawed_fg;
336 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
337 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
338 	return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
339 }
340 
341 static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
342 {
343 #pragma unused(arg1, arg2)
344 	int thaw_percentage = get_thaw_percentage_fg();
345 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
346 }
347 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
348 
349 static int
get_thaw_percentage_webcontent()350 get_thaw_percentage_webcontent()
351 {
352 	uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
353 	processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
354 	processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
355 	return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
356 }
357 
358 static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
359 {
360 #pragma unused(arg1, arg2)
361 	int thaw_percentage = get_thaw_percentage_webcontent();
362 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
363 }
364 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
365 
366 
367 static int
get_thaw_percentage_bg()368 get_thaw_percentage_bg()
369 {
370 	uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
371 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
372 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
373 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
374 	return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
375 }
376 
377 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
378 {
379 #pragma unused(arg1, arg2)
380 	int thaw_percentage = get_thaw_percentage_bg();
381 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
382 }
383 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
384 
385 static int
get_thaw_percentage_fg_non_xpc_service()386 get_thaw_percentage_fg_non_xpc_service()
387 {
388 	uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
389 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
390 	processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
391 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
392 	processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
393 	/*
394 	 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
395 	 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
396 	 * Clamp just in case.
397 	 */
398 	processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
399 	processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
400 	return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
401 }
402 
403 static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
404 {
405 #pragma unused(arg1, arg2)
406 	int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
407 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
408 }
409 
410 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
411 
412 #define FREEZER_ERROR_STRING_LENGTH 128
413 
414 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
415 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
416 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
417 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
418 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
419 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
420 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
421 static int
422 sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
423 {
424 #pragma unused(arg1, arg2, oidp, req)
425 	int error = 0, changed = 0;
426 	uint64_t val = memorystatus_freeze_budget_multiplier;
427 	unsigned int new_budget;
428 	clock_sec_t sec;
429 	clock_nsec_t nsec;
430 	mach_timespec_t now_ts;
431 
432 	error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
433 	if (error) {
434 		return error;
435 	}
436 	if (changed) {
437 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
438 			return ENOTSUP;
439 		}
440 #if !(DEVELOPMENT || DEBUG)
441 		if (val > 100) {
442 			/* Can not increase budget on release. */
443 			return EINVAL;
444 		}
445 #endif
446 		lck_mtx_lock(&freezer_mutex);
447 
448 		memorystatus_freeze_budget_multiplier = val;
449 		/* Start a new throttle interval with this budget multiplier */
450 		new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
451 		clock_get_system_nanotime(&sec, &nsec);
452 		now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
453 		now_ts.tv_nsec = nsec;
454 		memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
455 		memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
456 
457 		lck_mtx_unlock(&freezer_mutex);
458 	}
459 	return 0;
460 }
461 EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
462 /*
463  * max. # of frozen process demotions we will allow in our daily cycle.
464  */
465 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
466 
467 /*
468  * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
469  */
470 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
471 
472 /*
473  * min # of global thaws needed for us to consider refreezing these processes.
474  */
475 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_min_thaw_refreeze_threshold, &memorystatus_min_thaw_refreeze_threshold, 0, UINT32_MAX, "");
476 
477 #if DEVELOPMENT || DEBUG
478 
479 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
480 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
481 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
482 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
483 
484 /*
485  * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
486  * "0" means no limit.
487  * Default is 10% of system-wide task limit.
488  */
489 
490 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
491 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
492 
493 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
494 
495 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
496 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
497 
498 /*
499  * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
500  * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
501  */
502 boolean_t memorystatus_freeze_to_memory = FALSE;
503 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
504 
505 #define VM_PAGES_FOR_ALL_PROCS    (2)
506 
507 /*
508  * Manual trigger of freeze and thaw for dev / debug kernels only.
509  */
510 static int
511 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
512 {
513 #pragma unused(arg1, arg2)
514 	int error, pid = 0;
515 	proc_t p;
516 	freezer_error_code_t freezer_error_code = 0;
517 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
518 	int ntasks = 0;
519 	coalition_t coal = COALITION_NULL;
520 
521 	error = sysctl_handle_int(oidp, &pid, 0, req);
522 	if (error || !req->newptr) {
523 		return error;
524 	}
525 
526 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
527 		error = mach_to_bsd_errno(vm_pageout_anonymous_pages());
528 		return error;
529 	}
530 
531 	lck_mtx_lock(&freezer_mutex);
532 	if (memorystatus_freeze_enabled == false) {
533 		lck_mtx_unlock(&freezer_mutex);
534 		memorystatus_log("sysctl_freeze: Freeze is DISABLED\n");
535 		return ENOTSUP;
536 	}
537 
538 again:
539 	p = proc_find(pid);
540 	if (p != NULL) {
541 		memorystatus_freezer_stats.mfs_process_considered_count++;
542 		uint32_t purgeable, wired, clean, dirty, shared;
543 		uint32_t max_pages = 0, state = 0;
544 
545 		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
546 			/*
547 			 * Freezer backed by the compressor and swap file(s)
548 			 * will hold compressed data.
549 			 *
550 			 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
551 			 * being swapped out to disk. Note that this disables freezer swap support globally,
552 			 * not just for the process being frozen.
553 			 *
554 			 *
555 			 * We don't care about the global freezer budget or the process's (min/max) budget here.
556 			 * The freeze sysctl is meant to force-freeze a process.
557 			 *
558 			 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
559 			 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
560 			 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
561 			 */
562 			max_pages = memorystatus_freeze_pages_max;
563 		} else {
564 			/*
565 			 * We only have the compressor without any swap.
566 			 */
567 			max_pages = UINT32_MAX - 1;
568 		}
569 
570 		proc_list_lock();
571 		state = p->p_memstat_state;
572 		proc_list_unlock();
573 
574 		/*
575 		 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
576 		 * We simply ensure that jetsam is not already working on the process and that the process has not
577 		 * explicitly disabled freezing.
578 		 */
579 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
580 			memorystatus_log_error("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
581 			    (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
582 			    (state & P_MEMSTAT_LOCKED) ? " locked" : "",
583 			    (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
584 
585 			proc_rele(p);
586 			lck_mtx_unlock(&freezer_mutex);
587 			return EPERM;
588 		}
589 
590 		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, pid, max_pages);
591 		error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
592 		if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
593 			memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
594 		}
595 		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);
596 
597 		if (error) {
598 			memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
599 			if (error == KERN_NO_SPACE) {
600 				/* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
601 				error = ENOSPC;
602 			} else {
603 				error = EIO;
604 			}
605 		} else {
606 			proc_list_lock();
607 			if (!_memstat_proc_is_frozen(p)) {
608 				p->p_memstat_state |= P_MEMSTAT_FROZEN;
609 				p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
610 				memorystatus_frozen_count++;
611 				os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
612 				if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
613 					memorystatus_frozen_count_webcontent++;
614 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
615 				}
616 				if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
617 					memorystatus_freeze_out_of_slots();
618 				}
619 			} else {
620 				// This was a re-freeze
621 				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
622 					memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
623 					memorystatus_freezer_stats.mfs_refreeze_count++;
624 				}
625 			}
626 			p->p_memstat_frozen_count++;
627 
628 			if (coal != NULL) {
629 				/* We just froze an xpc service. Mark it as such for telemetry */
630 				p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
631 				memorystatus_frozen_count_xpc_service++;
632 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
633 			}
634 
635 
636 			proc_list_unlock();
637 
638 			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
639 				/*
640 				 * We elevate only if we are going to swap out the data.
641 				 */
642 				error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
643 				    memorystatus_freeze_jetsam_band, TRUE);
644 
645 				if (error) {
646 					memorystatus_log_error("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
647 				}
648 			}
649 		}
650 
651 		if ((error == 0) && (coal == NULL)) {
652 			/*
653 			 * We froze a process and so we check to see if it was
654 			 * a coalition leader and if it has XPC services that
655 			 * might need freezing.
656 			 * Only one leader can be frozen at a time and so we shouldn't
657 			 * enter this block more than once per call. Hence the
658 			 * check that 'coal' has to be NULL. We should make this an
659 			 * assert() or panic() once we have a much more concrete way
660 			 * to detect an app vs a daemon.
661 			 */
662 
663 			task_t          curr_task = NULL;
664 
665 			curr_task = proc_task(p);
666 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
667 			if (coalition_is_leader(curr_task, coal)) {
668 				ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
669 				    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
670 
671 				if (ntasks > MAX_XPC_SERVICE_PIDS) {
672 					ntasks = MAX_XPC_SERVICE_PIDS;
673 				}
674 			}
675 		}
676 
677 		proc_rele(p);
678 
679 		while (ntasks) {
680 			pid = pid_list[--ntasks];
681 			goto again;
682 		}
683 
684 		lck_mtx_unlock(&freezer_mutex);
685 		return error;
686 	} else {
687 		memorystatus_log_error("sysctl_freeze: Invalid process\n");
688 	}
689 
690 
691 	lck_mtx_unlock(&freezer_mutex);
692 	return EINVAL;
693 }
694 
695 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
696     0, 0, &sysctl_memorystatus_freeze, "I", "");
697 
698 /*
699  * Manual trigger of agressive frozen demotion for dev / debug kernels only.
700  */
701 static int
702 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
703 {
704 #pragma unused(arg1, arg2)
705 	int error, val;
706 	/*
707 	 * Only demote on write to prevent demoting during `sysctl -a`.
708 	 * The actual value written doesn't matter.
709 	 */
710 	error = sysctl_handle_int(oidp, &val, 0, req);
711 	if (error || !req->newptr) {
712 		return error;
713 	}
714 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
715 		return ENOTSUP;
716 	}
717 	lck_mtx_lock(&freezer_mutex);
718 	memorystatus_demote_frozen_processes(false);
719 	lck_mtx_unlock(&freezer_mutex);
720 	return 0;
721 }
722 
723 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
724 
725 static int
726 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
727 {
728 #pragma unused(arg1, arg2)
729 
730 	int error, pid = 0;
731 	proc_t p;
732 
733 	if (memorystatus_freeze_enabled == false) {
734 		return ENOTSUP;
735 	}
736 
737 	error = sysctl_handle_int(oidp, &pid, 0, req);
738 	if (error || !req->newptr) {
739 		return error;
740 	}
741 
742 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
743 		do_fastwake_warmup_all();
744 		return 0;
745 	} else {
746 		p = proc_find(pid);
747 		if (p != NULL) {
748 			error = task_thaw(proc_task(p));
749 
750 			if (error) {
751 				error = EIO;
752 			} else {
753 				/*
754 				 * task_thaw() succeeded.
755 				 *
756 				 * We increment memorystatus_frozen_count on the sysctl freeze path.
757 				 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
758 				 * when this process exits.
759 				 *
760 				 * proc_list_lock();
761 				 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
762 				 * proc_list_unlock();
763 				 */
764 			}
765 			proc_rele(p);
766 			return error;
767 		}
768 	}
769 
770 	return EINVAL;
771 }
772 
773 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
774     0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
775 
776 
777 typedef struct _global_freezable_status {
778 	boolean_t       freeze_pages_threshold_crossed;
779 	boolean_t       freeze_eligible_procs_available;
780 	boolean_t       freeze_scheduled_in_future;
781 }global_freezable_status_t;
782 
783 typedef struct _proc_freezable_status {
784 	boolean_t    freeze_has_memstat_state;
785 	boolean_t    freeze_has_pages_min;
786 	int        freeze_has_probability;
787 	int        freeze_leader_eligible;
788 	boolean_t    freeze_attempted;
789 	uint32_t    p_memstat_state;
790 	uint32_t    p_pages;
791 	int        p_freeze_error_code;
792 	int        p_pid;
793 	int        p_leader_pid;
794 	char        p_name[MAXCOMLEN + 1];
795 }proc_freezable_status_t;
796 
/* Total # of processes in band 0 that we evaluate for freezability */
#define MAX_FREEZABLE_PROCESSES 200

/*
 * Coalition based freezing evaluations work in two passes.
 *
 * First pass (per process):
 *  - detect that the process is a coalition member and an XPC service
 *  - set its 'freeze_leader_eligible' field to
 *    FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be
 *    freezable too
 *
 * Second pass (once every process has been evaluated): for each coalition
 * member XPC service, look up the 'freezable' status of its leader. If and
 * only if
 *  - the XPC service is freezable, i.e. its individual freeze evaluation
 *    worked, and
 *  - its leader is also marked freezable
 * its 'freeze_leader_eligible' becomes FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */
#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN    (-1)
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1)
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2)
816 
817 static int
memorystatus_freezer_get_status(user_addr_t buffer,size_t buffer_size,int32_t * retval)818 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
819 {
820 	uint32_t            proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
821 	global_freezable_status_t    *list_head;
822 	proc_freezable_status_t     *list_entry, *list_entry_start;
823 	size_t                list_size = 0, entry_count = 0;
824 	proc_t                p, leader_proc;
825 	memstat_bucket_t        *bucket;
826 	uint32_t            state = 0, pages = 0;
827 	boolean_t            try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
828 	int                error = 0, probability_of_use = 0;
829 	pid_t              leader_pid = 0;
830 	struct memorystatus_freeze_list_iterator iterator;
831 
832 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
833 		return ENOTSUP;
834 	}
835 
836 	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
837 
838 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
839 
840 	if (buffer_size < list_size) {
841 		return EINVAL;
842 	}
843 
844 	list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
845 	if (list_head == NULL) {
846 		return ENOMEM;
847 	}
848 
849 	list_size = sizeof(global_freezable_status_t);
850 
851 	lck_mtx_lock(&freezer_mutex);
852 	proc_list_lock();
853 
854 	uint64_t curr_time = mach_absolute_time();
855 
856 	list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
857 	if (memorystatus_freezer_use_ordered_list) {
858 		list_head->freeze_eligible_procs_available = memorystatus_frozen_count < memorystatus_global_freeze_list.mfcl_length;
859 	} else {
860 		list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
861 	}
862 	list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
863 
864 	list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
865 	list_entry = list_entry_start;
866 
867 	bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
868 
869 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
870 
871 	if (memorystatus_freezer_use_ordered_list) {
872 		while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
873 			p = memorystatus_freezer_candidate_list_get_proc(
874 				&memorystatus_global_freeze_list,
875 				(iterator.global_freeze_list_index)++,
876 				NULL);
877 			if (p != PROC_NULL) {
878 				break;
879 			}
880 		}
881 	} else {
882 		p = memorystatus_get_first_proc_locked(&band, FALSE);
883 	}
884 
885 	proc_count++;
886 
887 	while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
888 	    (p) &&
889 	    (list_size < buffer_size)) {
890 		if (isSysProc(p)) {
891 			/*
892 			 * Daemon:- We will consider freezing it iff:
893 			 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
894 			 * - its role in the coalition is XPC service.
895 			 *
896 			 * We skip memory size requirements in this case.
897 			 */
898 
899 			coalition_t     coal = COALITION_NULL;
900 			task_t          leader_task = NULL, curr_task = NULL;
901 			int             task_role_in_coalition = 0;
902 
903 			curr_task = proc_task(p);
904 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
905 
906 			if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
907 				/*
908 				 * By default, XPC services without an app
909 				 * will be the leader of their own single-member
910 				 * coalition.
911 				 */
912 				goto skip_ineligible_xpc;
913 			}
914 
915 			leader_task = coalition_get_leader(coal);
916 			if (leader_task == TASK_NULL) {
917 				/*
918 				 * This jetsam coalition is currently leader-less.
919 				 * This could happen if the app died, but XPC services
920 				 * have not yet exited.
921 				 */
922 				goto skip_ineligible_xpc;
923 			}
924 
925 			leader_proc = (proc_t)get_bsdtask_info(leader_task);
926 			task_deallocate(leader_task);
927 
928 			if (leader_proc == PROC_NULL) {
929 				/* leader task is exiting */
930 				goto skip_ineligible_xpc;
931 			}
932 
933 			task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
934 
935 			if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
936 				xpc_skip_size_probability_check = TRUE;
937 				leader_pid = proc_getpid(leader_proc);
938 				goto continue_eval;
939 			}
940 
941 skip_ineligible_xpc:
942 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
943 			proc_count++;
944 			continue;
945 		}
946 
947 continue_eval:
948 		strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
949 
950 		list_entry->p_pid = proc_getpid(p);
951 
952 		state = p->p_memstat_state;
953 
954 		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
955 		    !(state & P_MEMSTAT_SUSPENDED)) {
956 			try_freeze = list_entry->freeze_has_memstat_state = FALSE;
957 		} else {
958 			try_freeze = list_entry->freeze_has_memstat_state = TRUE;
959 		}
960 
961 		list_entry->p_memstat_state = state;
962 
963 		if (xpc_skip_size_probability_check == TRUE) {
964 			/*
965 			 * Assuming the coalition leader is freezable
966 			 * we don't care re. minimum pages and probability
967 			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
968 			 * XPC services have to be explicity opted-out of the disabled
969 			 * state. And we checked that state above.
970 			 */
971 			list_entry->freeze_has_pages_min = TRUE;
972 			list_entry->p_pages = -1;
973 			list_entry->freeze_has_probability = -1;
974 
975 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
976 			list_entry->p_leader_pid = leader_pid;
977 
978 			xpc_skip_size_probability_check = FALSE;
979 		} else {
980 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
981 			list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
982 
983 			memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
984 			if (pages < memorystatus_freeze_pages_min) {
985 				try_freeze = list_entry->freeze_has_pages_min = FALSE;
986 			} else {
987 				list_entry->freeze_has_pages_min = TRUE;
988 			}
989 
990 			list_entry->p_pages = pages;
991 
992 			if (entry_count) {
993 				uint32_t j = 0;
994 				for (j = 0; j < entry_count; j++) {
995 					if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
996 					    p->p_name,
997 					    MAXCOMLEN) == 0) {
998 						probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
999 						break;
1000 					}
1001 				}
1002 
1003 				list_entry->freeze_has_probability = probability_of_use;
1004 
1005 				try_freeze = ((probability_of_use > 0) && try_freeze);
1006 			} else {
1007 				list_entry->freeze_has_probability = -1;
1008 			}
1009 		}
1010 
1011 		if (try_freeze) {
1012 			uint32_t purgeable, wired, clean, dirty, shared;
1013 			uint32_t max_pages = 0;
1014 			int freezer_error_code = 0;
1015 
1016 			error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
1017 
1018 			if (error) {
1019 				list_entry->p_freeze_error_code = freezer_error_code;
1020 			}
1021 
1022 			list_entry->freeze_attempted = TRUE;
1023 		}
1024 
1025 		list_entry++;
1026 		freeze_eligible_proc_considered++;
1027 
1028 		list_size += sizeof(proc_freezable_status_t);
1029 
1030 		if (memorystatus_freezer_use_ordered_list) {
1031 			p = PROC_NULL;
1032 			while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
1033 				p = memorystatus_freezer_candidate_list_get_proc(
1034 					&memorystatus_global_freeze_list,
1035 					(iterator.global_freeze_list_index)++,
1036 					NULL);
1037 				if (p != PROC_NULL) {
1038 					break;
1039 				}
1040 			}
1041 		} else {
1042 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1043 		}
1044 
1045 		proc_count++;
1046 	}
1047 
1048 	proc_list_unlock();
1049 	lck_mtx_unlock(&freezer_mutex);
1050 
1051 	list_entry = list_entry_start;
1052 
1053 	for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1054 		if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1055 			leader_pid = list_entry[xpc_index].p_leader_pid;
1056 
1057 			leader_proc = proc_find(leader_pid);
1058 
1059 			if (leader_proc) {
1060 				if (_memstat_proc_is_frozen(leader_proc)) {
1061 					/*
1062 					 * Leader has already been frozen.
1063 					 */
1064 					list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1065 					proc_rele(leader_proc);
1066 					continue;
1067 				}
1068 				proc_rele(leader_proc);
1069 			}
1070 
1071 			for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1072 				if (list_entry[leader_index].p_pid == leader_pid) {
1073 					if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1074 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1075 					} else {
1076 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1077 						list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1078 					}
1079 					break;
1080 				}
1081 			}
1082 
1083 			/*
1084 			 * Didn't find the leader entry. This might be likely because
1085 			 * the leader never made it down to band 0.
1086 			 */
1087 			if (leader_index == freeze_eligible_proc_considered) {
1088 				list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1089 				list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1090 			}
1091 		}
1092 	}
1093 
1094 	buffer_size = MIN(list_size, INT32_MAX);
1095 
1096 	error = copyout(list_head, buffer, buffer_size);
1097 	if (error == 0) {
1098 		*retval = (int32_t) buffer_size;
1099 	} else {
1100 		*retval = 0;
1101 	}
1102 
1103 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1104 	kfree_data(list_head, list_size);
1105 
1106 	memorystatus_log_debug("memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
1107 
1108 	return error;
1109 }
1110 
1111 #endif /* DEVELOPMENT || DEBUG */
1112 
1113 /*
1114  * Get a list of all processes in the freezer band which are currently frozen.
1115  * Used by powerlog to collect analytics on frozen process.
1116  */
1117 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1118 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1119 {
1120 	global_frozen_procs_t *frozen_procs = NULL;
1121 	uint32_t band = memorystatus_freeze_jetsam_band;
1122 	proc_t p;
1123 	int error;
1124 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1125 		return ENOTSUP;
1126 	}
1127 	if (buffer_size < sizeof(global_frozen_procs_t)) {
1128 		return EINVAL;
1129 	}
1130 	frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1131 	if (frozen_procs == NULL) {
1132 		return ENOMEM;
1133 	}
1134 
1135 	proc_list_lock();
1136 	p = memorystatus_get_first_proc_locked(&band, FALSE);
1137 	while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1138 		if (_memstat_proc_is_frozen(p)) {
1139 			frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1140 			strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1141 			    p->p_name, sizeof(proc_name_t));
1142 			frozen_procs->gfp_num_frozen++;
1143 		}
1144 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1145 	}
1146 	proc_list_unlock();
1147 
1148 	buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1149 	error = copyout(frozen_procs, buffer, buffer_size);
1150 	if (error == 0) {
1151 		*retval = (int32_t) buffer_size;
1152 	} else {
1153 		*retval = 0;
1154 	}
1155 	kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1156 
1157 	return error;
1158 }
1159 
1160 /*
1161  * If dasd is running an experiment that impacts their freezer candidate selection,
1162  * we record that in our telemetry.
1163  */
1164 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1165 
1166 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1167 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1168 {
1169 	memorystatus_freezer_trial_identifiers_v1 identifiers;
1170 	int error = 0;
1171 
1172 	if (buffer_size != sizeof(identifiers)) {
1173 		return EINVAL;
1174 	}
1175 	error = copyin(buffer, &identifiers, sizeof(identifiers));
1176 	if (error != 0) {
1177 		return error;
1178 	}
1179 	if (identifiers.version != 1) {
1180 		return EINVAL;
1181 	}
1182 	dasd_trial_identifiers = identifiers;
1183 	*retval = 0;
1184 	return error;
1185 }
1186 
1187 /*
1188  * Reset the freezer state by wiping out all suspended frozen apps, clearing
1189  * per-process freezer state, and starting a fresh interval.
1190  */
1191 static int
memorystatus_freezer_reset_state(int32_t * retval)1192 memorystatus_freezer_reset_state(int32_t *retval)
1193 {
1194 	uint32_t band = JETSAM_PRIORITY_IDLE;
1195 	/* Don't kill above the frozen band */
1196 	uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
1197 	proc_t next_p = PROC_NULL;
1198 	uint64_t new_budget;
1199 
1200 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1201 		return ENOTSUP;
1202 	}
1203 
1204 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
1205 	if (jetsam_reason == OS_REASON_NULL) {
1206 		memorystatus_log_error("memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
1207 	}
1208 	lck_mtx_lock(&freezer_mutex);
1209 	kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
1210 	proc_list_lock();
1211 
1212 	/*
1213 	 * Clear the considered and skip reason flags on all processes
1214 	 * so we're starting fresh with the new policy.
1215 	 */
1216 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1217 	while (next_p) {
1218 		proc_t p = next_p;
1219 		uint32_t state = p->p_memstat_state;
1220 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1221 
1222 		if (p->p_memstat_effectivepriority > kMaxBand) {
1223 			break;
1224 		}
1225 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1226 			continue;
1227 		}
1228 
1229 		p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
1230 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1231 	}
1232 
1233 	proc_list_unlock();
1234 
1235 	new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1236 	memorystatus_freeze_force_new_interval(new_budget);
1237 
1238 	lck_mtx_unlock(&freezer_mutex);
1239 	*retval = 0;
1240 	return 0;
1241 }
1242 
1243 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1244 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1245 {
1246 	int err = ENOTSUP;
1247 
1248 #if DEVELOPMENT || DEBUG
1249 	if (flags == FREEZER_CONTROL_GET_STATUS) {
1250 		err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1251 	}
1252 #endif /* DEVELOPMENT || DEBUG */
1253 	if (flags == FREEZER_CONTROL_GET_PROCS) {
1254 		err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1255 	} else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1256 		err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1257 	} else if (flags == FREEZER_CONTROL_RESET_STATE) {
1258 		err = memorystatus_freezer_reset_state(retval);
1259 	}
1260 
1261 	return err;
1262 }
1263 
1264 static bool
kill_all_frozen_processes(uint64_t max_band,bool suspended_only,os_reason_t jetsam_reason,uint64_t * memory_reclaimed_out)1265 kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
1266 {
1267 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1268 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1269 
1270 	unsigned int band = 0;
1271 	proc_t p = PROC_NULL, next_p = PROC_NULL;
1272 	pid_t pid = 0;
1273 	bool retval = false, killed = false;
1274 	uint32_t state;
1275 	uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
1276 	proc_list_lock();
1277 
1278 	band = JETSAM_PRIORITY_IDLE;
1279 	p = PROC_NULL;
1280 	next_p = PROC_NULL;
1281 
1282 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1283 	while (next_p) {
1284 		p = next_p;
1285 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1286 		state = p->p_memstat_state;
1287 
1288 		if (p->p_memstat_effectivepriority > max_band) {
1289 			break;
1290 		}
1291 
1292 		if (!(state & P_MEMSTAT_FROZEN)) {
1293 			continue;
1294 		}
1295 
1296 		if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
1297 			continue;
1298 		}
1299 
1300 		if (state & P_MEMSTAT_ERROR) {
1301 			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
1302 		}
1303 
1304 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1305 			memorystatus_log("memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
1306 			skips++;
1307 			continue;
1308 		}
1309 
1310 		footprint = get_task_phys_footprint(proc_task(p));
1311 		pid = proc_getpid(p);
1312 		proc_list_unlock();
1313 
1314 		/* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
1315 		os_reason_ref(jetsam_reason);
1316 		retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
1317 		if (retval) {
1318 			killed = true;
1319 			memory_reclaimed += footprint;
1320 		}
1321 		proc_list_lock();
1322 		/*
1323 		 * The bands might have changed when we dropped the proc list lock.
1324 		 * So start from the beginning.
1325 		 * Since we're preventing any further freezing by holding the freezer mutex,
1326 		 * and we skip anything we've already tried to kill this is guaranteed to terminate.
1327 		 */
1328 		band = 0;
1329 		skips = 0;
1330 		next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1331 	}
1332 
1333 	assert(skips <= memorystatus_frozen_count);
1334 #if MACH_ASSERT
1335 	if (!suspended_only && max_band == JETSAM_PRIORITY_MAX) {
1336 		/*
1337 		 * Check that we've killed all frozen processes.
1338 		 * Note that they may still be exiting (represented by skips).
1339 		 */
1340 		if (memorystatus_frozen_count - skips > 0) {
1341 			assert(memorystatus_freeze_enabled == false);
1342 
1343 			panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
1344 			    memorystatus_frozen_count);
1345 		}
1346 	}
1347 #endif /* MACH_ASSERT */
1348 	if (memory_reclaimed_out) {
1349 		*memory_reclaimed_out = memory_reclaimed;
1350 	}
1351 	proc_list_unlock();
1352 	return killed;
1353 }
1354 
1355 /*
1356  * Disables the freezer, jetsams all frozen processes,
1357  * and reclaims the swap space immediately.
1358  */
1359 
1360 void
memorystatus_disable_freeze(void)1361 memorystatus_disable_freeze(void)
1362 {
1363 	uint64_t memory_reclaimed = 0;
1364 	bool killed = false;
1365 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1366 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1367 
1368 
1369 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1370 	    memorystatus_available_pages);
1371 	memorystatus_log("memorystatus: Disabling freezer. Will kill all frozen processes\n");
1372 
1373 	/*
1374 	 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1375 	 * and all frozen processes will be killed
1376 	 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1377 	 * ensures that no new processes will be frozen once we release the mutex.
1378 	 *
1379 	 */
1380 	memorystatus_freeze_enabled = false;
1381 
1382 	/*
1383 	 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1384 	 */
1385 	vm_page_reactivate_all_throttled();
1386 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1387 	if (jetsam_reason == OS_REASON_NULL) {
1388 		memorystatus_log_error("memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1389 	}
1390 
1391 	killed = kill_all_frozen_processes(JETSAM_PRIORITY_MAX, false, jetsam_reason, &memory_reclaimed);
1392 
1393 	if (killed) {
1394 		memorystatus_log_info("memorystatus: Killed all frozen processes.\n");
1395 		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1396 
1397 		memorystatus_post_snapshot();
1398 	} else {
1399 		memorystatus_log_info("memorystatus: No frozen processes to kill.\n");
1400 	}
1401 
1402 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1403 	    memorystatus_available_pages, memory_reclaimed);
1404 
1405 	return;
1406 }
1407 
1408 static void
memorystatus_set_freeze_is_enabled(bool enabled)1409 memorystatus_set_freeze_is_enabled(bool enabled)
1410 {
1411 	lck_mtx_lock(&freezer_mutex);
1412 	if (enabled != memorystatus_freeze_enabled) {
1413 		if (enabled) {
1414 			memorystatus_freeze_enabled = true;
1415 		} else {
1416 			memorystatus_disable_freeze();
1417 		}
1418 	}
1419 	lck_mtx_unlock(&freezer_mutex);
1420 }
1421 
1422 
1423 static int
1424 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1425 {
1426 #pragma unused(arg1, arg2)
1427 	int error, val = memorystatus_freeze_enabled ? 1 : 0;
1428 
1429 	error = sysctl_handle_int(oidp, &val, 0, req);
1430 	if (error || !req->newptr) {
1431 		return error;
1432 	}
1433 
1434 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1435 		memorystatus_log_error("memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1436 		return EINVAL;
1437 	}
1438 
1439 	memorystatus_set_freeze_is_enabled(val);
1440 
1441 	return 0;
1442 }
1443 
1444 EXPERIMENT_FACTOR_PROC(_vm, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1445 
1446 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1447 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1448 {
1449 	uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1450 	uint64_t interval_expiration_absolutetime;
1451 	nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1452 	memorystatus_log_info("memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1453 
1454 	thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1455 }
1456 
1457 extern uuid_string_t trial_treatment_id;
1458 extern uuid_string_t trial_experiment_id;
1459 extern int trial_deployment_id;
1460 
1461 CA_EVENT(freezer_interval,
1462     CA_INT, budget_remaining,
1463     CA_INT, error_below_min_pages,
1464     CA_INT, error_excess_shared_memory,
1465     CA_INT, error_low_private_shared_ratio,
1466     CA_INT, error_no_compressor_space,
1467     CA_INT, error_no_swap_space,
1468     CA_INT, error_low_probability_of_use,
1469     CA_INT, error_elevated,
1470     CA_INT, error_other,
1471     CA_INT, frozen_count,
1472     CA_INT, pageouts,
1473     CA_INT, refreeze_average,
1474     CA_INT, skipped_full,
1475     CA_INT, skipped_shared_mb_high,
1476     CA_INT, swapusage,
1477     CA_INT, thaw_count,
1478     CA_INT, thaw_percentage,
1479     CA_INT, thaws_per_gb,
1480     CA_INT, trial_deployment_id,
1481     CA_INT, dasd_trial_deployment_id,
1482     CA_INT, budget_exhaustion_duration_remaining,
1483     CA_INT, thaw_percentage_webcontent,
1484     CA_INT, thaw_percentage_fg,
1485     CA_INT, thaw_percentage_bg,
1486     CA_INT, thaw_percentage_fg_non_xpc_service,
1487     CA_INT, fg_resume_count,
1488     CA_INT, unique_freeze_count,
1489     CA_INT, unique_thaw_count,
1490     CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
1491     CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
1492     CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
1493     CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1494 
1495 
1496 /*
1497  * Record statistics from the expiring interval
1498  * via core analytics.
1499  */
1500 static void
memorystatus_freeze_record_interval_analytics(void)1501 memorystatus_freeze_record_interval_analytics(void)
1502 {
1503 	ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
1504 	CA_EVENT_TYPE(freezer_interval) * e = event->data;
1505 	e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
1506 	uint64_t process_considered_count, refrozen_count, below_threshold_count;
1507 	memory_object_size_t swap_size;
1508 	process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
1509 	if (process_considered_count != 0) {
1510 		e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
1511 		e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
1512 		e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
1513 		e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
1514 		e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
1515 		e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
1516 		e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
1517 		e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
1518 	}
1519 	e->frozen_count = memorystatus_frozen_count;
1520 	e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
1521 	refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
1522 	if (refrozen_count != 0) {
1523 		e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
1524 	}
1525 	below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
1526 	if (below_threshold_count != 0) {
1527 		e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
1528 		e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
1529 	}
1530 	if (VM_CONFIG_SWAP_IS_PRESENT) {
1531 		swap_size = vm_swap_get_total_space();
1532 		if (swap_size) {
1533 			e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
1534 		}
1535 	}
1536 	e->thaw_count = memorystatus_thaw_count;
1537 	e->thaw_percentage = get_thaw_percentage();
1538 	e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
1539 	e->thaw_percentage_fg = get_thaw_percentage_fg();
1540 	e->thaw_percentage_bg = get_thaw_percentage_bg();
1541 	e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();
1542 
1543 	if (e->pageouts / (1UL << 10) != 0) {
1544 		e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
1545 	}
1546 	e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
1547 	e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
1548 	e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1549 	e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
1550 
1551 	/*
1552 	 * Record any xnu or dasd experiment information
1553 	 */
1554 	strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
1555 	strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
1556 	e->trial_deployment_id = trial_deployment_id;
1557 	strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
1558 	strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
1559 	e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;
1560 
1561 	CA_EVENT_SEND(event);
1562 }
1563 
1564 static void
memorystatus_freeze_reset_interval(void * arg0,void * arg1)1565 memorystatus_freeze_reset_interval(void *arg0, void *arg1)
1566 {
1567 #pragma unused(arg0, arg1)
1568 	struct throttle_interval_t *interval = NULL;
1569 	clock_sec_t sec;
1570 	clock_nsec_t nsec;
1571 	mach_timespec_t now_ts;
1572 	uint32_t budget_rollover = 0;
1573 
1574 	clock_get_system_nanotime(&sec, &nsec);
1575 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
1576 	now_ts.tv_nsec = nsec;
1577 	interval = normal_throttle_window;
1578 
1579 	/* Record analytics from the old interval before resetting. */
1580 	memorystatus_freeze_record_interval_analytics();
1581 
1582 	lck_mtx_lock(&freezer_mutex);
1583 	/* How long has it been since the previous interval expired? */
1584 	mach_timespec_t expiration_period_ts = now_ts;
1585 	SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
1586 	/* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
1587 	budget_rollover = interval->pageouts > interval->max_pageouts ?
1588 	    0 : interval->max_pageouts - interval->pageouts;
1589 
1590 	memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
1591 		    expiration_period_ts.tv_sec, interval->burst_multiple,
1592 		    interval->mins, budget_rollover),
1593 	    now_ts);
1594 	memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;
1595 
1596 	if (!memorystatus_freezer_use_demotion_list) {
1597 		memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
1598 	}
1599 	lck_mtx_unlock(&freezer_mutex);
1600 }
1601 
1602 
1603 proc_t
memorystatus_get_coalition_leader_and_role(proc_t p,int * role_in_coalition)1604 memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1605 {
1606 	coalition_t     coal = COALITION_NULL;
1607 	task_t          leader_task = NULL, curr_task = NULL;
1608 	proc_t          leader_proc = PROC_NULL;
1609 
1610 	curr_task = proc_task(p);
1611 	coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1612 
1613 	if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1614 		return p;
1615 	}
1616 
1617 	leader_task = coalition_get_leader(coal);
1618 	if (leader_task == TASK_NULL) {
1619 		/*
1620 		 * This jetsam coalition is currently leader-less.
1621 		 * This could happen if the app died, but XPC services
1622 		 * have not yet exited.
1623 		 */
1624 		return PROC_NULL;
1625 	}
1626 
1627 	leader_proc = (proc_t)get_bsdtask_info(leader_task);
1628 	task_deallocate(leader_task);
1629 
1630 	if (leader_proc == PROC_NULL) {
1631 		/* leader task is exiting */
1632 		return PROC_NULL;
1633 	}
1634 
1635 	*role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1636 
1637 	return leader_proc;
1638 }
1639 
1640 bool
memorystatus_freeze_process_is_recommended(const proc_t p)1641 memorystatus_freeze_process_is_recommended(const proc_t p)
1642 {
1643 	assert(!memorystatus_freezer_use_ordered_list);
1644 	int probability_of_use = 0;
1645 
1646 	size_t entry_count = 0, i = 0;
1647 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1648 	if (entry_count == 0) {
1649 		/*
1650 		 * If dasd hasn't supplied a table yet, we default to every app being eligible
1651 		 * for the freezer.
1652 		 */
1653 		return true;
1654 	}
1655 	for (i = 0; i < entry_count; i++) {
1656 		/*
1657 		 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1658 		 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1659 		 * MAXCOMLEN bytes here since the name in the probabilities table could
1660 		 * be truncated from the proc_t's p_name.
1661 		 */
1662 		if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1663 		    p->p_name,
1664 		    MAXCOMLEN) == 0) {
1665 			probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1666 			break;
1667 		}
1668 	}
1669 	return probability_of_use > 0;
1670 }
1671 
1672 __private_extern__ void
memorystatus_freeze_init(void)1673 memorystatus_freeze_init(void)
1674 {
1675 	kern_return_t result;
1676 	thread_t thread;
1677 
1678 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1679 		int32_t memorystatus_freezer_use_ordered_list_bootarg = 0;
1680 		if (PE_parse_boot_argn("memorystatus_freezer_use_ordered_list", &memorystatus_freezer_use_ordered_list_bootarg, sizeof(memorystatus_freezer_use_ordered_list_bootarg))) {
1681 			memorystatus_freezer_use_ordered_list = (memorystatus_freezer_use_ordered_list_bootarg != 0);
1682 		}
1683 
1684 		int32_t memorystatus_freeze_max_candidate_band_bootarg = 0;
1685 		if (PE_parse_boot_argn("memorystatus_freeze_max_candidate_band", &memorystatus_freeze_max_candidate_band_bootarg, sizeof(memorystatus_freeze_max_candidate_band_bootarg))) {
1686 			if (memorystatus_freeze_max_candidate_band_bootarg >= 0 && memorystatus_freeze_max_candidate_band_bootarg <= 1000) {
1687 				memorystatus_freeze_max_candidate_band = memorystatus_freeze_max_candidate_band_bootarg;
1688 			}
1689 		}
1690 
1691 		/*
1692 		 * This is just the default value if the underlying
1693 		 * storage device doesn't have any specific budget.
1694 		 * We check with the storage layer in memorystatus_freeze_update_throttle()
1695 		 * before we start our freezing the first time.
1696 		 */
1697 		memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1698 
1699 		result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1700 		if (result == KERN_SUCCESS) {
1701 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1702 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1703 			thread_set_thread_name(thread, "VM_freezer");
1704 
1705 			thread_deallocate(thread);
1706 		} else {
1707 			panic("Could not create memorystatus_freeze_thread");
1708 		}
1709 
1710 		freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
1711 		/* Start a new interval */
1712 
1713 		lck_mtx_lock(&freezer_mutex);
1714 		uint32_t budget;
1715 		budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1716 		memorystatus_freeze_force_new_interval(budget);
1717 		lck_mtx_unlock(&freezer_mutex);
1718 	} else {
1719 		memorystatus_freeze_budget_pages_remaining = 0;
1720 	}
1721 }
1722 
1723 void
memorystatus_freeze_configure_for_swap()1724 memorystatus_freeze_configure_for_swap()
1725 {
1726 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1727 		return;
1728 	}
1729 
1730 	assert(memorystatus_swap_all_apps);
1731 
1732 	/*
1733 	 * We expect both a larger working set and larger individual apps
1734 	 * in this mode, so tune up the freezer accordingly.
1735 	 */
1736 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED_DEFAULT;
1737 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED_DEFAULT;
1738 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED_DEFAULT;
1739 
1740 	/*
1741 	 * We don't have a budget when running with full app swap.
1742 	 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1743 	 * unlimited budget.
1744 	 */
1745 	lck_mtx_lock(&freezer_mutex);
1746 	uint32_t budget;
1747 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1748 	memorystatus_freeze_force_new_interval(budget);
1749 	lck_mtx_unlock(&freezer_mutex);
1750 }
1751 
1752 void
memorystatus_freeze_disable_swap()1753 memorystatus_freeze_disable_swap()
1754 {
1755 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1756 		return;
1757 	}
1758 
1759 	assert(!memorystatus_swap_all_apps);
1760 
1761 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_DEFAULT;
1762 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_DEFAULT;
1763 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_DEFAULT;
1764 
1765 	/*
1766 	 * Calculate a new budget now that we're constrained by our daily write budget again.
1767 	 */
1768 	lck_mtx_lock(&freezer_mutex);
1769 	uint32_t budget;
1770 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1771 	memorystatus_freeze_force_new_interval(budget);
1772 	lck_mtx_unlock(&freezer_mutex);
1773 }
1774 
/*
 * Freeze (or re-freeze) a single process.
 *
 * Locking: called with both the freezer_mutex and proc_list_lock held & both
 * will be held on return. The proc_list_lock is dropped around task_freeze()
 * and, on success, again while the freeze note is sent and the throttle
 * statistics are updated.
 *
 * Parameters:
 *   p                     - the process to freeze.
 *   coal                  - IN / OUT, may be NULL. If non-NULL and *coal is
 *                           NULL on entry, *coal is set to p's jetsam
 *                           coalition after a successful freeze. In every
 *                           other case the frozen process is marked as an
 *                           XPC service.
 *   coalition_list        - OUT. Only written when coal is non-NULL, *coal was
 *                           NULL and p turns out to be the coalition leader;
 *                           receives up to MAX_XPC_SERVICE_PIDS pids.
 *   coalition_list_length - OUT. Written together with coalition_list;
 *                           clamped to MAX_XPC_SERVICE_PIDS.
 *
 * Returns:
 *   0      - the process was frozen.
 *   EINVAL - the process is not eligible, or task_freeze() failed.
 *   EBUSY  - a re-freeze candidate is P_MEMSTAT_LOCKED, or proc_ref() failed.
 *   ENOSPC - no frozen-process slots are left (first-time freeze only).
 */
static int
memorystatus_freeze_process(
	proc_t p,
	coalition_t *coal, /* IN / OUT */
	pid_t *coalition_list, /* OUT */
	unsigned int *coalition_list_length /* OUT */)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	kern_return_t kr;
	uint32_t purgeable, wired, clean, dirty, shared;
	uint64_t max_pages = 0;
	freezer_error_code_t freezer_error_code = 0;
	bool is_refreeze = false;
	task_t curr_task = TASK_NULL;

	/* Cache the pid: p is reassigned by proc_ref() below and may become NULL. */
	pid_t aPid = proc_getpid(p);

	/* An already-frozen process can only be re-frozen. */
	is_refreeze = _memstat_proc_is_frozen(p);

	/* Ensure the process is eligible for (re-)freezing */
	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
		return EINVAL;
	}
	if (is_refreeze) {
		/*
		 * Not currently being looked at for something.
		 */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			return EBUSY;
		}

		/*
		 * We are going to try and refreeze and so re-evaluate
		 * the process. We don't want to double count the shared
		 * memory. So deduct the old snapshot here.
		 */
		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
		p->p_memstat_freeze_sharedanon_pages = 0;

		/* The process is consumed as a re-freeze candidate regardless of the outcome below. */
		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	} else {
		if (!memorystatus_is_process_eligible_for_freeze(p)) {
			return EINVAL;
		}
		if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
			memorystatus_freeze_handle_error(p, FREEZER_ERROR_NO_SLOTS, is_refreeze, aPid, (coal ? *coal : NULL), "memorystatus_freeze_process");
			return ENOSPC;
		}
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * Freezer backed by the compressor and swap file(s)
		 * will hold compressed data.
		 */

		/* Bounded by both the per-process cap and what is left of the budget. */
		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
	} else {
		/*
		 * We only have the compressor pool.
		 */
		max_pages = UINT32_MAX - 1;
	}

	/* Mark as locked temporarily to avoid kill */
	p->p_memstat_state |= P_MEMSTAT_LOCKED;

	/*
	 * NOTE(review): if proc_ref() fails, p is overwritten with NULL and
	 * the P_MEMSTAT_LOCKED bit set just above is never cleared on the
	 * original proc, nor is a wakeup issued - confirm this is intended.
	 */
	p = proc_ref(p, true);
	if (!p) {
		memorystatus_freezer_stats.mfs_error_other_count++;
		return EBUSY;
	}

	/* Drop the proc_list_lock across the freeze itself; the ref taken above keeps p valid. */
	proc_list_unlock();

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, aPid, max_pages);

	/* task_freeze() takes a 32-bit page count. */
	max_pages = MIN(max_pages, UINT32_MAX);
	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
	}

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);

	memorystatus_log_debug("memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %llu, shared %d",
	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);

	/* Re-take the proc_list_lock before touching memstat state again. */
	proc_list_lock();

	/* Success? */
	if (KERN_SUCCESS == kr) {
		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

		/* Record the new shared-memory snapshot, per process and globally. */
		p->p_memstat_freeze_sharedanon_pages += shared;

		memorystatus_frozen_shared_mb += shared;

		if (!is_refreeze) {
			/* First-time freeze: mark frozen and bump the frozen counters. */
			p->p_memstat_state |= P_MEMSTAT_FROZEN;
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
			memorystatus_frozen_count++;
			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
				memorystatus_frozen_count_webcontent++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
			}
			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
				memorystatus_freeze_out_of_slots();
			}
		} else {
			// This was a re-freeze
			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
				memorystatus_freezer_stats.mfs_refreeze_count++;
			}
		}

		p->p_memstat_frozen_count++;

		/*
		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
		 * to its higher jetsam band.
		 */
		proc_list_unlock();

		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
#if FREEZE_USE_ELEVATED_INACTIVE_BAND
			int ret;
			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);

			if (ret) {
				memorystatus_log_error("Elevating the frozen process failed with %d\n", ret);
				/* not fatal */
			}
#endif

			/* Update stats */
			/* The dirty pages just written are charged to every throttle interval. */
			for (unsigned int i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}
		}
		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
		memorystatus_log("memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
		    memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);

		proc_list_lock();

		memorystatus_freeze_pageouts += dirty;

		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
			/*
			 * Add some eviction logic here? At some point should we
			 * jetsam a process to get back its swap space so that we
			 * can freeze a more eligible process at this moment in time?
			 */
		}

		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
		if (coal != NULL && *coal == NULL) {
			curr_task = proc_task(p);
			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, *coal)) {
				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);

				/* Never report more pids than the list can hold. */
				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
				}
			}
		} else {
			/* We just froze an xpc service. Mark it as such for telemetry */
			/* This branch is also taken whenever the caller passes coal == NULL. */
			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
			memorystatus_frozen_count_xpc_service++;
			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
		}

		/* Done: unlock the process, wake anyone sleeping on its state and drop our ref. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);
		return 0;
	} else {
		if (is_refreeze) {
			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
				/*
				 * Keeping this prior-frozen process in this high band when
				 * we failed to re-freeze it due to bad shared memory usage
				 * could cause excessive pressure on the lower bands.
				 * We need to demote it for now. It'll get re-evaluated next
				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
				 * bit.
				 */

				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
				memstat_update_priority_locked(p, JETSAM_PRIORITY_IDLE,
				    MEMSTAT_PRIORITY_INSERT_HEAD | MEMSTAT_PRIORITY_NO_AGING);
			}
		} else {
			/* A failed first-time freeze is flagged so the process is ignored from now on. */
			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
		}
		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_process");

		/* Same teardown as the success path: unlock, wake waiters, drop our ref. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);

		return EINVAL;
	}
}
1997 
1998 /*
1999  * Synchronously freeze the passed proc. Called with a reference to the proc held.
2000  *
2001  * Doesn't deal with:
2002  * - re-freezing because this is called on a specific process and
2003  *   not by the freezer thread. If that changes, we'll have to teach it about
2004  *   refreezing a frozen process.
2005  *
2006  * - grouped/coalition freezing because we are hoping to deprecate this
2007  *   interface as it was used by user-space to freeze particular processes. But
2008  *   we have moved away from that approach to having the kernel choose the optimal
2009  *   candidates to be frozen.
2010  *
2011  * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
2012  * returns EINVAL or the value returned by task_freeze().
2013  */
2014 int
memorystatus_freeze_process_sync(proc_t p)2015 memorystatus_freeze_process_sync(proc_t p)
2016 {
2017 	int ret = EINVAL;
2018 	boolean_t memorystatus_freeze_swap_low = FALSE;
2019 
2020 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2021 		return ENOTSUP;
2022 	}
2023 
2024 	lck_mtx_lock(&freezer_mutex);
2025 
2026 	if (p == NULL) {
2027 		memorystatus_log_error("memorystatus_freeze_process_sync: Invalid process\n");
2028 		goto exit;
2029 	}
2030 
2031 	if (memorystatus_freeze_enabled == false) {
2032 		memorystatus_log_error("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
2033 		goto exit;
2034 	}
2035 
2036 	if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2037 		memorystatus_log_info("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
2038 		goto exit;
2039 	}
2040 
2041 	memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2042 	if (!memorystatus_freeze_budget_pages_remaining) {
2043 		memorystatus_log_info("memorystatus_freeze_process_sync: exit with NO available budget\n");
2044 		goto exit;
2045 	}
2046 
2047 	proc_list_lock();
2048 
2049 	ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2050 
2051 exit:
2052 	lck_mtx_unlock(&freezer_mutex);
2053 
2054 	return ret;
2055 }
2056 
2057 proc_t
memorystatus_freezer_candidate_list_get_proc(struct memorystatus_freezer_candidate_list * list,size_t index,uint64_t * pid_mismatch_counter)2058 memorystatus_freezer_candidate_list_get_proc(
2059 	struct memorystatus_freezer_candidate_list *list,
2060 	size_t index,
2061 	uint64_t *pid_mismatch_counter)
2062 {
2063 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2064 	if (list->mfcl_list == NULL || list->mfcl_length <= index) {
2065 		return NULL;
2066 	}
2067 	memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
2068 	if (entry->pid == NO_PID) {
2069 		/* Entry has been removed. */
2070 		return NULL;
2071 	}
2072 
2073 	proc_t p = proc_find_locked(entry->pid);
2074 	if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2075 		/*
2076 		 * We grab a reference when we are about to freeze the process. So drop
2077 		 * the reference that proc_find_locked() grabbed for us.
2078 		 * We also have the proc_list_lock so this process is stable.
2079 		 */
2080 		proc_rele(p);
2081 		return p;
2082 	} else {
2083 		if (p) {
2084 			/* pid rollover. */
2085 			proc_rele(p);
2086 		}
2087 		/*
2088 		 * The proc has exited since we received this list.
2089 		 * It may have re-launched with a new pid, so we go looking for it.
2090 		 */
2091 		unsigned int band = JETSAM_PRIORITY_IDLE;
2092 		p = memorystatus_get_first_proc_locked(&band, TRUE);
2093 		while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
2094 			if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2095 				if (pid_mismatch_counter != NULL) {
2096 					(*pid_mismatch_counter)++;
2097 				}
2098 				/* Stash the pid for faster lookup next time. */
2099 				entry->pid = proc_getpid(p);
2100 				return p;
2101 			}
2102 			p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2103 		}
2104 		/* No match. */
2105 		return NULL;
2106 	}
2107 }
2108 
2109 static size_t
memorystatus_freeze_pid_list(pid_t * pid_list,unsigned int num_pids)2110 memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2111 {
2112 	int ret = 0;
2113 	size_t num_frozen = 0;
2114 	while (num_pids > 0 &&
2115 	    memorystatus_frozen_count < memorystatus_frozen_processes_max) {
2116 		pid_t pid = pid_list[--num_pids];
2117 		proc_t p = proc_find_locked(pid);
2118 		if (p) {
2119 			proc_rele(p);
2120 			ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2121 			if (ret != 0) {
2122 				break;
2123 			}
2124 			num_frozen++;
2125 		}
2126 	}
2127 	return num_frozen;
2128 }
2129 
2130 /*
2131  * Attempt to freeze the best candidate process.
2132  * Keep trying until we freeze something or run out of candidates.
2133  * Returns the number of processes frozen (including coalition members).
2134  */
2135 static size_t
memorystatus_freeze_top_process(void)2136 memorystatus_freeze_top_process(void)
2137 {
2138 	int freeze_ret;
2139 	size_t num_frozen = 0;
2140 	coalition_t coal = COALITION_NULL;
2141 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
2142 	unsigned int ntasks = 0;
2143 	struct memorystatus_freeze_list_iterator iterator;
2144 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2145 
2146 	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
2147 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages);
2148 
2149 	proc_list_lock();
2150 	while (true) {
2151 		proc_t p = memorystatus_freeze_pick_process(&iterator);
2152 		if (p == PROC_NULL) {
2153 			/* Nothing left to freeze */
2154 			break;
2155 		}
2156 		freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
2157 		if (freeze_ret == 0) {
2158 			num_frozen = 1;
2159 			/*
2160 			 * We froze a process successfully.
2161 			 * If it's a coalition head, freeze the coalition.
2162 			 * Then we're done for now.
2163 			 */
2164 			if (coal != NULL) {
2165 				num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
2166 			}
2167 			break;
2168 		} else {
2169 			if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
2170 				break;
2171 			}
2172 			/*
2173 			 * Freeze failed but we're not out of space.
2174 			 * Keep trying to find a good candidate,
2175 			 * memorystatus_freeze_pick_process will not return this proc again until
2176 			 * we reset the iterator.
2177 			 */
2178 		}
2179 	}
2180 	proc_list_unlock();
2181 
2182 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages);
2183 
2184 	return num_frozen;
2185 }
2186 
#if DEVELOPMENT || DEBUG
/*
 * For testing memorystatus_freeze_top_process.
 *
 * Writing any value to vm.memorystatus_freeze_top_process triggers one
 * freeze attempt; reads do nothing (and report 0).
 *
 * Returns the sysctl_handle_int() error if any, ENOTSUP when freezer swap is
 * not active, ESRCH when nothing was frozen, and 0 otherwise.
 */
static int
sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	/*
	 * val must be initialized: sysctl_handle_int() copies it out to the
	 * caller on a read, and an uninitialized local would expose stale
	 * kernel stack contents.
	 */
	int error, val = 0, ret = 0;
	size_t num_frozen;
	/*
	 * Only freeze on write to prevent freezing during `sysctl -a`.
	 * The actual value written doesn't matter.
	 */
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	lck_mtx_lock(&freezer_mutex);
	num_frozen = memorystatus_freeze_top_process();
	lck_mtx_unlock(&freezer_mutex);

	if (num_frozen == 0) {
		ret = ESRCH;
	}
	return ret;
}
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
#endif /* DEVELOPMENT || DEBUG */
2220 
2221 static inline boolean_t
memorystatus_can_freeze_processes(void)2222 memorystatus_can_freeze_processes(void)
2223 {
2224 	boolean_t ret;
2225 
2226 	proc_list_lock();
2227 
2228 	if (memorystatus_suspended_count) {
2229 		memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2230 
2231 		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2232 			ret = TRUE;
2233 		} else {
2234 			ret = FALSE;
2235 		}
2236 	} else {
2237 		ret = FALSE;
2238 	}
2239 
2240 	proc_list_unlock();
2241 
2242 	return ret;
2243 }
2244 
2245 static boolean_t
memorystatus_can_freeze(boolean_t * memorystatus_freeze_swap_low)2246 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2247 {
2248 	boolean_t can_freeze = TRUE;
2249 
2250 	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
2251 	*  after boot,  and is generally is a no-op once we've reached steady state. */
2252 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2253 		return FALSE;
2254 	}
2255 
2256 	/* Check minimum suspended process threshold. */
2257 	if (!memorystatus_can_freeze_processes()) {
2258 		return FALSE;
2259 	}
2260 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2261 
2262 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2263 		/*
2264 		 * In-core compressor used for freezing WITHOUT on-disk swap support.
2265 		 */
2266 		if (vm_compressor_low_on_space()) {
2267 			if (*memorystatus_freeze_swap_low) {
2268 				*memorystatus_freeze_swap_low = TRUE;
2269 			}
2270 
2271 			can_freeze = FALSE;
2272 		} else {
2273 			if (*memorystatus_freeze_swap_low) {
2274 				*memorystatus_freeze_swap_low = FALSE;
2275 			}
2276 
2277 			can_freeze = TRUE;
2278 		}
2279 	} else {
2280 		/*
2281 		 * Freezing WITH on-disk swap support.
2282 		 *
2283 		 * In-core compressor fronts the swap.
2284 		 */
2285 		if (vm_swap_low_on_space()) {
2286 			if (*memorystatus_freeze_swap_low) {
2287 				*memorystatus_freeze_swap_low = TRUE;
2288 			}
2289 
2290 			can_freeze = FALSE;
2291 		}
2292 	}
2293 
2294 	return can_freeze;
2295 }
2296 
2297 /*
2298  * Demote the given frozen process.
2299  * Caller must hold the proc_list_lock & it will be held on return.
2300  */
2301 static void
memorystatus_demote_frozen_process(proc_t p,bool urgent_mode __unused)2302 memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
2303 {
2304 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2305 
2306 	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
2307 	int priority = JETSAM_PRIORITY_IDLE;
2308 	p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2309 	if (_memstat_proc_has_priority_assertion(p)) {
2310 		priority = MAX(p->p_memstat_assertionpriority, priority);
2311 	}
2312 	if (_memstat_proc_is_tracked(p) && _memstat_proc_is_dirty(p)) {
2313 		priority = MAX(p->p_memstat_requestedpriority, priority);
2314 	}
2315 	memstat_update_priority_locked(p, priority, MEMSTAT_PRIORITY_NO_AGING);
2316 #if DEVELOPMENT || DEBUG
2317 	memorystatus_log("memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
2318 	    (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2319 #endif /* DEVELOPMENT || DEBUG */
2320 
2321 	/*
2322 	 * The freezer thread will consider this a normal app to be frozen
2323 	 * because it is in the IDLE band. So we don't need the
2324 	 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2325 	 * we'll correctly count it as eligible for re-freeze again.
2326 	 *
2327 	 * We don't drop the frozen count because this process still has
2328 	 * state on disk. So there's a chance it gets resumed and then it
2329 	 * should land in the higher jetsam band. For that it needs to
2330 	 * remain marked frozen.
2331 	 */
2332 	if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2333 		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2334 		memorystatus_refreeze_eligible_count--;
2335 	}
2336 }
2337 
2338 static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)2339 memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
2340 {
2341 	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
2342 	unsigned int demoted_proc_count = 0;
2343 	proc_t p = PROC_NULL, next_p = PROC_NULL;
2344 	proc_list_lock();
2345 
2346 	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2347 	while (next_p) {
2348 		p = next_p;
2349 		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2350 
2351 		if (!_memstat_proc_is_frozen(p)) {
2352 			continue;
2353 		}
2354 
2355 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2356 			continue;
2357 		}
2358 
2359 		if (urgent_mode) {
2360 			if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2361 				/*
2362 				 * This process hasn't been thawed recently and so most of
2363 				 * its state sits on NAND and so we skip it -- jetsamming it
2364 				 * won't help with memory pressure.
2365 				 */
2366 				continue;
2367 			}
2368 		} else {
2369 			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2370 				/*
2371 				 * This process has met / exceeded our thaw count demotion threshold
2372 				 * and so we let it live in the higher bands.
2373 				 */
2374 				continue;
2375 			}
2376 		}
2377 
2378 		memorystatus_demote_frozen_process(p, urgent_mode);
2379 		demoted_proc_count++;
2380 		if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2381 			break;
2382 		}
2383 	}
2384 
2385 	proc_list_unlock();
2386 	return demoted_proc_count;
2387 }
2388 
2389 static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)2390 memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2391 {
2392 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2393 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2394 	assert(memorystatus_freezer_use_demotion_list);
2395 	unsigned int demoted_proc_count = 0;
2396 
2397 	proc_list_lock();
2398 	for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2399 		proc_t p = memorystatus_freezer_candidate_list_get_proc(
2400 			&memorystatus_global_demote_list,
2401 			i,
2402 			&memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2403 		if (p != NULL && memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2404 			memorystatus_demote_frozen_process(p, urgent_mode);
2405 			/* Remove this entry now that it's been demoted. */
2406 			memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2407 			demoted_proc_count++;
2408 			/*
2409 			 * We only demote one proc at a time in this mode.
2410 			 * This gives jetsam a chance to kill the recently demoted processes.
2411 			 */
2412 			break;
2413 		}
2414 	}
2415 
2416 	proc_list_unlock();
2417 	return demoted_proc_count;
2418 }
2419 
2420 /*
2421  * This function evaluates if the currently frozen processes deserve
2422  * to stay in the higher jetsam band. There are 2 modes:
2423  * - 'force one == TRUE': (urgent mode)
2424  *	We are out of budget and can't refreeze a process. The process's
2425  * state, if it was resumed, will stay in compressed memory. If we let it
2426  * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
2427  * the lower bands. So we force-demote the least-recently-used-and-thawed
2428  * process.
2429  *
2430  * - 'force_one == FALSE': (normal mode)
2431  *      If the # of thaws of a process is below our threshold, then we
2432  * will demote that process into the IDLE band.
2433  * We don't immediately kill the process here because it  already has
2434  * state on disk and so it might be worth giving it another shot at
2435  * getting thawed/resumed and used.
2436  */
2437 static void
memorystatus_demote_frozen_processes(bool urgent_mode)2438 memorystatus_demote_frozen_processes(bool urgent_mode)
2439 {
2440 	unsigned int demoted_proc_count = 0;
2441 
2442 	if (memorystatus_freeze_enabled == false) {
2443 		/*
2444 		 * Freeze has been disabled likely to
2445 		 * reclaim swap space. So don't change
2446 		 * any state on the frozen processes.
2447 		 */
2448 		return;
2449 	}
2450 
2451 	/*
2452 	 * We have two demotion policies which can be toggled by userspace.
2453 	 * In non-urgent mode, the ordered list policy will
2454 	 * choose a demotion candidate using the list provided by dasd.
2455 	 * The thaw count policy will demote the oldest process that hasn't been
2456 	 * thawed more than memorystatus_thaw_count_demotion_threshold times.
2457 	 *
2458 	 * If urgent_mode is set, both policies will only consider demoting
2459 	 * processes that are re-freeze eligible. But the ordering is different.
2460 	 * The ordered list policy will scan in the order given by dasd.
2461 	 * The thaw count policy will scan through the frozen band.
2462 	 */
2463 	if (memorystatus_freezer_use_demotion_list) {
2464 		demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);
2465 
2466 		if (demoted_proc_count == 0 && urgent_mode) {
2467 			/*
2468 			 * We're out of budget and the demotion list doesn't contain any valid
2469 			 * candidates. We still need to demote something. Fall back to scanning
2470 			 * the frozen band.
2471 			 */
2472 			memorystatus_demote_frozen_processes_using_thaw_count(true);
2473 		}
2474 	} else {
2475 		demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
2476 	}
2477 }
2478 
2479 /*
2480  * Calculate a new freezer budget.
2481  * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2482  * @param burst_multiple The burst_multiple for the new period
2483  * @param interval_duration_min How many minutes will the new interval be?
2484  * @param rollover The amount to rollover from the previous budget.
2485  *
2486  * @return A budget for the new interval.
2487  */
2488 static uint32_t
memorystatus_freeze_calculate_new_budget(unsigned int time_since_last_interval_expired_sec,unsigned int burst_multiple,unsigned int interval_duration_min,uint32_t rollover)2489 memorystatus_freeze_calculate_new_budget(
2490 	unsigned int time_since_last_interval_expired_sec,
2491 	unsigned int burst_multiple,
2492 	unsigned int interval_duration_min,
2493 	uint32_t rollover)
2494 {
2495 	uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2496 	const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2497 	/* Precision factor for days_missed. 2 decimal points. */
2498 	const static unsigned int kFixedPointFactor = 100;
2499 	unsigned int days_missed;
2500 
2501 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2502 		return 0;
2503 	}
2504 	if (memorystatus_swap_all_apps) {
2505 		/*
2506 		 * We effectively have an unlimited budget when app swap is enabled.
2507 		 */
2508 		memorystatus_freeze_daily_mb_max = UINT32_MAX;
2509 		return UINT32_MAX;
2510 	}
2511 
2512 	/* Get the daily budget from the storage layer */
2513 	if (vm_swap_max_budget(&freeze_daily_budget)) {
2514 		freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2515 		assert(freeze_daily_budget_mb <= UINT32_MAX);
2516 		memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2517 		memorystatus_log_info("memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2518 	}
2519 	/* Calculate the daily pageout budget */
2520 	freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2521 	/* Multiply by memorystatus_freeze_budget_multiplier */
2522 	freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;
2523 
2524 	daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2525 
2526 	/*
2527 	 * Add additional budget for time since the interval expired.
2528 	 * For example, if the interval expired n days ago, we should get an additional n days
2529 	 * of budget since we didn't use any budget during those n days.
2530 	 */
2531 	days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2532 	budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2533 	new_budget = rollover + daily_budget_pageouts + budget_missed;
2534 	return (uint32_t) MIN(new_budget, UINT32_MAX);
2535 }
2536 
2537 /*
2538  * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2539  * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2540  * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2541  * it gets set for new processes.
2542  * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2543  */
2544 static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason,bool locked)2545 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2546 {
2547 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2548 	LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2549 	unsigned int band = JETSAM_PRIORITY_IDLE;
2550 	proc_t p;
2551 
2552 	if (!locked) {
2553 		proc_list_lock();
2554 	}
2555 	p = memorystatus_get_first_proc_locked(&band, FALSE);
2556 	while (p) {
2557 		assert(p->p_memstat_effectivepriority == (int32_t) band);
2558 		if (!_memstat_proc_is_frozen(p) &&
2559 		    memorystatus_is_process_eligible_for_freeze(p)) {
2560 			assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2561 			p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2562 		}
2563 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2564 	}
2565 	if (!locked) {
2566 		proc_list_unlock();
2567 	}
2568 }
2569 
2570 /*
2571  * Called after we fail to freeze a process.
2572  * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2573  */
2574 static void
memorystatus_freeze_handle_error(proc_t p,const freezer_error_code_t freezer_error_code,bool was_refreeze,pid_t pid,const coalition_t coalition,const char * log_prefix)2575 memorystatus_freeze_handle_error(
2576 	proc_t p,
2577 	const freezer_error_code_t freezer_error_code,
2578 	bool was_refreeze,
2579 	pid_t pid,
2580 	const coalition_t coalition,
2581 	const char* log_prefix)
2582 {
2583 	const char *reason;
2584 	memorystatus_freeze_skip_reason_t skip_reason;
2585 
2586 	switch (freezer_error_code) {
2587 	case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2588 		memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2589 		reason = "too much shared memory";
2590 		skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2591 		break;
2592 	case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2593 		memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2594 		reason = "private-shared pages ratio";
2595 		skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2596 		break;
2597 	case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2598 		memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2599 		reason = "no compressor space";
2600 		skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2601 		break;
2602 	case FREEZER_ERROR_NO_SWAP_SPACE:
2603 		memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2604 		reason = "no swap space";
2605 		skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2606 		break;
2607 	case FREEZER_ERROR_NO_SLOTS:
2608 		memorystatus_freezer_stats.mfs_skipped_full_count++;
2609 		reason = "no slots";
2610 		skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2611 		break;
2612 	default:
2613 		reason = "unknown error";
2614 		skip_reason = kMemorystatusFreezeSkipReasonOther;
2615 	}
2616 
2617 	p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2618 
2619 	memorystatus_log("%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2620 	    log_prefix, was_refreeze ? "re" : "",
2621 	    (coalition == NULL ? "general" : "coalition-driven"), pid,
2622 	    ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2623 }
2624 
2625 /*
2626  * Start a new normal throttle interval with the given budget.
2627  * Caller must hold the freezer mutex
2628  */
2629 static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget,mach_timespec_t start_ts)2630 memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2631 {
2632 	unsigned int band;
2633 	proc_t p, next_p;
2634 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2635 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2636 
2637 	normal_throttle_window->max_pageouts = new_budget;
2638 	normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2639 	normal_throttle_window->ts.tv_nsec = 0;
2640 	ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2641 	/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2642 	if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2643 		normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2644 	} else {
2645 		normal_throttle_window->pageouts = 0;
2646 	}
2647 	/* Ensure the normal window is now active. */
2648 	memorystatus_freeze_degradation = FALSE;
2649 
2650 	/*
2651 	 * Reset interval statistics.
2652 	 */
2653 	memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2654 	memorystatus_freezer_stats.mfs_process_considered_count = 0;
2655 	memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
2656 	memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
2657 	memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
2658 	memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
2659 	memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
2660 	memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
2661 	memorystatus_freezer_stats.mfs_error_elevated_count = 0;
2662 	memorystatus_freezer_stats.mfs_error_other_count = 0;
2663 	memorystatus_freezer_stats.mfs_refreeze_count = 0;
2664 	memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
2665 	memorystatus_freezer_stats.mfs_below_threshold_count = 0;
2666 	memorystatus_freezer_stats.mfs_skipped_full_count = 0;
2667 	memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
2668 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
2669 	memorystatus_thaw_count = 0;
2670 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
2671 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
2672 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
2673 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
2674 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
2675 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
2676 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
2677 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
2678 	os_atomic_inc(&memorystatus_freeze_current_interval, release);
2679 
2680 	/* Clear the focal thaw bit */
2681 	proc_list_lock();
2682 	band = JETSAM_PRIORITY_IDLE;
2683 	p = PROC_NULL;
2684 	next_p = PROC_NULL;
2685 
2686 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
2687 	while (next_p) {
2688 		p = next_p;
2689 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2690 
2691 		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
2692 			break;
2693 		}
2694 		p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
2695 	}
2696 	proc_list_unlock();
2697 
2698 	schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
2699 }
2700 
#if DEVELOPMENT || DEBUG

/*
 * DEVELOPMENT/DEBUG-only sysctl handler (vm.memorystatus_freeze_calculate_new_budget).
 *
 * The value written by the caller is taken as "seconds since the last
 * interval expired"; the budget computed for one normal window with that
 * input (burst multiple 1, no rollover) is copied out to the caller's old
 * buffer.
 *
 * Returns the sysctl_handle_int() result when it fails or when no new value
 * was supplied, ENOTSUP when freezer swap is not active, otherwise the
 * copyout() result.
 */
static int
sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error = 0;
	unsigned int time_since_last_interval_expired_sec = 0;
	unsigned int new_budget;

	error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}
	new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
	/*
	 * Bound the copy by the length of the caller's buffer (req->oldlen), not
	 * by the size of the oldlen field itself, so that a buffer shorter than
	 * new_budget is never written past its end.
	 */
	return copyout(&new_budget, req->oldptr, MIN(req->oldlen, sizeof(new_budget)));
}

SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");

#endif /* DEVELOPMENT || DEBUG */
2727 
2728 /*
2729  * Called when we first run out of budget in an interval.
2730  * Marks idle processes as not frozen due to lack of budget.
2731  * NB: It might be worth having a CA event here.
2732  */
2733 static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t * interval)2734 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2735 {
2736 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2737 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2738 
2739 	mach_timespec_t time_left = {0, 0};
2740 	mach_timespec_t now_ts;
2741 	clock_sec_t sec;
2742 	clock_nsec_t nsec;
2743 
2744 	time_left.tv_sec = interval->ts.tv_sec;
2745 	time_left.tv_nsec = 0;
2746 	clock_get_system_nanotime(&sec, &nsec);
2747 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2748 	now_ts.tv_nsec = nsec;
2749 
2750 	SUB_MACH_TIMESPEC(&time_left, &now_ts);
2751 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2752 	memorystatus_log(
2753 		"memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2754 		time_left.tv_sec / 60, memorystatus_frozen_count);
2755 
2756 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2757 }
2758 
2759 /*
2760  * Called when we cross over the threshold of maximum frozen processes allowed.
2761  * Marks remaining idle processes as not frozen due to lack of slots.
2762  */
2763 static void
memorystatus_freeze_out_of_slots(void)2764 memorystatus_freeze_out_of_slots(void)
2765 {
2766 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2767 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2768 	assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
2769 
2770 	memorystatus_log(
2771 		"memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
2772 		memorystatus_frozen_count);
2773 
2774 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
2775 }
2776 
2777 /*
2778  * This function will do 4 things:
2779  *
2780  * 1) check to see if we are currently in a degraded freezer mode, and if so:
2781  *    - check to see if our window has expired and we should exit this mode, OR,
2782  *    - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2783  *
2784  * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2785  *
2786  * 3) check what the current normal window allows for a budget.
2787  *
2788  * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2789  *    what we would normally expect, then we are running low on our daily budget and need to enter
2790  *    degraded perf. mode.
2791  *
2792  *    Caller must hold the freezer mutex
2793  *    Caller must not hold the proc_list lock
2794  */
2795 
2796 static void
memorystatus_freeze_update_throttle(uint64_t * budget_pages_allowed)2797 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2798 {
2799 	clock_sec_t sec;
2800 	clock_nsec_t nsec;
2801 	mach_timespec_t now_ts;
2802 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2803 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2804 
2805 	unsigned int freeze_daily_pageouts_max = 0;
2806 	bool started_with_budget = (*budget_pages_allowed > 0);
2807 
2808 #if DEVELOPMENT || DEBUG
2809 	if (!memorystatus_freeze_throttle_enabled) {
2810 		/*
2811 		 * No throttling...we can use the full budget everytime.
2812 		 */
2813 		*budget_pages_allowed = UINT64_MAX;
2814 		return;
2815 	}
2816 #endif
2817 
2818 	clock_get_system_nanotime(&sec, &nsec);
2819 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2820 	now_ts.tv_nsec = nsec;
2821 
2822 	struct throttle_interval_t *interval = NULL;
2823 
2824 	if (memorystatus_freeze_degradation == TRUE) {
2825 		interval = degraded_throttle_window;
2826 
2827 		if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2828 			interval->pageouts = 0;
2829 			interval->max_pageouts = 0;
2830 		} else {
2831 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2832 		}
2833 	}
2834 
2835 	interval = normal_throttle_window;
2836 
2837 	/*
2838 	 * Current throttle window.
2839 	 * Deny freezing if we have no budget left.
2840 	 * Try graceful degradation if we are within 25% of:
2841 	 * - the daily budget, and
2842 	 * - the current budget left is below our normal budget expectations.
2843 	 */
2844 
2845 	if (memorystatus_freeze_degradation == FALSE) {
2846 		if (interval->pageouts >= interval->max_pageouts) {
2847 			*budget_pages_allowed = 0;
2848 			if (started_with_budget) {
2849 				memorystatus_freeze_out_of_budget(interval);
2850 			}
2851 		} else {
2852 			int budget_left = interval->max_pageouts - interval->pageouts;
2853 			int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2854 
2855 			mach_timespec_t time_left = {0, 0};
2856 
2857 			time_left.tv_sec = interval->ts.tv_sec;
2858 			time_left.tv_nsec = 0;
2859 
2860 			SUB_MACH_TIMESPEC(&time_left, &now_ts);
2861 
2862 			if (budget_left <= budget_threshold) {
2863 				/*
2864 				 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2865 				 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2866 				 * daily pageout budget.
2867 				 */
2868 
2869 				unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2870 				unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2871 
2872 				/*
2873 				 * The current rate of pageouts is below what we would expect for
2874 				 * the normal rate i.e. we have below normal budget left and so...
2875 				 */
2876 
2877 				if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2878 					memorystatus_freeze_degradation = TRUE;
2879 					degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2880 					degraded_throttle_window->pageouts = 0;
2881 
2882 					/*
2883 					 * Switch over to the degraded throttle window so the budget
2884 					 * doled out is based on that window.
2885 					 */
2886 					interval = degraded_throttle_window;
2887 				}
2888 			}
2889 
2890 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2891 		}
2892 	}
2893 
2894 	memorystatus_log_debug(
2895 		"memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining\n",
2896 		interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60);
2897 }
2898 
2899 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_apps_idle_delay_multiplier, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_apps_idle_delay_multiplier, 0, "");
2900 
2901 bool memorystatus_freeze_thread_init = false;
2902 static void
memorystatus_freeze_thread(void * param __unused,wait_result_t wr __unused)2903 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2904 {
2905 	static boolean_t memorystatus_freeze_swap_low = FALSE;
2906 	size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;
2907 
2908 	if (!memorystatus_freeze_thread_init) {
2909 #if CONFIG_THREAD_GROUPS
2910 		thread_group_vm_add();
2911 #endif
2912 		memorystatus_freeze_thread_init = true;
2913 	}
2914 
2915 	max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();
2916 
2917 	lck_mtx_lock(&freezer_mutex);
2918 	if (memorystatus_freeze_enabled) {
2919 		if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
2920 			memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
2921 		}
2922 		while (num_frozen < max_to_freeze &&
2923 		    memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
2924 		    ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2925 		    (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold))) {
2926 			/* Only freeze if we've not exceeded our pageout budgets.*/
2927 			memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2928 
2929 			if (memorystatus_freeze_budget_pages_remaining) {
2930 				num_frozen_this_iteration = memorystatus_freeze_top_process();
2931 				if (num_frozen_this_iteration == 0) {
2932 					/* Nothing left to freeze. */
2933 					break;
2934 				}
2935 				num_frozen += num_frozen_this_iteration;
2936 			} else {
2937 				memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
2938 				break;
2939 			}
2940 		}
2941 	}
2942 
2943 	/*
2944 	 * Give applications currently in the aging band a chance to age out into the idle band before
2945 	 * running the freezer again.
2946 	 */
2947 	if (memorystatus_freeze_dynamic_thread_delay_enabled) {
2948 		if ((num_frozen > 0) || (memorystatus_frozen_count == 0)) {
2949 			memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST;
2950 		} else {
2951 			memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW;
2952 		}
2953 	}
2954 	memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + (memorystatus_apps_idle_delay_time * memorystatus_freeze_apps_idle_delay_multiplier);
2955 
2956 	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2957 	lck_mtx_unlock(&freezer_mutex);
2958 
2959 	thread_block((thread_continue_t) memorystatus_freeze_thread);
2960 }
2961 
2962 int
memorystatus_get_process_is_freezable(pid_t pid,int * is_freezable)2963 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2964 {
2965 	proc_t p = PROC_NULL;
2966 
2967 	if (pid == 0) {
2968 		return EINVAL;
2969 	}
2970 
2971 	p = proc_find(pid);
2972 	if (!p) {
2973 		return ESRCH;
2974 	}
2975 
2976 	/*
2977 	 * Only allow this on the current proc for now.
2978 	 * We can check for privileges and allow targeting another process in the future.
2979 	 */
2980 	if (p != current_proc()) {
2981 		proc_rele(p);
2982 		return EPERM;
2983 	}
2984 
2985 	proc_list_lock();
2986 	*is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
2987 	proc_rele(p);
2988 	proc_list_unlock();
2989 
2990 	return 0;
2991 }
2992 
2993 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)2994 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
2995 {
2996 	proc_t p = PROC_NULL;
2997 
2998 	if (pid == 0) {
2999 		return EINVAL;
3000 	}
3001 
3002 	/*
3003 	 * Only allow this on the current proc for now.
3004 	 * We can check for privileges and allow targeting another process in the future.
3005 	 */
3006 	p = current_proc();
3007 	if (proc_getpid(p) != pid) {
3008 		return EPERM;
3009 	}
3010 
3011 	proc_list_lock();
3012 	*is_frozen = _memstat_proc_is_frozen(p);
3013 	proc_list_unlock();
3014 
3015 	return 0;
3016 }
3017 
3018 int
memorystatus_set_process_is_freezable(pid_t pid,boolean_t is_freezable)3019 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
3020 {
3021 	proc_t p = PROC_NULL;
3022 
3023 	if (pid == 0) {
3024 		return EINVAL;
3025 	}
3026 
3027 	/*
3028 	 * To enable freezable status, you need to be root or an entitlement.
3029 	 */
3030 	if (is_freezable &&
3031 	    !kauth_cred_issuser(kauth_cred_get()) &&
3032 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3033 		return EPERM;
3034 	}
3035 
3036 	p = proc_find(pid);
3037 	if (!p) {
3038 		return ESRCH;
3039 	}
3040 
3041 	/*
3042 	 * A process can change its own status. A coalition leader can
3043 	 * change the status of coalition members.
3044 	 * An entitled process (or root) can change anyone's status.
3045 	 */
3046 	if (p != current_proc() &&
3047 	    !kauth_cred_issuser(kauth_cred_get()) &&
3048 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3049 		coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
3050 		if (!coalition_is_leader(proc_task(current_proc()), coal)) {
3051 			proc_rele(p);
3052 			return EPERM;
3053 		}
3054 	}
3055 
3056 	proc_list_lock();
3057 	if (is_freezable == FALSE) {
3058 		/* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
3059 		p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
3060 		memorystatus_log_info("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
3061 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3062 	} else {
3063 		p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
3064 		memorystatus_log_info("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
3065 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3066 	}
3067 	proc_rele(p);
3068 	proc_list_unlock();
3069 
3070 	return 0;
3071 }
3072 
3073 /*
3074  * Called when process is created before it is added to a memorystatus bucket.
3075  */
3076 void
memorystatus_freeze_init_proc(proc_t p)3077 memorystatus_freeze_init_proc(proc_t p)
3078 {
3079 	/* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3080 	if (memorystatus_freeze_budget_pages_remaining == 0) {
3081 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3082 	} else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3083 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3084 	} else {
3085 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3086 	}
3087 }
3088 
3089 
3090 static int
3091 sysctl_memorystatus_do_fastwake_warmup_all  SYSCTL_HANDLER_ARGS
3092 {
3093 #pragma unused(oidp, arg1, arg2)
3094 
3095 	if (!req->newptr) {
3096 		return EINVAL;
3097 	}
3098 
3099 	/* Need to be root or have entitlement */
3100 	if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
3101 		return EPERM;
3102 	}
3103 
3104 	if (memorystatus_freeze_enabled == false) {
3105 		return ENOTSUP;
3106 	}
3107 
3108 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3109 		return ENOTSUP;
3110 	}
3111 
3112 	do_fastwake_warmup_all();
3113 
3114 	return 0;
3115 }
3116 
3117 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3118     0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3119 
3120 /*
3121  * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3122  * Takes ownership over the original value of list.
3123  * Assumes that list is protected by the freezer_mutex.
3124  * The caller should not hold any locks.
3125  */
3126 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3127 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3128 {
3129 	errno_t error = 0;
3130 	memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3131 	size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3132 
3133 	/* Validate the user provided list. */
3134 	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3135 		memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3136 		return EINVAL;
3137 	}
3138 
3139 	if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3140 		memorystatus_log_error(
3141 			"memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3142 		return EINVAL;
3143 	}
3144 
3145 	entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3146 	entries_size = buffer_size;
3147 	entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3148 	if (entries == NULL) {
3149 		return ENOMEM;
3150 	}
3151 
3152 	error = copyin(buffer, entries, buffer_size);
3153 	if (error != 0) {
3154 		goto out;
3155 	}
3156 
3157 #if MACH_ASSERT
3158 	for (size_t i = 0; i < entry_count; i++) {
3159 		memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3160 		if (entry->version != 1) {
3161 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3162 			error = EINVAL;
3163 			goto out;
3164 		}
3165 		if (i > 0 && entry->priority >= entries[i - 1].priority) {
3166 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3167 			error = EINVAL;
3168 			goto out;
3169 		}
3170 	}
3171 #endif /* MACH_ASSERT */
3172 
3173 	lck_mtx_lock(&freezer_mutex);
3174 
3175 	tmp_entries = list->mfcl_list;
3176 	tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3177 	list->mfcl_list = entries;
3178 	list->mfcl_length = entry_count;
3179 
3180 	lck_mtx_unlock(&freezer_mutex);
3181 
3182 	entries = tmp_entries;
3183 	entries_size = tmp_size;
3184 
3185 out:
3186 	kfree_data(entries, entries_size);
3187 	return error;
3188 }
3189 
3190 errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer,size_t buffer_size)3191 memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
3192 {
3193 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
3194 }
3195 
3196 errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer,size_t buffer_size)3197 memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
3198 {
3199 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
3200 }
3201 
3202 void
memorystatus_freezer_mark_ui_transition(proc_t p)3203 memorystatus_freezer_mark_ui_transition(proc_t p)
3204 {
3205 	bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3206 	proc_list_lock();
3207 
3208 	if (isSysProc(p)) {
3209 		goto out;
3210 	}
3211 
3212 	frozen = _memstat_proc_is_frozen(p);
3213 	previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3214 	xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3215 	suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3216 	if (!suspended) {
3217 		if (frozen) {
3218 			if (!previous_focal_thaw) {
3219 				p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3220 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3221 				if (xpc_service) {
3222 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3223 				}
3224 			}
3225 		}
3226 		os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3227 	}
3228 
3229 out:
3230 	proc_list_unlock();
3231 }
3232 
3233 #endif /* CONFIG_FREEZE */
3234