xref: /xnu-11215.41.3/bsd/kern/kern_memorystatus_freeze.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41 
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/kdebug.h>
53 #include <sys/kdebug_kernel.h>
54 #include <sys/proc.h>
55 #include <sys/proc_info.h>
56 #include <sys/reason.h>
57 #include <sys/signal.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/sysproto.h>
61 #include <sys/ubc.h> /* mach_to_bsd_errno */
62 #include <sys/wait.h>
63 #include <sys/tree.h>
64 #include <sys/priv.h>
65 #include <vm/vm_pageout.h>
66 #include <vm/vm_protos.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_compressor_xnu.h>
69 #include <vm/vm_compressor_backing_store_xnu.h>
70 #include <mach/machine/sdt.h>
71 #include <libkern/coreanalytics/coreanalytics.h>
72 #include <libkern/section_keywords.h>
73 #include <stdatomic.h>
74 
75 #include <IOKit/IOBSD.h>
76 
77 #if CONFIG_FREEZE
78 #include <vm/vm_map_xnu.h>
79 #endif /* CONFIG_FREEZE */
80 
81 #include <kern/kern_memorystatus_internal.h>
82 #include <sys/kern_memorystatus.h>
83 #include <sys/kern_memorystatus_freeze.h>
84 #include <sys/kern_memorystatus_notify.h>
85 #include <sys/ubc.h>
86 
87 #if CONFIG_JETSAM
88 
89 extern unsigned int memorystatus_available_pages;
90 extern unsigned int memorystatus_available_pages_pressure;
91 extern unsigned int memorystatus_available_pages_critical;
92 extern unsigned int memorystatus_available_pages_critical_base;
93 extern unsigned int memorystatus_available_pages_critical_idle_offset;
94 
95 #else /* CONFIG_JETSAM */
96 
97 extern uint64_t memorystatus_available_pages;
98 extern uint64_t memorystatus_available_pages_pressure;
99 extern uint64_t memorystatus_available_pages_critical;
100 
101 #endif /* CONFIG_JETSAM */
102 
103 unsigned int memorystatus_frozen_count = 0;
104 unsigned int memorystatus_frozen_count_webcontent = 0;
105 unsigned int memorystatus_frozen_count_xpc_service = 0;
106 
107 #if CONFIG_FREEZE
108 
109 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
110 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
111 
112 /* Thresholds */
113 unsigned int memorystatus_freeze_threshold = 0;
114 unsigned int memorystatus_freeze_pages_min = 0;
115 unsigned int memorystatus_freeze_pages_max = 0;
116 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
117 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
118 uint64_t     memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
uint64_t     memorystatus_freeze_budget_multiplier = 100; /* Scales the daily budget by multiplier/100 (100 leaves it unchanged; values above 100 increase it) */
120 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
121 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
122 
123 unsigned int memorystatus_max_frozen_demotions_daily = 0;
124 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
125 unsigned int memorystatus_min_thaw_refreeze_threshold;
126 
127 #if XNU_TARGET_OS_WATCH
128 #define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT true
129 #else
130 #define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT false
131 #endif
132 boolean_t memorystatus_freeze_dynamic_thread_delay_enabled = FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT;
133 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_dynamic_thread_delay_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_dynamic_thread_delay_enabled, 0, "");
134 
135 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST 1
136 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW 30
137 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST
138 unsigned int memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT;
139 
140 #if (XNU_TARGET_OS_IOS && !XNU_TARGET_OS_XR) || XNU_TARGET_OS_WATCH
141 #define FREEZE_ENABLED_DEFAULT TRUE
142 #else
143 #define FREEZE_ENABLED_DEFAULT FALSE
144 #endif
145 boolean_t memorystatus_freeze_enabled = FREEZE_ENABLED_DEFAULT;
146 
147 int memorystatus_freeze_wakeup = 0;
148 
149 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
150 
151 unsigned int memorystatus_frozen_processes_max = 0;
152 unsigned int memorystatus_frozen_shared_mb = 0;
153 unsigned int memorystatus_frozen_shared_mb_max = 0;
154 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
155 #if XNU_TARGET_OS_WATCH
156 unsigned int memorystatus_freeze_private_shared_pages_ratio = 1; /* Ratio of private:shared pages for a process to be freezer-eligible. */
157 #else
158 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
159 #endif
160 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
161 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
162 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
163 
164 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
165 
166 static inline boolean_t memorystatus_can_freeze_processes(void);
167 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
168 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
169 static uint32_t memorystatus_freeze_calculate_new_budget(
170 	unsigned int time_since_last_interval_expired_sec,
171 	unsigned int burst_multiple,
172 	unsigned int interval_duration_min,
173 	uint32_t rollover);
174 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
175 
176 static void memorystatus_set_freeze_is_enabled(bool enabled);
177 static void memorystatus_disable_freeze(void);
178 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
179 
180 /* Stats */
181 static uint64_t memorystatus_freeze_pageouts = 0;
182 
183 /* Throttling */
184 #define DEGRADED_WINDOW_MINS    (30)
185 #define NORMAL_WINDOW_MINS      (24 * 60)
186 
187 /* Protected by the freezer_mutex */
188 static throttle_interval_t throttle_intervals[] = {
189 	{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
190 	{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
191 };
192 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
193 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
194 uint32_t memorystatus_freeze_current_interval = 0;
195 static thread_call_t freeze_interval_reset_thread_call;
196 static uint32_t memorystatus_freeze_calculate_new_budget(
197 	unsigned int time_since_last_interval_expired_sec,
198 	unsigned int burst_multiple,
199 	unsigned int interval_duration_min,
200 	uint32_t rollover);
201 
202 struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
203 struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
204 /*
205  * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
206  * in order (as opposed to using the older LRU approach).
207  */
208 #if XNU_TARGET_OS_WATCH
209 #define FREEZER_USE_ORDERED_LIST_DEFAULT 1
210 #else
211 #define FREEZER_USE_ORDERED_LIST_DEFAULT 0
212 #endif
213 int memorystatus_freezer_use_ordered_list = FREEZER_USE_ORDERED_LIST_DEFAULT;
214 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
/*
 * When enabled, demotion candidates are chosen from memorystatus_global_demote_list
 */
218 int memorystatus_freezer_use_demotion_list = 0;
219 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
220 
221 extern boolean_t vm_swap_max_budget(uint64_t *);
222 
223 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
224 static void memorystatus_demote_frozen_processes(bool urgent_mode);
225 
226 static void memorystatus_freeze_handle_error(proc_t p, const freezer_error_code_t freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
227 static void memorystatus_freeze_out_of_slots(void);
228 uint64_t memorystatus_freezer_thread_next_run_ts = 0;
229 
230 /* Sysctls needed for aggd stats */
231 
232 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
233 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
234 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
235 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
236 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
237 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
238 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
239 
240 /*
241  * Force a new interval with the given budget (no rollover).
242  */
243 static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)244 memorystatus_freeze_force_new_interval(uint64_t new_budget)
245 {
246 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
247 	mach_timespec_t now_ts;
248 	clock_sec_t sec;
249 	clock_nsec_t nsec;
250 
251 	clock_get_system_nanotime(&sec, &nsec);
252 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
253 	now_ts.tv_nsec = nsec;
254 	memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
255 	/* Don't carry over any excess pageouts since we're forcing a new budget */
256 	normal_throttle_window->pageouts = 0;
257 	memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
258 }
259 #if DEVELOPMENT || DEBUG
260 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
261 {
262 	#pragma unused(arg1, arg2, oidp)
263 	int error, changed;
264 	uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
265 
266 	lck_mtx_lock(&freezer_mutex);
267 
268 	error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
269 	if (changed) {
270 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
271 			lck_mtx_unlock(&freezer_mutex);
272 			return ENOTSUP;
273 		}
274 		memorystatus_freeze_force_new_interval(new_budget);
275 	}
276 
277 	lck_mtx_unlock(&freezer_mutex);
278 	return error;
279 }
280 
281 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
282 #else /* DEVELOPMENT || DEBUG */
283 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
284 #endif /* DEVELOPMENT || DEBUG */
285 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
286 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
287 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
288 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
289 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
290 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
291 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
292 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
293 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
294 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
295 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
296 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
297 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
298 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
299 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
300 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
301 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
302 
303 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
304 
/*
 * Computes the freezer hit rate: the percentage of frozen procs that have
 * since been thawed. A low hit rate means we are freezing bad candidates,
 * since they are not being re-used.
 *
 * frozen_count: number of procs frozen.
 * thaw_count:   number of those procs that were thawed.
 *
 * Returns a value in [0, 100]. Reports 100 when nothing has been frozen, and
 * also when thaw_count exceeds frozen_count (the two counters are read with
 * relaxed atomics and may be momentarily out of sync).
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0 || thaw_count > frozen_count) {
		return 100;
	}
	return (int)(100 * thaw_count / frozen_count);
}
329 
330 static int
get_thaw_percentage()331 get_thaw_percentage()
332 {
333 	uint64_t processes_frozen, processes_thawed;
334 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
335 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
336 	return calculate_thaw_percentage(processes_frozen, processes_thawed);
337 }
338 
339 static int
340 sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
341 {
342 #pragma unused(arg1, arg2)
343 	int thaw_percentage = get_thaw_percentage();
344 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
345 }
346 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
347 
348 static int
get_thaw_percentage_fg()349 get_thaw_percentage_fg()
350 {
351 	uint64_t processes_frozen, processes_thawed_fg;
352 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
353 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
354 	return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
355 }
356 
357 static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
358 {
359 #pragma unused(arg1, arg2)
360 	int thaw_percentage = get_thaw_percentage_fg();
361 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
362 }
363 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
364 
365 static int
get_thaw_percentage_webcontent()366 get_thaw_percentage_webcontent()
367 {
368 	uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
369 	processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
370 	processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
371 	return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
372 }
373 
374 static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
375 {
376 #pragma unused(arg1, arg2)
377 	int thaw_percentage = get_thaw_percentage_webcontent();
378 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
379 }
380 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
381 
382 
383 static int
get_thaw_percentage_bg()384 get_thaw_percentage_bg()
385 {
386 	uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
387 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
388 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
389 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
390 	return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
391 }
392 
393 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
394 {
395 #pragma unused(arg1, arg2)
396 	int thaw_percentage = get_thaw_percentage_bg();
397 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
398 }
399 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
400 
401 static int
get_thaw_percentage_fg_non_xpc_service()402 get_thaw_percentage_fg_non_xpc_service()
403 {
404 	uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
405 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
406 	processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
407 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
408 	processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
409 	/*
410 	 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
411 	 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
412 	 * Clamp just in case.
413 	 */
414 	processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
415 	processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
416 	return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
417 }
418 
419 static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
420 {
421 #pragma unused(arg1, arg2)
422 	int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
423 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
424 }
425 
426 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
427 
428 #define FREEZER_ERROR_STRING_LENGTH 128
429 
430 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
431 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
432 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
433 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
434 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
435 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
436 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
437 static int
438 sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
439 {
440 #pragma unused(arg1, arg2, oidp, req)
441 	int error = 0, changed = 0;
442 	uint64_t val = memorystatus_freeze_budget_multiplier;
443 	unsigned int new_budget;
444 	clock_sec_t sec;
445 	clock_nsec_t nsec;
446 	mach_timespec_t now_ts;
447 
448 	error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
449 	if (error) {
450 		return error;
451 	}
452 	if (changed) {
453 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
454 			return ENOTSUP;
455 		}
456 #if !(DEVELOPMENT || DEBUG)
457 		if (val > 100) {
458 			/* Can not increase budget on release. */
459 			return EINVAL;
460 		}
461 #endif
462 		lck_mtx_lock(&freezer_mutex);
463 
464 		memorystatus_freeze_budget_multiplier = val;
465 		/* Start a new throttle interval with this budget multiplier */
466 		new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
467 		clock_get_system_nanotime(&sec, &nsec);
468 		now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
469 		now_ts.tv_nsec = nsec;
470 		memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
471 		memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
472 
473 		lck_mtx_unlock(&freezer_mutex);
474 	}
475 	return 0;
476 }
477 EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
478 /*
479  * max. # of frozen process demotions we will allow in our daily cycle.
480  */
481 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
482 
483 /*
484  * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
485  */
486 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
487 
488 /*
489  * min # of global thaws needed for us to consider refreezing these processes.
490  */
491 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_min_thaw_refreeze_threshold, &memorystatus_min_thaw_refreeze_threshold, 0, UINT32_MAX, "");
492 
493 #if DEVELOPMENT || DEBUG
494 
495 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
496 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
497 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
498 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
499 
500 /*
501  * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
502  * "0" means no limit.
503  * Default is 10% of system-wide task limit.
504  */
505 
506 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
507 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
508 
509 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
510 
511 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
512 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
513 
514 /*
515  * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
516  * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
517  */
518 boolean_t memorystatus_freeze_to_memory = FALSE;
519 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
520 
521 #define VM_PAGES_FOR_ALL_PROCS    (2)
522 
523 /*
524  * Manual trigger of freeze and thaw for dev / debug kernels only.
525  */
526 static int
527 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
528 {
529 #pragma unused(arg1, arg2)
530 	int error, pid = 0;
531 	proc_t p;
532 	freezer_error_code_t freezer_error_code = 0;
533 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
534 	int ntasks = 0;
535 	coalition_t coal = COALITION_NULL;
536 
537 	error = sysctl_handle_int(oidp, &pid, 0, req);
538 	if (error || !req->newptr) {
539 		return error;
540 	}
541 
542 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
543 		error = mach_to_bsd_errno(vm_pageout_anonymous_pages());
544 		return error;
545 	}
546 
547 	lck_mtx_lock(&freezer_mutex);
548 	if (memorystatus_freeze_enabled == false) {
549 		lck_mtx_unlock(&freezer_mutex);
550 		memorystatus_log("sysctl_freeze: Freeze is DISABLED\n");
551 		return ENOTSUP;
552 	}
553 
554 again:
555 	p = proc_find(pid);
556 	if (p != NULL) {
557 		memorystatus_freezer_stats.mfs_process_considered_count++;
558 		uint32_t purgeable, wired, clean, dirty, shared;
559 		uint32_t max_pages = 0, state = 0;
560 
561 		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
562 			/*
563 			 * Freezer backed by the compressor and swap file(s)
564 			 * will hold compressed data.
565 			 *
566 			 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
567 			 * being swapped out to disk. Note that this disables freezer swap support globally,
568 			 * not just for the process being frozen.
569 			 *
570 			 *
571 			 * We don't care about the global freezer budget or the process's (min/max) budget here.
572 			 * The freeze sysctl is meant to force-freeze a process.
573 			 *
574 			 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
575 			 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
576 			 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
577 			 */
578 			max_pages = memorystatus_freeze_pages_max;
579 		} else {
580 			/*
581 			 * We only have the compressor without any swap.
582 			 */
583 			max_pages = UINT32_MAX - 1;
584 		}
585 
586 		proc_list_lock();
587 		state = p->p_memstat_state;
588 		proc_list_unlock();
589 
590 		/*
591 		 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
592 		 * We simply ensure that jetsam is not already working on the process and that the process has not
593 		 * explicitly disabled freezing.
594 		 */
595 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
596 			memorystatus_log_error("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
597 			    (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
598 			    (state & P_MEMSTAT_LOCKED) ? " locked" : "",
599 			    (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
600 
601 			proc_rele(p);
602 			lck_mtx_unlock(&freezer_mutex);
603 			return EPERM;
604 		}
605 
606 		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, pid, max_pages);
607 		error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
608 		if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
609 			memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
610 		}
611 		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);
612 
613 		if (error) {
614 			memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
615 			if (error == KERN_NO_SPACE) {
616 				/* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
617 				error = ENOSPC;
618 			} else {
619 				error = EIO;
620 			}
621 		} else {
622 			proc_list_lock();
623 			if (!_memstat_proc_is_frozen(p)) {
624 				p->p_memstat_state |= P_MEMSTAT_FROZEN;
625 				p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
626 				memorystatus_frozen_count++;
627 				os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
628 				if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
629 					memorystatus_frozen_count_webcontent++;
630 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
631 				}
632 				if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
633 					memorystatus_freeze_out_of_slots();
634 				}
635 			} else {
636 				// This was a re-freeze
637 				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
638 					memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
639 					memorystatus_freezer_stats.mfs_refreeze_count++;
640 				}
641 			}
642 			p->p_memstat_frozen_count++;
643 
644 			if (coal != NULL) {
645 				/* We just froze an xpc service. Mark it as such for telemetry */
646 				p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
647 				memorystatus_frozen_count_xpc_service++;
648 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
649 			}
650 
651 
652 			proc_list_unlock();
653 
654 			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
655 				/*
656 				 * We elevate only if we are going to swap out the data.
657 				 */
658 				error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
659 				    memorystatus_freeze_jetsam_band, TRUE);
660 
661 				if (error) {
662 					memorystatus_log_error("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
663 				}
664 			}
665 		}
666 
667 		if ((error == 0) && (coal == NULL)) {
668 			/*
669 			 * We froze a process and so we check to see if it was
670 			 * a coalition leader and if it has XPC services that
671 			 * might need freezing.
672 			 * Only one leader can be frozen at a time and so we shouldn't
673 			 * enter this block more than once per call. Hence the
674 			 * check that 'coal' has to be NULL. We should make this an
675 			 * assert() or panic() once we have a much more concrete way
676 			 * to detect an app vs a daemon.
677 			 */
678 
679 			task_t          curr_task = NULL;
680 
681 			curr_task = proc_task(p);
682 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
683 			if (coalition_is_leader(curr_task, coal)) {
684 				ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
685 				    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
686 
687 				if (ntasks > MAX_XPC_SERVICE_PIDS) {
688 					ntasks = MAX_XPC_SERVICE_PIDS;
689 				}
690 			}
691 		}
692 
693 		proc_rele(p);
694 
695 		while (ntasks) {
696 			pid = pid_list[--ntasks];
697 			goto again;
698 		}
699 
700 		lck_mtx_unlock(&freezer_mutex);
701 		return error;
702 	} else {
703 		memorystatus_log_error("sysctl_freeze: Invalid process\n");
704 	}
705 
706 
707 	lck_mtx_unlock(&freezer_mutex);
708 	return EINVAL;
709 }
710 
711 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
712     0, 0, &sysctl_memorystatus_freeze, "I", "");
713 
714 /*
715  * Manual trigger of agressive frozen demotion for dev / debug kernels only.
716  */
717 static int
718 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
719 {
720 #pragma unused(arg1, arg2)
721 	int error, val;
722 	/*
723 	 * Only demote on write to prevent demoting during `sysctl -a`.
724 	 * The actual value written doesn't matter.
725 	 */
726 	error = sysctl_handle_int(oidp, &val, 0, req);
727 	if (error || !req->newptr) {
728 		return error;
729 	}
730 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
731 		return ENOTSUP;
732 	}
733 	lck_mtx_lock(&freezer_mutex);
734 	memorystatus_demote_frozen_processes(false);
735 	lck_mtx_unlock(&freezer_mutex);
736 	return 0;
737 }
738 
739 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
740 
741 static int
742 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
743 {
744 #pragma unused(arg1, arg2)
745 
746 	int error, pid = 0;
747 	proc_t p;
748 
749 	if (memorystatus_freeze_enabled == false) {
750 		return ENOTSUP;
751 	}
752 
753 	error = sysctl_handle_int(oidp, &pid, 0, req);
754 	if (error || !req->newptr) {
755 		return error;
756 	}
757 
758 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
759 		do_fastwake_warmup_all();
760 		return 0;
761 	} else {
762 		p = proc_find(pid);
763 		if (p != NULL) {
764 			error = task_thaw(proc_task(p));
765 
766 			if (error) {
767 				error = EIO;
768 			} else {
769 				/*
770 				 * task_thaw() succeeded.
771 				 *
772 				 * We increment memorystatus_frozen_count on the sysctl freeze path.
773 				 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
774 				 * when this process exits.
775 				 *
776 				 * proc_list_lock();
777 				 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
778 				 * proc_list_unlock();
779 				 */
780 			}
781 			proc_rele(p);
782 			return error;
783 		}
784 	}
785 
786 	return EINVAL;
787 }
788 
789 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
790     0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
791 
792 
793 typedef struct _global_freezable_status {
794 	boolean_t       freeze_pages_threshold_crossed;
795 	boolean_t       freeze_eligible_procs_available;
796 	boolean_t       freeze_scheduled_in_future;
797 }global_freezable_status_t;
798 
799 typedef struct _proc_freezable_status {
800 	boolean_t    freeze_has_memstat_state;
801 	boolean_t    freeze_has_pages_min;
802 	int        freeze_has_probability;
803 	int        freeze_leader_eligible;
804 	boolean_t    freeze_attempted;
805 	uint32_t    p_memstat_state;
806 	uint32_t    p_pages;
807 	int        p_freeze_error_code;
808 	int        p_pid;
809 	int        p_leader_pid;
810 	char        p_name[MAXCOMLEN + 1];
811 }proc_freezable_status_t;
812 
/* Total # of processes in band 0 that we evaluate for freezability */
#define MAX_FREEZABLE_PROCESSES 200

/*
 * Coalition based freezing evaluations proceed as follows:
 *  - detect that the process is a coalition member and an XPC service
 *  - set its 'freeze_leader_eligible' field to FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be freezable too
 *
 * Once every process has been evaluated, a second pass visits each coalition
 * member XPC service and looks up the 'freezable' status of its leader. If and
 * only if:
 *  - the XPC service is freezable, i.e. its individual freeze evaluation worked
 *  - and its leader is also marked freezable
 * its 'freeze_leader_eligible' becomes FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */

#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN   (-1)
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1)
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2)
832 
833 static int
memorystatus_freezer_get_status(user_addr_t buffer,size_t buffer_size,int32_t * retval)834 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
835 {
836 	uint32_t            proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
837 	global_freezable_status_t    *list_head;
838 	proc_freezable_status_t     *list_entry, *list_entry_start;
839 	size_t                list_size = 0, entry_count = 0;
840 	proc_t                p, leader_proc;
841 	memstat_bucket_t        *bucket;
842 	uint32_t            state = 0, pages = 0;
843 	boolean_t            try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
844 	int                error = 0, probability_of_use = 0;
845 	pid_t              leader_pid = 0;
846 	struct memorystatus_freeze_list_iterator iterator;
847 
848 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
849 		return ENOTSUP;
850 	}
851 
852 	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
853 
854 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
855 
856 	if (buffer_size < list_size) {
857 		return EINVAL;
858 	}
859 
860 	list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
861 	if (list_head == NULL) {
862 		return ENOMEM;
863 	}
864 
865 	list_size = sizeof(global_freezable_status_t);
866 
867 	lck_mtx_lock(&freezer_mutex);
868 	proc_list_lock();
869 
870 	uint64_t curr_time = mach_absolute_time();
871 
872 	list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
873 	if (memorystatus_freezer_use_ordered_list) {
874 		list_head->freeze_eligible_procs_available = memorystatus_frozen_count < memorystatus_global_freeze_list.mfcl_length;
875 	} else {
876 		list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
877 	}
878 	list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
879 
880 	list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
881 	list_entry = list_entry_start;
882 
883 	bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
884 
885 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
886 
887 	if (memorystatus_freezer_use_ordered_list) {
888 		while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
889 			p = memorystatus_freezer_candidate_list_get_proc(
890 				&memorystatus_global_freeze_list,
891 				(iterator.global_freeze_list_index)++,
892 				NULL);
893 			if (p != PROC_NULL) {
894 				break;
895 			}
896 		}
897 	} else {
898 		p = memorystatus_get_first_proc_locked(&band, FALSE);
899 	}
900 
901 	proc_count++;
902 
903 	while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
904 	    (p) &&
905 	    (list_size < buffer_size)) {
906 		if (isSysProc(p)) {
907 			/*
908 			 * Daemon:- We will consider freezing it iff:
909 			 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
910 			 * - its role in the coalition is XPC service.
911 			 *
912 			 * We skip memory size requirements in this case.
913 			 */
914 
915 			coalition_t     coal = COALITION_NULL;
916 			task_t          leader_task = NULL, curr_task = NULL;
917 			int             task_role_in_coalition = 0;
918 
919 			curr_task = proc_task(p);
920 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
921 
922 			if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
923 				/*
924 				 * By default, XPC services without an app
925 				 * will be the leader of their own single-member
926 				 * coalition.
927 				 */
928 				goto skip_ineligible_xpc;
929 			}
930 
931 			leader_task = coalition_get_leader(coal);
932 			if (leader_task == TASK_NULL) {
933 				/*
934 				 * This jetsam coalition is currently leader-less.
935 				 * This could happen if the app died, but XPC services
936 				 * have not yet exited.
937 				 */
938 				goto skip_ineligible_xpc;
939 			}
940 
941 			leader_proc = (proc_t)get_bsdtask_info(leader_task);
942 			task_deallocate(leader_task);
943 
944 			if (leader_proc == PROC_NULL) {
945 				/* leader task is exiting */
946 				goto skip_ineligible_xpc;
947 			}
948 
949 			task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
950 
951 			if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
952 				xpc_skip_size_probability_check = TRUE;
953 				leader_pid = proc_getpid(leader_proc);
954 				goto continue_eval;
955 			}
956 
957 skip_ineligible_xpc:
958 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
959 			proc_count++;
960 			continue;
961 		}
962 
963 continue_eval:
964 		strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
965 
966 		list_entry->p_pid = proc_getpid(p);
967 
968 		state = p->p_memstat_state;
969 
970 		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
971 		    !(state & P_MEMSTAT_SUSPENDED)) {
972 			try_freeze = list_entry->freeze_has_memstat_state = FALSE;
973 		} else {
974 			try_freeze = list_entry->freeze_has_memstat_state = TRUE;
975 		}
976 
977 		list_entry->p_memstat_state = state;
978 
979 		if (xpc_skip_size_probability_check == TRUE) {
980 			/*
981 			 * Assuming the coalition leader is freezable
982 			 * we don't care re. minimum pages and probability
983 			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
984 			 * XPC services have to be explicity opted-out of the disabled
985 			 * state. And we checked that state above.
986 			 */
987 			list_entry->freeze_has_pages_min = TRUE;
988 			list_entry->p_pages = -1;
989 			list_entry->freeze_has_probability = -1;
990 
991 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
992 			list_entry->p_leader_pid = leader_pid;
993 
994 			xpc_skip_size_probability_check = FALSE;
995 		} else {
996 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
997 			list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
998 
999 			memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
1000 			if (pages < memorystatus_freeze_pages_min) {
1001 				try_freeze = list_entry->freeze_has_pages_min = FALSE;
1002 			} else {
1003 				list_entry->freeze_has_pages_min = TRUE;
1004 			}
1005 
1006 			list_entry->p_pages = pages;
1007 
1008 			if (entry_count) {
1009 				uint32_t j = 0;
1010 				for (j = 0; j < entry_count; j++) {
1011 					if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
1012 					    p->p_name,
1013 					    MAXCOMLEN) == 0) {
1014 						probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
1015 						break;
1016 					}
1017 				}
1018 
1019 				list_entry->freeze_has_probability = probability_of_use;
1020 
1021 				try_freeze = ((probability_of_use > 0) && try_freeze);
1022 			} else {
1023 				list_entry->freeze_has_probability = -1;
1024 			}
1025 		}
1026 
1027 		if (try_freeze) {
1028 			uint32_t purgeable, wired, clean, dirty, shared;
1029 			uint32_t max_pages = 0;
1030 			int freezer_error_code = 0;
1031 
1032 			error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
1033 
1034 			if (error) {
1035 				list_entry->p_freeze_error_code = freezer_error_code;
1036 			}
1037 
1038 			list_entry->freeze_attempted = TRUE;
1039 		}
1040 
1041 		list_entry++;
1042 		freeze_eligible_proc_considered++;
1043 
1044 		list_size += sizeof(proc_freezable_status_t);
1045 
1046 		if (memorystatus_freezer_use_ordered_list) {
1047 			p = PROC_NULL;
1048 			while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
1049 				p = memorystatus_freezer_candidate_list_get_proc(
1050 					&memorystatus_global_freeze_list,
1051 					(iterator.global_freeze_list_index)++,
1052 					NULL);
1053 				if (p != PROC_NULL) {
1054 					break;
1055 				}
1056 			}
1057 		} else {
1058 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1059 		}
1060 
1061 		proc_count++;
1062 	}
1063 
1064 	proc_list_unlock();
1065 	lck_mtx_unlock(&freezer_mutex);
1066 
1067 	list_entry = list_entry_start;
1068 
1069 	for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1070 		if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1071 			leader_pid = list_entry[xpc_index].p_leader_pid;
1072 
1073 			leader_proc = proc_find(leader_pid);
1074 
1075 			if (leader_proc) {
1076 				if (_memstat_proc_is_frozen(leader_proc)) {
1077 					/*
1078 					 * Leader has already been frozen.
1079 					 */
1080 					list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1081 					proc_rele(leader_proc);
1082 					continue;
1083 				}
1084 				proc_rele(leader_proc);
1085 			}
1086 
1087 			for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1088 				if (list_entry[leader_index].p_pid == leader_pid) {
1089 					if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1090 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1091 					} else {
1092 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1093 						list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1094 					}
1095 					break;
1096 				}
1097 			}
1098 
1099 			/*
1100 			 * Didn't find the leader entry. This might be likely because
1101 			 * the leader never made it down to band 0.
1102 			 */
1103 			if (leader_index == freeze_eligible_proc_considered) {
1104 				list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1105 				list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1106 			}
1107 		}
1108 	}
1109 
1110 	buffer_size = MIN(list_size, INT32_MAX);
1111 
1112 	error = copyout(list_head, buffer, buffer_size);
1113 	if (error == 0) {
1114 		*retval = (int32_t) buffer_size;
1115 	} else {
1116 		*retval = 0;
1117 	}
1118 
1119 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1120 	kfree_data(list_head, list_size);
1121 
1122 	memorystatus_log_debug("memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
1123 
1124 	return error;
1125 }
1126 
1127 #endif /* DEVELOPMENT || DEBUG */
1128 
1129 /*
1130  * Get a list of all processes in the freezer band which are currently frozen.
1131  * Used by powerlog to collect analytics on frozen process.
1132  */
1133 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1134 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1135 {
1136 	global_frozen_procs_t *frozen_procs = NULL;
1137 	uint32_t band = memorystatus_freeze_jetsam_band;
1138 	proc_t p;
1139 	int error;
1140 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1141 		return ENOTSUP;
1142 	}
1143 	if (buffer_size < sizeof(global_frozen_procs_t)) {
1144 		return EINVAL;
1145 	}
1146 	frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1147 	if (frozen_procs == NULL) {
1148 		return ENOMEM;
1149 	}
1150 
1151 	proc_list_lock();
1152 	p = memorystatus_get_first_proc_locked(&band, FALSE);
1153 	while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1154 		if (_memstat_proc_is_frozen(p)) {
1155 			frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1156 			strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1157 			    p->p_name, sizeof(proc_name_t));
1158 			frozen_procs->gfp_num_frozen++;
1159 		}
1160 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1161 	}
1162 	proc_list_unlock();
1163 
1164 	buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1165 	error = copyout(frozen_procs, buffer, buffer_size);
1166 	if (error == 0) {
1167 		*retval = (int32_t) buffer_size;
1168 	} else {
1169 		*retval = 0;
1170 	}
1171 	kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1172 
1173 	return error;
1174 }
1175 
1176 /*
1177  * If dasd is running an experiment that impacts their freezer candidate selection,
1178  * we record that in our telemetry.
1179  */
1180 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1181 
1182 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1183 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1184 {
1185 	memorystatus_freezer_trial_identifiers_v1 identifiers;
1186 	int error = 0;
1187 
1188 	if (buffer_size != sizeof(identifiers)) {
1189 		return EINVAL;
1190 	}
1191 	error = copyin(buffer, &identifiers, sizeof(identifiers));
1192 	if (error != 0) {
1193 		return error;
1194 	}
1195 	if (identifiers.version != 1) {
1196 		return EINVAL;
1197 	}
1198 	dasd_trial_identifiers = identifiers;
1199 	*retval = 0;
1200 	return error;
1201 }
1202 
1203 /*
1204  * Reset the freezer state by wiping out all suspended frozen apps, clearing
1205  * per-process freezer state, and starting a fresh interval.
1206  */
1207 static int
memorystatus_freezer_reset_state(int32_t * retval)1208 memorystatus_freezer_reset_state(int32_t *retval)
1209 {
1210 	uint32_t band = JETSAM_PRIORITY_IDLE;
1211 	/* Don't kill above the frozen band */
1212 	uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
1213 	proc_t next_p = PROC_NULL;
1214 	uint64_t new_budget;
1215 
1216 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1217 		return ENOTSUP;
1218 	}
1219 
1220 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
1221 	if (jetsam_reason == OS_REASON_NULL) {
1222 		memorystatus_log_error("memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
1223 	}
1224 	lck_mtx_lock(&freezer_mutex);
1225 	kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
1226 	proc_list_lock();
1227 
1228 	/*
1229 	 * Clear the considered and skip reason flags on all processes
1230 	 * so we're starting fresh with the new policy.
1231 	 */
1232 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1233 	while (next_p) {
1234 		proc_t p = next_p;
1235 		uint32_t state = p->p_memstat_state;
1236 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1237 
1238 		if (p->p_memstat_effectivepriority > kMaxBand) {
1239 			break;
1240 		}
1241 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1242 			continue;
1243 		}
1244 
1245 		p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
1246 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1247 	}
1248 
1249 	proc_list_unlock();
1250 
1251 	new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1252 	memorystatus_freeze_force_new_interval(new_budget);
1253 
1254 	lck_mtx_unlock(&freezer_mutex);
1255 	*retval = 0;
1256 	return 0;
1257 }
1258 
1259 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1260 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1261 {
1262 	int err = ENOTSUP;
1263 
1264 #if DEVELOPMENT || DEBUG
1265 	if (flags == FREEZER_CONTROL_GET_STATUS) {
1266 		err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1267 	}
1268 #endif /* DEVELOPMENT || DEBUG */
1269 	if (flags == FREEZER_CONTROL_GET_PROCS) {
1270 		err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1271 	} else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1272 		err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1273 	} else if (flags == FREEZER_CONTROL_RESET_STATE) {
1274 		err = memorystatus_freezer_reset_state(retval);
1275 	}
1276 
1277 	return err;
1278 }
1279 
1280 static bool
kill_all_frozen_processes(uint64_t max_band,bool suspended_only,os_reason_t jetsam_reason,uint64_t * memory_reclaimed_out)1281 kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
1282 {
1283 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1284 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1285 
1286 	unsigned int band = 0;
1287 	proc_t p = PROC_NULL, next_p = PROC_NULL;
1288 	pid_t pid = 0;
1289 	bool retval = false, killed = false;
1290 	uint32_t state;
1291 	uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
1292 	proc_list_lock();
1293 
1294 	band = JETSAM_PRIORITY_IDLE;
1295 	p = PROC_NULL;
1296 	next_p = PROC_NULL;
1297 
1298 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1299 	while (next_p) {
1300 		p = next_p;
1301 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1302 		state = p->p_memstat_state;
1303 
1304 		if (p->p_memstat_effectivepriority > max_band) {
1305 			break;
1306 		}
1307 
1308 		if (!(state & P_MEMSTAT_FROZEN)) {
1309 			continue;
1310 		}
1311 
1312 		if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
1313 			continue;
1314 		}
1315 
1316 		if (state & P_MEMSTAT_ERROR) {
1317 			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
1318 		}
1319 
1320 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1321 			memorystatus_log("memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
1322 			skips++;
1323 			continue;
1324 		}
1325 
1326 		footprint = get_task_phys_footprint(proc_task(p));
1327 		pid = proc_getpid(p);
1328 		proc_list_unlock();
1329 
1330 		/* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
1331 		os_reason_ref(jetsam_reason);
1332 		retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
1333 		if (retval) {
1334 			killed = true;
1335 			memory_reclaimed += footprint;
1336 		}
1337 		proc_list_lock();
1338 		/*
1339 		 * The bands might have changed when we dropped the proc list lock.
1340 		 * So start from the beginning.
1341 		 * Since we're preventing any further freezing by holding the freezer mutex,
1342 		 * and we skip anything we've already tried to kill this is guaranteed to terminate.
1343 		 */
1344 		band = 0;
1345 		skips = 0;
1346 		next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1347 	}
1348 
1349 	assert(skips <= memorystatus_frozen_count);
1350 #if MACH_ASSERT
1351 	if (!suspended_only && max_band == JETSAM_PRIORITY_MAX) {
1352 		/*
1353 		 * Check that we've killed all frozen processes.
1354 		 * Note that they may still be exiting (represented by skips).
1355 		 */
1356 		if (memorystatus_frozen_count - skips > 0) {
1357 			assert(memorystatus_freeze_enabled == false);
1358 
1359 			panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
1360 			    memorystatus_frozen_count);
1361 		}
1362 	}
1363 #endif /* MACH_ASSERT */
1364 	if (memory_reclaimed_out) {
1365 		*memory_reclaimed_out = memory_reclaimed;
1366 	}
1367 	proc_list_unlock();
1368 	return killed;
1369 }
1370 
1371 /*
1372  * Disables the freezer, jetsams all frozen processes,
1373  * and reclaims the swap space immediately.
1374  */
1375 
1376 void
memorystatus_disable_freeze(void)1377 memorystatus_disable_freeze(void)
1378 {
1379 	uint64_t memory_reclaimed = 0;
1380 	bool killed = false;
1381 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1382 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1383 
1384 
1385 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1386 	    memorystatus_available_pages);
1387 	memorystatus_log("memorystatus: Disabling freezer. Will kill all frozen processes\n");
1388 
1389 	/*
1390 	 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1391 	 * and all frozen processes will be killed
1392 	 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1393 	 * ensures that no new processes will be frozen once we release the mutex.
1394 	 *
1395 	 */
1396 	memorystatus_freeze_enabled = false;
1397 
1398 	/*
1399 	 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1400 	 */
1401 	vm_page_reactivate_all_throttled();
1402 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1403 	if (jetsam_reason == OS_REASON_NULL) {
1404 		memorystatus_log_error("memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1405 	}
1406 
1407 	killed = kill_all_frozen_processes(JETSAM_PRIORITY_MAX, false, jetsam_reason, &memory_reclaimed);
1408 
1409 	if (killed) {
1410 		memorystatus_log_info("memorystatus: Killed all frozen processes.\n");
1411 		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1412 
1413 		memorystatus_post_snapshot();
1414 	} else {
1415 		memorystatus_log_info("memorystatus: No frozen processes to kill.\n");
1416 	}
1417 
1418 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1419 	    memorystatus_available_pages, memory_reclaimed);
1420 
1421 	return;
1422 }
1423 
1424 static void
memorystatus_set_freeze_is_enabled(bool enabled)1425 memorystatus_set_freeze_is_enabled(bool enabled)
1426 {
1427 	lck_mtx_lock(&freezer_mutex);
1428 	if (enabled != memorystatus_freeze_enabled) {
1429 		if (enabled) {
1430 			memorystatus_freeze_enabled = true;
1431 		} else {
1432 			memorystatus_disable_freeze();
1433 		}
1434 	}
1435 	lck_mtx_unlock(&freezer_mutex);
1436 }
1437 
1438 
1439 static int
1440 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1441 {
1442 #pragma unused(arg1, arg2)
1443 	int error, val = memorystatus_freeze_enabled ? 1 : 0;
1444 
1445 	error = sysctl_handle_int(oidp, &val, 0, req);
1446 	if (error || !req->newptr) {
1447 		return error;
1448 	}
1449 
1450 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1451 		memorystatus_log_error("memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1452 		return EINVAL;
1453 	}
1454 
1455 	memorystatus_set_freeze_is_enabled(val);
1456 
1457 	return 0;
1458 }
1459 
1460 EXPERIMENT_FACTOR_PROC(_vm, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1461 
1462 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1463 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1464 {
1465 	uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1466 	uint64_t interval_expiration_absolutetime;
1467 	nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1468 	memorystatus_log_info("memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1469 
1470 	thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1471 }
1472 
1473 extern uuid_string_t trial_treatment_id;
1474 extern uuid_string_t trial_experiment_id;
1475 extern int trial_deployment_id;
1476 
1477 CA_EVENT(freezer_interval,
1478     CA_INT, budget_remaining,
1479     CA_INT, error_below_min_pages,
1480     CA_INT, error_excess_shared_memory,
1481     CA_INT, error_low_private_shared_ratio,
1482     CA_INT, error_no_compressor_space,
1483     CA_INT, error_no_swap_space,
1484     CA_INT, error_low_probability_of_use,
1485     CA_INT, error_elevated,
1486     CA_INT, error_other,
1487     CA_INT, frozen_count,
1488     CA_INT, pageouts,
1489     CA_INT, refreeze_average,
1490     CA_INT, skipped_full,
1491     CA_INT, skipped_shared_mb_high,
1492     CA_INT, swapusage,
1493     CA_INT, thaw_count,
1494     CA_INT, thaw_percentage,
1495     CA_INT, thaws_per_gb,
1496     CA_INT, trial_deployment_id,
1497     CA_INT, dasd_trial_deployment_id,
1498     CA_INT, budget_exhaustion_duration_remaining,
1499     CA_INT, thaw_percentage_webcontent,
1500     CA_INT, thaw_percentage_fg,
1501     CA_INT, thaw_percentage_bg,
1502     CA_INT, thaw_percentage_fg_non_xpc_service,
1503     CA_INT, fg_resume_count,
1504     CA_INT, unique_freeze_count,
1505     CA_INT, unique_thaw_count,
1506     CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
1507     CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
1508     CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
1509     CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1510 
1511 
1512 /*
1513  * Record statistics from the expiring interval
1514  * via core analytics.
1515  */
1516 static void
memorystatus_freeze_record_interval_analytics(void)1517 memorystatus_freeze_record_interval_analytics(void)
1518 {
1519 	ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
1520 	CA_EVENT_TYPE(freezer_interval) * e = event->data;
1521 	e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
1522 	uint64_t process_considered_count, refrozen_count, below_threshold_count;
1523 	memory_object_size_t swap_size;
1524 	process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
1525 	if (process_considered_count != 0) {
1526 		e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
1527 		e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
1528 		e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
1529 		e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
1530 		e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
1531 		e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
1532 		e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
1533 		e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
1534 	}
1535 	e->frozen_count = memorystatus_frozen_count;
1536 	e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
1537 	refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
1538 	if (refrozen_count != 0) {
1539 		e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
1540 	}
1541 	below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
1542 	if (below_threshold_count != 0) {
1543 		e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
1544 		e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
1545 	}
1546 	if (VM_CONFIG_SWAP_IS_PRESENT) {
1547 		swap_size = vm_swap_get_total_space();
1548 		if (swap_size) {
1549 			e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
1550 		}
1551 	}
1552 	e->thaw_count = memorystatus_thaw_count;
1553 	e->thaw_percentage = get_thaw_percentage();
1554 	e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
1555 	e->thaw_percentage_fg = get_thaw_percentage_fg();
1556 	e->thaw_percentage_bg = get_thaw_percentage_bg();
1557 	e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();
1558 
1559 	if (e->pageouts / (1UL << 10) != 0) {
1560 		e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
1561 	}
1562 	e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
1563 	e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
1564 	e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1565 	e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
1566 
1567 	/*
1568 	 * Record any xnu or dasd experiment information
1569 	 */
1570 	strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
1571 	strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
1572 	e->trial_deployment_id = trial_deployment_id;
1573 	strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
1574 	strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
1575 	e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;
1576 
1577 	CA_EVENT_SEND(event);
1578 }
1579 
1580 static void
memorystatus_freeze_reset_interval(void * arg0,void * arg1)1581 memorystatus_freeze_reset_interval(void *arg0, void *arg1)
1582 {
1583 #pragma unused(arg0, arg1)
1584 	struct throttle_interval_t *interval = NULL;
1585 	clock_sec_t sec;
1586 	clock_nsec_t nsec;
1587 	mach_timespec_t now_ts;
1588 	uint32_t budget_rollover = 0;
1589 
1590 	clock_get_system_nanotime(&sec, &nsec);
1591 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
1592 	now_ts.tv_nsec = nsec;
1593 	interval = normal_throttle_window;
1594 
1595 	/* Record analytics from the old interval before resetting. */
1596 	memorystatus_freeze_record_interval_analytics();
1597 
1598 	lck_mtx_lock(&freezer_mutex);
1599 	/* How long has it been since the previous interval expired? */
1600 	mach_timespec_t expiration_period_ts = now_ts;
1601 	SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
1602 	/* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
1603 	budget_rollover = interval->pageouts > interval->max_pageouts ?
1604 	    0 : interval->max_pageouts - interval->pageouts;
1605 
1606 	memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
1607 		    expiration_period_ts.tv_sec, interval->burst_multiple,
1608 		    interval->mins, budget_rollover),
1609 	    now_ts);
1610 	memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;
1611 
1612 	if (!memorystatus_freezer_use_demotion_list) {
1613 		memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
1614 	}
1615 	lck_mtx_unlock(&freezer_mutex);
1616 }
1617 
1618 
1619 proc_t
memorystatus_get_coalition_leader_and_role(proc_t p,int * role_in_coalition)1620 memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1621 {
1622 	coalition_t     coal = COALITION_NULL;
1623 	task_t          leader_task = NULL, curr_task = NULL;
1624 	proc_t          leader_proc = PROC_NULL;
1625 
1626 	curr_task = proc_task(p);
1627 	coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1628 
1629 	if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1630 		return p;
1631 	}
1632 
1633 	leader_task = coalition_get_leader(coal);
1634 	if (leader_task == TASK_NULL) {
1635 		/*
1636 		 * This jetsam coalition is currently leader-less.
1637 		 * This could happen if the app died, but XPC services
1638 		 * have not yet exited.
1639 		 */
1640 		return PROC_NULL;
1641 	}
1642 
1643 	leader_proc = (proc_t)get_bsdtask_info(leader_task);
1644 	task_deallocate(leader_task);
1645 
1646 	if (leader_proc == PROC_NULL) {
1647 		/* leader task is exiting */
1648 		return PROC_NULL;
1649 	}
1650 
1651 	*role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1652 
1653 	return leader_proc;
1654 }
1655 
1656 bool
memorystatus_freeze_process_is_recommended(const proc_t p)1657 memorystatus_freeze_process_is_recommended(const proc_t p)
1658 {
1659 	assert(!memorystatus_freezer_use_ordered_list);
1660 	int probability_of_use = 0;
1661 
1662 	size_t entry_count = 0, i = 0;
1663 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1664 	if (entry_count == 0) {
1665 		/*
1666 		 * If dasd hasn't supplied a table yet, we default to every app being eligible
1667 		 * for the freezer.
1668 		 */
1669 		return true;
1670 	}
1671 	for (i = 0; i < entry_count; i++) {
1672 		/*
1673 		 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1674 		 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1675 		 * MAXCOMLEN bytes here since the name in the probabilities table could
1676 		 * be truncated from the proc_t's p_name.
1677 		 */
1678 		if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1679 		    p->p_name,
1680 		    MAXCOMLEN) == 0) {
1681 			probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1682 			break;
1683 		}
1684 	}
1685 	return probability_of_use > 0;
1686 }
1687 
1688 __private_extern__ void
memorystatus_freeze_init(void)1689 memorystatus_freeze_init(void)
1690 {
1691 	kern_return_t result;
1692 	thread_t thread;
1693 
1694 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1695 		int32_t memorystatus_freezer_use_ordered_list_bootarg = 0;
1696 		if (PE_parse_boot_argn("memorystatus_freezer_use_ordered_list", &memorystatus_freezer_use_ordered_list_bootarg, sizeof(memorystatus_freezer_use_ordered_list_bootarg))) {
1697 			memorystatus_freezer_use_ordered_list = (memorystatus_freezer_use_ordered_list_bootarg != 0);
1698 		}
1699 
1700 		int32_t memorystatus_freeze_max_candidate_band_bootarg = 0;
1701 		if (PE_parse_boot_argn("memorystatus_freeze_max_candidate_band", &memorystatus_freeze_max_candidate_band_bootarg, sizeof(memorystatus_freeze_max_candidate_band_bootarg))) {
1702 			if (memorystatus_freeze_max_candidate_band_bootarg >= 0 && memorystatus_freeze_max_candidate_band_bootarg <= 1000) {
1703 				memorystatus_freeze_max_candidate_band = memorystatus_freeze_max_candidate_band_bootarg;
1704 			}
1705 		}
1706 
1707 		/*
1708 		 * This is just the default value if the underlying
1709 		 * storage device doesn't have any specific budget.
1710 		 * We check with the storage layer in memorystatus_freeze_update_throttle()
1711 		 * before we start our freezing the first time.
1712 		 */
1713 		memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1714 
1715 		result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1716 		if (result == KERN_SUCCESS) {
1717 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1718 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1719 			thread_set_thread_name(thread, "VM_freezer");
1720 
1721 			thread_deallocate(thread);
1722 		} else {
1723 			panic("Could not create memorystatus_freeze_thread");
1724 		}
1725 
1726 		freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
1727 		/* Start a new interval */
1728 
1729 		lck_mtx_lock(&freezer_mutex);
1730 		uint32_t budget;
1731 		budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1732 		memorystatus_freeze_force_new_interval(budget);
1733 		lck_mtx_unlock(&freezer_mutex);
1734 	} else {
1735 		memorystatus_freeze_budget_pages_remaining = 0;
1736 	}
1737 }
1738 
1739 void
memorystatus_freeze_configure_for_swap()1740 memorystatus_freeze_configure_for_swap()
1741 {
1742 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1743 		return;
1744 	}
1745 
1746 	assert(memorystatus_swap_all_apps);
1747 
1748 	/*
1749 	 * We expect both a larger working set and larger individual apps
1750 	 * in this mode, so tune up the freezer accordingly.
1751 	 */
1752 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED_DEFAULT;
1753 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED_DEFAULT;
1754 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED_DEFAULT;
1755 
1756 	/*
1757 	 * We don't have a budget when running with full app swap.
1758 	 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1759 	 * unlimited budget.
1760 	 */
1761 	lck_mtx_lock(&freezer_mutex);
1762 	uint32_t budget;
1763 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1764 	memorystatus_freeze_force_new_interval(budget);
1765 	lck_mtx_unlock(&freezer_mutex);
1766 }
1767 
1768 void
memorystatus_freeze_disable_swap()1769 memorystatus_freeze_disable_swap()
1770 {
1771 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1772 		return;
1773 	}
1774 
1775 	assert(!memorystatus_swap_all_apps);
1776 
1777 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_DEFAULT;
1778 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_DEFAULT;
1779 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_DEFAULT;
1780 
1781 	/*
1782 	 * Calculate a new budget now that we're constrained by our daily write budget again.
1783 	 */
1784 	lck_mtx_lock(&freezer_mutex);
1785 	uint32_t budget;
1786 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1787 	memorystatus_freeze_force_new_interval(budget);
1788 	lck_mtx_unlock(&freezer_mutex);
1789 }
1790 
1791 /*
1792  * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
1793  */
1794 static int
memorystatus_freeze_process(proc_t p,coalition_t * coal,pid_t * coalition_list,unsigned int * coalition_list_length)1795 memorystatus_freeze_process(
1796 	proc_t p,
1797 	coalition_t *coal, /* IN / OUT */
1798 	pid_t *coalition_list, /* OUT */
1799 	unsigned int *coalition_list_length /* OUT */)
1800 {
1801 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1802 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1803 
1804 	kern_return_t kr;
1805 	uint32_t purgeable, wired, clean, dirty, shared;
1806 	uint64_t max_pages = 0;
1807 	freezer_error_code_t freezer_error_code = 0;
1808 	bool is_refreeze = false;
1809 	task_t curr_task = TASK_NULL;
1810 
1811 	pid_t aPid = proc_getpid(p);
1812 
1813 	is_refreeze = _memstat_proc_is_frozen(p);
1814 
1815 	/* Ensure the process is eligible for (re-)freezing */
1816 	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
1817 		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
1818 		return EINVAL;
1819 	}
1820 	if (is_refreeze) {
1821 		/*
1822 		 * Not currently being looked at for something.
1823 		 */
1824 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1825 			return EBUSY;
1826 		}
1827 
1828 		/*
1829 		 * We are going to try and refreeze and so re-evaluate
1830 		 * the process. We don't want to double count the shared
1831 		 * memory. So deduct the old snapshot here.
1832 		 */
1833 		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
1834 		p->p_memstat_freeze_sharedanon_pages = 0;
1835 
1836 		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1837 		memorystatus_refreeze_eligible_count--;
1838 	} else {
1839 		if (!memorystatus_is_process_eligible_for_freeze(p)) {
1840 			return EINVAL;
1841 		}
1842 		if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
1843 			memorystatus_freeze_handle_error(p, FREEZER_ERROR_NO_SLOTS, is_refreeze, aPid, (coal ? *coal : NULL), "memorystatus_freeze_process");
1844 			return ENOSPC;
1845 		}
1846 	}
1847 
1848 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1849 		/*
1850 		 * Freezer backed by the compressor and swap file(s)
1851 		 * will hold compressed data.
1852 		 */
1853 
1854 		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1855 	} else {
1856 		/*
1857 		 * We only have the compressor pool.
1858 		 */
1859 		max_pages = UINT32_MAX - 1;
1860 	}
1861 
1862 	/* Mark as locked temporarily to avoid kill */
1863 	p->p_memstat_state |= P_MEMSTAT_LOCKED;
1864 
1865 	p = proc_ref(p, true);
1866 	if (!p) {
1867 		memorystatus_freezer_stats.mfs_error_other_count++;
1868 		return EBUSY;
1869 	}
1870 
1871 	proc_list_unlock();
1872 
1873 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, aPid, max_pages);
1874 
1875 	max_pages = MIN(max_pages, UINT32_MAX);
1876 	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1877 	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1878 		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1879 	}
1880 
1881 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);
1882 
1883 	memorystatus_log_debug("memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
1884 	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %llu, shared %d",
1885 	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1886 	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1887 
1888 	proc_list_lock();
1889 
1890 	/* Success? */
1891 	if (KERN_SUCCESS == kr) {
1892 		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1893 
1894 		p->p_memstat_freeze_sharedanon_pages += shared;
1895 
1896 		memorystatus_frozen_shared_mb += shared;
1897 
1898 		if (!is_refreeze) {
1899 			p->p_memstat_state |= P_MEMSTAT_FROZEN;
1900 			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1901 			memorystatus_frozen_count++;
1902 			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1903 			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
1904 				memorystatus_frozen_count_webcontent++;
1905 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
1906 			}
1907 			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
1908 				memorystatus_freeze_out_of_slots();
1909 			}
1910 		} else {
1911 			// This was a re-freeze
1912 			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1913 				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
1914 				memorystatus_freezer_stats.mfs_refreeze_count++;
1915 			}
1916 		}
1917 
1918 		p->p_memstat_frozen_count++;
1919 
1920 		/*
1921 		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1922 		 * to its higher jetsam band.
1923 		 */
1924 		proc_list_unlock();
1925 
1926 		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1927 
1928 		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1929 #if FREEZE_USE_ELEVATED_INACTIVE_BAND
1930 			int ret;
1931 			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);
1932 
1933 			if (ret) {
1934 				memorystatus_log_error("Elevating the frozen process failed with %d\n", ret);
1935 				/* not fatal */
1936 			}
1937 #endif
1938 
1939 			/* Update stats */
1940 			for (unsigned int i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1941 				throttle_intervals[i].pageouts += dirty;
1942 			}
1943 		}
1944 		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1945 		memorystatus_log("memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
1946 		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
1947 		    memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);
1948 
1949 		proc_list_lock();
1950 
1951 		memorystatus_freeze_pageouts += dirty;
1952 
1953 		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1954 			/*
1955 			 * Add some eviction logic here? At some point should we
1956 			 * jetsam a process to get back its swap space so that we
1957 			 * can freeze a more eligible process at this moment in time?
1958 			 */
1959 		}
1960 
1961 		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
1962 		if (coal != NULL && *coal == NULL) {
1963 			curr_task = proc_task(p);
1964 			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1965 			if (coalition_is_leader(curr_task, *coal)) {
1966 				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
1967 				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);
1968 
1969 				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
1970 					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
1971 				}
1972 			}
1973 		} else {
1974 			/* We just froze an xpc service. Mark it as such for telemetry */
1975 			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
1976 			memorystatus_frozen_count_xpc_service++;
1977 			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
1978 		}
1979 
1980 		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1981 		wakeup(&p->p_memstat_state);
1982 		proc_rele(p);
1983 		return 0;
1984 	} else {
1985 		if (is_refreeze) {
1986 			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
1987 			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
1988 				/*
1989 				 * Keeping this prior-frozen process in this high band when
1990 				 * we failed to re-freeze it due to bad shared memory usage
1991 				 * could cause excessive pressure on the lower bands.
1992 				 * We need to demote it for now. It'll get re-evaluated next
1993 				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
1994 				 * bit.
1995 				 */
1996 
1997 				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1998 				memstat_update_priority_locked(p, JETSAM_PRIORITY_IDLE,
1999 				    MEMSTAT_PRIORITY_INSERT_HEAD | MEMSTAT_PRIORITY_NO_AGING);
2000 			}
2001 		} else {
2002 			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
2003 		}
2004 		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_process");
2005 
2006 		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
2007 		wakeup(&p->p_memstat_state);
2008 		proc_rele(p);
2009 
2010 		return EINVAL;
2011 	}
2012 }
2013 
2014 /*
2015  * Synchronously freeze the passed proc. Called with a reference to the proc held.
2016  *
2017  * Doesn't deal with:
2018  * - re-freezing because this is called on a specific process and
2019  *   not by the freezer thread. If that changes, we'll have to teach it about
2020  *   refreezing a frozen process.
2021  *
2022  * - grouped/coalition freezing because we are hoping to deprecate this
2023  *   interface as it was used by user-space to freeze particular processes. But
2024  *   we have moved away from that approach to having the kernel choose the optimal
2025  *   candidates to be frozen.
2026  *
2027  * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
2028  * returns EINVAL or the value returned by task_freeze().
2029  */
2030 int
memorystatus_freeze_process_sync(proc_t p)2031 memorystatus_freeze_process_sync(proc_t p)
2032 {
2033 	int ret = EINVAL;
2034 	boolean_t memorystatus_freeze_swap_low = FALSE;
2035 
2036 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2037 		return ENOTSUP;
2038 	}
2039 
2040 	lck_mtx_lock(&freezer_mutex);
2041 
2042 	if (p == NULL) {
2043 		memorystatus_log_error("memorystatus_freeze_process_sync: Invalid process\n");
2044 		goto exit;
2045 	}
2046 
2047 	if (memorystatus_freeze_enabled == false) {
2048 		memorystatus_log_error("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
2049 		goto exit;
2050 	}
2051 
2052 	if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2053 		memorystatus_log_info("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
2054 		goto exit;
2055 	}
2056 
2057 	memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2058 	if (!memorystatus_freeze_budget_pages_remaining) {
2059 		memorystatus_log_info("memorystatus_freeze_process_sync: exit with NO available budget\n");
2060 		goto exit;
2061 	}
2062 
2063 	proc_list_lock();
2064 
2065 	ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2066 
2067 exit:
2068 	lck_mtx_unlock(&freezer_mutex);
2069 
2070 	return ret;
2071 }
2072 
2073 proc_t
memorystatus_freezer_candidate_list_get_proc(struct memorystatus_freezer_candidate_list * list,size_t index,uint64_t * pid_mismatch_counter)2074 memorystatus_freezer_candidate_list_get_proc(
2075 	struct memorystatus_freezer_candidate_list *list,
2076 	size_t index,
2077 	uint64_t *pid_mismatch_counter)
2078 {
2079 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2080 	if (list->mfcl_list == NULL || list->mfcl_length <= index) {
2081 		return NULL;
2082 	}
2083 	memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
2084 	if (entry->pid == NO_PID) {
2085 		/* Entry has been removed. */
2086 		return NULL;
2087 	}
2088 
2089 	proc_t p = proc_find_locked(entry->pid);
2090 	if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2091 		/*
2092 		 * We grab a reference when we are about to freeze the process. So drop
2093 		 * the reference that proc_find_locked() grabbed for us.
2094 		 * We also have the proc_list_lock so this process is stable.
2095 		 */
2096 		proc_rele(p);
2097 		return p;
2098 	} else {
2099 		if (p) {
2100 			/* pid rollover. */
2101 			proc_rele(p);
2102 		}
2103 		/*
2104 		 * The proc has exited since we received this list.
2105 		 * It may have re-launched with a new pid, so we go looking for it.
2106 		 */
2107 		unsigned int band = JETSAM_PRIORITY_IDLE;
2108 		p = memorystatus_get_first_proc_locked(&band, TRUE);
2109 		while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
2110 			if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2111 				if (pid_mismatch_counter != NULL) {
2112 					(*pid_mismatch_counter)++;
2113 				}
2114 				/* Stash the pid for faster lookup next time. */
2115 				entry->pid = proc_getpid(p);
2116 				return p;
2117 			}
2118 			p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2119 		}
2120 		/* No match. */
2121 		return NULL;
2122 	}
2123 }
2124 
2125 static size_t
memorystatus_freeze_pid_list(pid_t * pid_list,unsigned int num_pids)2126 memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2127 {
2128 	int ret = 0;
2129 	size_t num_frozen = 0;
2130 	while (num_pids > 0 &&
2131 	    memorystatus_frozen_count < memorystatus_frozen_processes_max) {
2132 		pid_t pid = pid_list[--num_pids];
2133 		proc_t p = proc_find_locked(pid);
2134 		if (p) {
2135 			proc_rele(p);
2136 			ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2137 			if (ret != 0) {
2138 				break;
2139 			}
2140 			num_frozen++;
2141 		}
2142 	}
2143 	return num_frozen;
2144 }
2145 
2146 /*
2147  * Attempt to freeze the best candidate process.
2148  * Keep trying until we freeze something or run out of candidates.
2149  * Returns the number of processes frozen (including coalition members).
2150  */
2151 static size_t
memorystatus_freeze_top_process(void)2152 memorystatus_freeze_top_process(void)
2153 {
2154 	int freeze_ret;
2155 	size_t num_frozen = 0;
2156 	coalition_t coal = COALITION_NULL;
2157 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
2158 	unsigned int ntasks = 0;
2159 	struct memorystatus_freeze_list_iterator iterator;
2160 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2161 
2162 	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
2163 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages);
2164 
2165 	proc_list_lock();
2166 	while (true) {
2167 		proc_t p = memorystatus_freeze_pick_process(&iterator);
2168 		if (p == PROC_NULL) {
2169 			/* Nothing left to freeze */
2170 			break;
2171 		}
2172 		freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
2173 		if (freeze_ret == 0) {
2174 			num_frozen = 1;
2175 			/*
2176 			 * We froze a process successfully.
2177 			 * If it's a coalition head, freeze the coalition.
2178 			 * Then we're done for now.
2179 			 */
2180 			if (coal != NULL) {
2181 				num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
2182 			}
2183 			break;
2184 		} else {
2185 			if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
2186 				break;
2187 			}
2188 			/*
2189 			 * Freeze failed but we're not out of space.
2190 			 * Keep trying to find a good candidate,
2191 			 * memorystatus_freeze_pick_process will not return this proc again until
2192 			 * we reset the iterator.
2193 			 */
2194 		}
2195 	}
2196 	proc_list_unlock();
2197 
2198 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages);
2199 
2200 	return num_frozen;
2201 }
2202 
#if DEVELOPMENT || DEBUG
/*
 * For testing memorystatus_freeze_top_process.
 *
 * Writing any integer to vm.memorystatus_freeze_top_process runs one freeze
 * pass. Returns ENOTSUP when freezer swap is not active, ESRCH when nothing
 * was frozen, 0 otherwise. A read returns 0 and freezes nothing.
 */
static int
sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	/*
	 * Must be initialized: sysctl_handle_int() copies this value out to
	 * the caller on a read, and an uninitialized local would expose
	 * kernel stack contents.
	 */
	int val = 0;
	int ret = 0;
	size_t num_frozen;
	/*
	 * Only freeze on write to prevent freezing during `sysctl -a`.
	 * The actual value written doesn't matter.
	 */
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	lck_mtx_lock(&freezer_mutex);
	num_frozen = memorystatus_freeze_top_process();
	lck_mtx_unlock(&freezer_mutex);

	if (num_frozen == 0) {
		ret = ESRCH;
	}
	return ret;
}
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
#endif /* DEVELOPMENT || DEBUG */
2236 
2237 static inline boolean_t
memorystatus_can_freeze_processes(void)2238 memorystatus_can_freeze_processes(void)
2239 {
2240 	boolean_t ret;
2241 
2242 	proc_list_lock();
2243 
2244 	if (memorystatus_suspended_count) {
2245 		memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2246 
2247 		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2248 			ret = TRUE;
2249 		} else {
2250 			ret = FALSE;
2251 		}
2252 	} else {
2253 		ret = FALSE;
2254 	}
2255 
2256 	proc_list_unlock();
2257 
2258 	return ret;
2259 }
2260 
2261 static boolean_t
memorystatus_can_freeze(boolean_t * memorystatus_freeze_swap_low)2262 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2263 {
2264 	boolean_t can_freeze = TRUE;
2265 
2266 	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
2267 	*  after boot,  and is generally is a no-op once we've reached steady state. */
2268 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2269 		return FALSE;
2270 	}
2271 
2272 	/* Check minimum suspended process threshold. */
2273 	if (!memorystatus_can_freeze_processes()) {
2274 		return FALSE;
2275 	}
2276 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2277 
2278 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2279 		/*
2280 		 * In-core compressor used for freezing WITHOUT on-disk swap support.
2281 		 */
2282 		if (vm_compressor_low_on_space()) {
2283 			if (*memorystatus_freeze_swap_low) {
2284 				*memorystatus_freeze_swap_low = TRUE;
2285 			}
2286 
2287 			can_freeze = FALSE;
2288 		} else {
2289 			if (*memorystatus_freeze_swap_low) {
2290 				*memorystatus_freeze_swap_low = FALSE;
2291 			}
2292 
2293 			can_freeze = TRUE;
2294 		}
2295 	} else {
2296 		/*
2297 		 * Freezing WITH on-disk swap support.
2298 		 *
2299 		 * In-core compressor fronts the swap.
2300 		 */
2301 		if (vm_swap_low_on_space()) {
2302 			if (*memorystatus_freeze_swap_low) {
2303 				*memorystatus_freeze_swap_low = TRUE;
2304 			}
2305 
2306 			can_freeze = FALSE;
2307 		}
2308 	}
2309 
2310 	return can_freeze;
2311 }
2312 
2313 /*
2314  * Demote the given frozen process.
2315  * Caller must hold the proc_list_lock & it will be held on return.
2316  */
2317 static void
memorystatus_demote_frozen_process(proc_t p,bool urgent_mode __unused)2318 memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
2319 {
2320 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2321 
2322 	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
2323 	int priority = JETSAM_PRIORITY_IDLE;
2324 	p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2325 	if (_memstat_proc_has_priority_assertion(p)) {
2326 		priority = MAX(p->p_memstat_assertionpriority, priority);
2327 	}
2328 	if (_memstat_proc_is_tracked(p) && _memstat_proc_is_dirty(p)) {
2329 		priority = MAX(p->p_memstat_requestedpriority, priority);
2330 	}
2331 	memstat_update_priority_locked(p, priority, MEMSTAT_PRIORITY_NO_AGING);
2332 #if DEVELOPMENT || DEBUG
2333 	memorystatus_log("memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
2334 	    (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2335 #endif /* DEVELOPMENT || DEBUG */
2336 
2337 	/*
2338 	 * The freezer thread will consider this a normal app to be frozen
2339 	 * because it is in the IDLE band. So we don't need the
2340 	 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2341 	 * we'll correctly count it as eligible for re-freeze again.
2342 	 *
2343 	 * We don't drop the frozen count because this process still has
2344 	 * state on disk. So there's a chance it gets resumed and then it
2345 	 * should land in the higher jetsam band. For that it needs to
2346 	 * remain marked frozen.
2347 	 */
2348 	if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2349 		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2350 		memorystatus_refreeze_eligible_count--;
2351 	}
2352 }
2353 
2354 static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)2355 memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
2356 {
2357 	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
2358 	unsigned int demoted_proc_count = 0;
2359 	proc_t p = PROC_NULL, next_p = PROC_NULL;
2360 	proc_list_lock();
2361 
2362 	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2363 	while (next_p) {
2364 		p = next_p;
2365 		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2366 
2367 		if (!_memstat_proc_is_frozen(p)) {
2368 			continue;
2369 		}
2370 
2371 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2372 			continue;
2373 		}
2374 
2375 		if (urgent_mode) {
2376 			if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2377 				/*
2378 				 * This process hasn't been thawed recently and so most of
2379 				 * its state sits on NAND and so we skip it -- jetsamming it
2380 				 * won't help with memory pressure.
2381 				 */
2382 				continue;
2383 			}
2384 		} else {
2385 			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2386 				/*
2387 				 * This process has met / exceeded our thaw count demotion threshold
2388 				 * and so we let it live in the higher bands.
2389 				 */
2390 				continue;
2391 			}
2392 		}
2393 
2394 		memorystatus_demote_frozen_process(p, urgent_mode);
2395 		demoted_proc_count++;
2396 		if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2397 			break;
2398 		}
2399 	}
2400 
2401 	proc_list_unlock();
2402 	return demoted_proc_count;
2403 }
2404 
2405 static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)2406 memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2407 {
2408 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2409 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2410 	assert(memorystatus_freezer_use_demotion_list);
2411 	unsigned int demoted_proc_count = 0;
2412 
2413 	proc_list_lock();
2414 	for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2415 		proc_t p = memorystatus_freezer_candidate_list_get_proc(
2416 			&memorystatus_global_demote_list,
2417 			i,
2418 			&memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2419 		if (p != NULL && memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2420 			memorystatus_demote_frozen_process(p, urgent_mode);
2421 			/* Remove this entry now that it's been demoted. */
2422 			memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2423 			demoted_proc_count++;
2424 			/*
2425 			 * We only demote one proc at a time in this mode.
2426 			 * This gives jetsam a chance to kill the recently demoted processes.
2427 			 */
2428 			break;
2429 		}
2430 	}
2431 
2432 	proc_list_unlock();
2433 	return demoted_proc_count;
2434 }
2435 
2436 /*
2437  * This function evaluates if the currently frozen processes deserve
2438  * to stay in the higher jetsam band. There are 2 modes:
2439  * - 'force one == TRUE': (urgent mode)
2440  *	We are out of budget and can't refreeze a process. The process's
2441  * state, if it was resumed, will stay in compressed memory. If we let it
2442  * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
2443  * the lower bands. So we force-demote the least-recently-used-and-thawed
2444  * process.
2445  *
2446  * - 'force_one == FALSE': (normal mode)
2447  *      If the # of thaws of a process is below our threshold, then we
2448  * will demote that process into the IDLE band.
2449  * We don't immediately kill the process here because it  already has
2450  * state on disk and so it might be worth giving it another shot at
2451  * getting thawed/resumed and used.
2452  */
2453 static void
memorystatus_demote_frozen_processes(bool urgent_mode)2454 memorystatus_demote_frozen_processes(bool urgent_mode)
2455 {
2456 	unsigned int demoted_proc_count = 0;
2457 
2458 	if (memorystatus_freeze_enabled == false) {
2459 		/*
2460 		 * Freeze has been disabled likely to
2461 		 * reclaim swap space. So don't change
2462 		 * any state on the frozen processes.
2463 		 */
2464 		return;
2465 	}
2466 
2467 	/*
2468 	 * We have two demotion policies which can be toggled by userspace.
2469 	 * In non-urgent mode, the ordered list policy will
2470 	 * choose a demotion candidate using the list provided by dasd.
2471 	 * The thaw count policy will demote the oldest process that hasn't been
2472 	 * thawed more than memorystatus_thaw_count_demotion_threshold times.
2473 	 *
2474 	 * If urgent_mode is set, both policies will only consider demoting
2475 	 * processes that are re-freeze eligible. But the ordering is different.
2476 	 * The ordered list policy will scan in the order given by dasd.
2477 	 * The thaw count policy will scan through the frozen band.
2478 	 */
2479 	if (memorystatus_freezer_use_demotion_list) {
2480 		demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);
2481 
2482 		if (demoted_proc_count == 0 && urgent_mode) {
2483 			/*
2484 			 * We're out of budget and the demotion list doesn't contain any valid
2485 			 * candidates. We still need to demote something. Fall back to scanning
2486 			 * the frozen band.
2487 			 */
2488 			memorystatus_demote_frozen_processes_using_thaw_count(true);
2489 		}
2490 	} else {
2491 		demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
2492 	}
2493 }
2494 
2495 /*
2496  * Calculate a new freezer budget.
2497  * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2498  * @param burst_multiple The burst_multiple for the new period
2499  * @param interval_duration_min How many minutes will the new interval be?
2500  * @param rollover The amount to rollover from the previous budget.
2501  *
2502  * @return A budget for the new interval.
2503  */
2504 static uint32_t
memorystatus_freeze_calculate_new_budget(unsigned int time_since_last_interval_expired_sec,unsigned int burst_multiple,unsigned int interval_duration_min,uint32_t rollover)2505 memorystatus_freeze_calculate_new_budget(
2506 	unsigned int time_since_last_interval_expired_sec,
2507 	unsigned int burst_multiple,
2508 	unsigned int interval_duration_min,
2509 	uint32_t rollover)
2510 {
2511 	uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2512 	const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2513 	/* Precision factor for days_missed. 2 decimal points. */
2514 	const static unsigned int kFixedPointFactor = 100;
2515 	unsigned int days_missed;
2516 
2517 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2518 		return 0;
2519 	}
2520 	if (memorystatus_swap_all_apps) {
2521 		/*
2522 		 * We effectively have an unlimited budget when app swap is enabled.
2523 		 */
2524 		memorystatus_freeze_daily_mb_max = UINT32_MAX;
2525 		return UINT32_MAX;
2526 	}
2527 
2528 	/* Get the daily budget from the storage layer */
2529 	if (vm_swap_max_budget(&freeze_daily_budget)) {
2530 		freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2531 		assert(freeze_daily_budget_mb <= UINT32_MAX);
2532 		memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2533 		memorystatus_log_info("memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2534 	}
2535 	/* Calculate the daily pageout budget */
2536 	freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2537 	/* Multiply by memorystatus_freeze_budget_multiplier */
2538 	freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;
2539 
2540 	daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2541 
2542 	/*
2543 	 * Add additional budget for time since the interval expired.
2544 	 * For example, if the interval expired n days ago, we should get an additional n days
2545 	 * of budget since we didn't use any budget during those n days.
2546 	 */
2547 	days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2548 	budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2549 	new_budget = rollover + daily_budget_pageouts + budget_missed;
2550 	return (uint32_t) MIN(new_budget, UINT32_MAX);
2551 }
2552 
2553 /*
2554  * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2555  * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2556  * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2557  * it gets set for new processes.
2558  * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2559  */
2560 static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason,bool locked)2561 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2562 {
2563 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2564 	LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2565 	unsigned int band = JETSAM_PRIORITY_IDLE;
2566 	proc_t p;
2567 
2568 	if (!locked) {
2569 		proc_list_lock();
2570 	}
2571 	p = memorystatus_get_first_proc_locked(&band, FALSE);
2572 	while (p) {
2573 		assert(p->p_memstat_effectivepriority == (int32_t) band);
2574 		if (!_memstat_proc_is_frozen(p) &&
2575 		    memorystatus_is_process_eligible_for_freeze(p)) {
2576 			assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2577 			p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2578 		}
2579 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2580 	}
2581 	if (!locked) {
2582 		proc_list_unlock();
2583 	}
2584 }
2585 
2586 /*
2587  * Called after we fail to freeze a process.
2588  * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2589  */
2590 static void
memorystatus_freeze_handle_error(proc_t p,const freezer_error_code_t freezer_error_code,bool was_refreeze,pid_t pid,const coalition_t coalition,const char * log_prefix)2591 memorystatus_freeze_handle_error(
2592 	proc_t p,
2593 	const freezer_error_code_t freezer_error_code,
2594 	bool was_refreeze,
2595 	pid_t pid,
2596 	const coalition_t coalition,
2597 	const char* log_prefix)
2598 {
2599 	const char *reason;
2600 	memorystatus_freeze_skip_reason_t skip_reason;
2601 
2602 	switch (freezer_error_code) {
2603 	case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2604 		memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2605 		reason = "too much shared memory";
2606 		skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2607 		break;
2608 	case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2609 		memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2610 		reason = "private-shared pages ratio";
2611 		skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2612 		break;
2613 	case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2614 		memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2615 		reason = "no compressor space";
2616 		skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2617 		break;
2618 	case FREEZER_ERROR_NO_SWAP_SPACE:
2619 		memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2620 		reason = "no swap space";
2621 		skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2622 		break;
2623 	case FREEZER_ERROR_NO_SLOTS:
2624 		memorystatus_freezer_stats.mfs_skipped_full_count++;
2625 		reason = "no slots";
2626 		skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2627 		break;
2628 	default:
2629 		reason = "unknown error";
2630 		skip_reason = kMemorystatusFreezeSkipReasonOther;
2631 	}
2632 
2633 	p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2634 
2635 	memorystatus_log("%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2636 	    log_prefix, was_refreeze ? "re" : "",
2637 	    (coalition == NULL ? "general" : "coalition-driven"), pid,
2638 	    ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2639 }
2640 
2641 /*
2642  * Start a new normal throttle interval with the given budget.
2643  * Caller must hold the freezer mutex
2644  */
2645 static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget,mach_timespec_t start_ts)2646 memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2647 {
2648 	unsigned int band;
2649 	proc_t p, next_p;
2650 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2651 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2652 
2653 	normal_throttle_window->max_pageouts = new_budget;
2654 	normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2655 	normal_throttle_window->ts.tv_nsec = 0;
2656 	ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2657 	/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2658 	if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2659 		normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2660 	} else {
2661 		normal_throttle_window->pageouts = 0;
2662 	}
2663 	/* Ensure the normal window is now active. */
2664 	memorystatus_freeze_degradation = FALSE;
2665 
2666 	/*
2667 	 * Reset interval statistics.
2668 	 */
2669 	memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2670 	memorystatus_freezer_stats.mfs_process_considered_count = 0;
2671 	memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
2672 	memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
2673 	memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
2674 	memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
2675 	memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
2676 	memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
2677 	memorystatus_freezer_stats.mfs_error_elevated_count = 0;
2678 	memorystatus_freezer_stats.mfs_error_other_count = 0;
2679 	memorystatus_freezer_stats.mfs_refreeze_count = 0;
2680 	memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
2681 	memorystatus_freezer_stats.mfs_below_threshold_count = 0;
2682 	memorystatus_freezer_stats.mfs_skipped_full_count = 0;
2683 	memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
2684 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
2685 	memorystatus_thaw_count = 0;
2686 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
2687 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
2688 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
2689 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
2690 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
2691 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
2692 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
2693 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
2694 	os_atomic_inc(&memorystatus_freeze_current_interval, release);
2695 
2696 	/* Clear the focal thaw bit */
2697 	proc_list_lock();
2698 	band = JETSAM_PRIORITY_IDLE;
2699 	p = PROC_NULL;
2700 	next_p = PROC_NULL;
2701 
2702 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
2703 	while (next_p) {
2704 		p = next_p;
2705 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2706 
2707 		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
2708 			break;
2709 		}
2710 		p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
2711 	}
2712 	proc_list_unlock();
2713 
2714 	schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
2715 }
2716 
#if DEVELOPMENT || DEBUG

/*
 * vm.memorystatus_freeze_calculate_new_budget (DEVELOPMENT/DEBUG only).
 *
 * The value written by the caller is taken as the number of seconds since the
 * last interval expired. The budget that memorystatus_freeze_calculate_new_budget
 * would produce for a normal window (burst multiple 1, no rollover) is copied
 * out to the caller's old-value buffer.
 *
 * Returns the error from sysctl_handle_int, ENOTSUP when freezer swap is not
 * active, or the result of the copyout.
 */
static int
sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error = 0;
	unsigned int time_since_last_interval_expired_sec = 0;
	unsigned int new_budget;

	error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}
	new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
	/*
	 * Bound the copy by the length of the caller's buffer (req->oldlen), not
	 * by the size of the length field itself, so we never write past a buffer
	 * that is smaller than the result.
	 */
	return copyout(&new_budget, req->oldptr, MIN(req->oldlen, sizeof(new_budget)));
}

SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");

#endif /* DEVELOPMENT || DEBUG */
2743 
2744 /*
2745  * Called when we first run out of budget in an interval.
2746  * Marks idle processes as not frozen due to lack of budget.
2747  * NB: It might be worth having a CA event here.
2748  */
2749 static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t * interval)2750 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2751 {
2752 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2753 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2754 
2755 	mach_timespec_t time_left = {0, 0};
2756 	mach_timespec_t now_ts;
2757 	clock_sec_t sec;
2758 	clock_nsec_t nsec;
2759 
2760 	time_left.tv_sec = interval->ts.tv_sec;
2761 	time_left.tv_nsec = 0;
2762 	clock_get_system_nanotime(&sec, &nsec);
2763 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2764 	now_ts.tv_nsec = nsec;
2765 
2766 	SUB_MACH_TIMESPEC(&time_left, &now_ts);
2767 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2768 	memorystatus_log(
2769 		"memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2770 		time_left.tv_sec / 60, memorystatus_frozen_count);
2771 
2772 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2773 }
2774 
2775 /*
2776  * Called when we cross over the threshold of maximum frozen processes allowed.
2777  * Marks remaining idle processes as not frozen due to lack of slots.
2778  */
2779 static void
memorystatus_freeze_out_of_slots(void)2780 memorystatus_freeze_out_of_slots(void)
2781 {
2782 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2783 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2784 	assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
2785 
2786 	memorystatus_log(
2787 		"memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
2788 		memorystatus_frozen_count);
2789 
2790 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
2791 }
2792 
2793 /*
2794  * This function will do 4 things:
2795  *
2796  * 1) check to see if we are currently in a degraded freezer mode, and if so:
2797  *    - check to see if our window has expired and we should exit this mode, OR,
2798  *    - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2799  *
2800  * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2801  *
2802  * 3) check what the current normal window allows for a budget.
2803  *
2804  * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2805  *    what we would normally expect, then we are running low on our daily budget and need to enter
2806  *    degraded perf. mode.
2807  *
2808  *    Caller must hold the freezer mutex
2809  *    Caller must not hold the proc_list lock
2810  */
2811 
2812 static void
memorystatus_freeze_update_throttle(uint64_t * budget_pages_allowed)2813 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2814 {
2815 	clock_sec_t sec;
2816 	clock_nsec_t nsec;
2817 	mach_timespec_t now_ts;
2818 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2819 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2820 
2821 	unsigned int freeze_daily_pageouts_max = 0;
2822 	bool started_with_budget = (*budget_pages_allowed > 0);
2823 
2824 #if DEVELOPMENT || DEBUG
2825 	if (!memorystatus_freeze_throttle_enabled) {
2826 		/*
2827 		 * No throttling...we can use the full budget everytime.
2828 		 */
2829 		*budget_pages_allowed = UINT64_MAX;
2830 		return;
2831 	}
2832 #endif
2833 
2834 	clock_get_system_nanotime(&sec, &nsec);
2835 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2836 	now_ts.tv_nsec = nsec;
2837 
2838 	struct throttle_interval_t *interval = NULL;
2839 
2840 	if (memorystatus_freeze_degradation == TRUE) {
2841 		interval = degraded_throttle_window;
2842 
2843 		if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2844 			interval->pageouts = 0;
2845 			interval->max_pageouts = 0;
2846 		} else {
2847 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2848 		}
2849 	}
2850 
2851 	interval = normal_throttle_window;
2852 
2853 	/*
2854 	 * Current throttle window.
2855 	 * Deny freezing if we have no budget left.
2856 	 * Try graceful degradation if we are within 25% of:
2857 	 * - the daily budget, and
2858 	 * - the current budget left is below our normal budget expectations.
2859 	 */
2860 
2861 	if (memorystatus_freeze_degradation == FALSE) {
2862 		if (interval->pageouts >= interval->max_pageouts) {
2863 			*budget_pages_allowed = 0;
2864 			if (started_with_budget) {
2865 				memorystatus_freeze_out_of_budget(interval);
2866 			}
2867 		} else {
2868 			int budget_left = interval->max_pageouts - interval->pageouts;
2869 			int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2870 
2871 			mach_timespec_t time_left = {0, 0};
2872 
2873 			time_left.tv_sec = interval->ts.tv_sec;
2874 			time_left.tv_nsec = 0;
2875 
2876 			SUB_MACH_TIMESPEC(&time_left, &now_ts);
2877 
2878 			if (budget_left <= budget_threshold) {
2879 				/*
2880 				 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2881 				 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2882 				 * daily pageout budget.
2883 				 */
2884 
2885 				unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2886 				unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2887 
2888 				/*
2889 				 * The current rate of pageouts is below what we would expect for
2890 				 * the normal rate i.e. we have below normal budget left and so...
2891 				 */
2892 
2893 				if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2894 					memorystatus_freeze_degradation = TRUE;
2895 					degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2896 					degraded_throttle_window->pageouts = 0;
2897 
2898 					/*
2899 					 * Switch over to the degraded throttle window so the budget
2900 					 * doled out is based on that window.
2901 					 */
2902 					interval = degraded_throttle_window;
2903 				}
2904 			}
2905 
2906 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2907 		}
2908 	}
2909 
2910 	memorystatus_log_debug(
2911 		"memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining\n",
2912 		interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60);
2913 }
2914 
2915 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_apps_idle_delay_multiplier, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_apps_idle_delay_multiplier, 0, "");
2916 
2917 bool memorystatus_freeze_thread_init = false;
2918 static void
memorystatus_freeze_thread(void * param __unused,wait_result_t wr __unused)2919 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2920 {
2921 	static boolean_t memorystatus_freeze_swap_low = FALSE;
2922 	size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;
2923 
2924 	if (!memorystatus_freeze_thread_init) {
2925 #if CONFIG_THREAD_GROUPS
2926 		thread_group_vm_add();
2927 #endif
2928 		memorystatus_freeze_thread_init = true;
2929 	}
2930 
2931 	max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();
2932 
2933 	lck_mtx_lock(&freezer_mutex);
2934 	if (memorystatus_freeze_enabled) {
2935 		if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
2936 			memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
2937 		}
2938 		while (num_frozen < max_to_freeze &&
2939 		    memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
2940 		    ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2941 		    (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold))) {
2942 			/* Only freeze if we've not exceeded our pageout budgets.*/
2943 			memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2944 
2945 			if (memorystatus_freeze_budget_pages_remaining) {
2946 				num_frozen_this_iteration = memorystatus_freeze_top_process();
2947 				if (num_frozen_this_iteration == 0) {
2948 					/* Nothing left to freeze. */
2949 					break;
2950 				}
2951 				num_frozen += num_frozen_this_iteration;
2952 			} else {
2953 				memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
2954 				break;
2955 			}
2956 		}
2957 	}
2958 
2959 	/*
2960 	 * Give applications currently in the aging band a chance to age out into the idle band before
2961 	 * running the freezer again.
2962 	 */
2963 	if (memorystatus_freeze_dynamic_thread_delay_enabled) {
2964 		if ((num_frozen > 0) || (memorystatus_frozen_count == 0)) {
2965 			memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST;
2966 		} else {
2967 			memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW;
2968 		}
2969 	}
2970 	memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + (memorystatus_apps_idle_delay_time * memorystatus_freeze_apps_idle_delay_multiplier);
2971 
2972 	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2973 	lck_mtx_unlock(&freezer_mutex);
2974 
2975 	thread_block((thread_continue_t) memorystatus_freeze_thread);
2976 }
2977 
2978 int
memorystatus_get_process_is_freezable(pid_t pid,int * is_freezable)2979 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2980 {
2981 	proc_t p = PROC_NULL;
2982 
2983 	if (pid == 0) {
2984 		return EINVAL;
2985 	}
2986 
2987 	p = proc_find(pid);
2988 	if (!p) {
2989 		return ESRCH;
2990 	}
2991 
2992 	/*
2993 	 * Only allow this on the current proc for now.
2994 	 * We can check for privileges and allow targeting another process in the future.
2995 	 */
2996 	if (p != current_proc()) {
2997 		proc_rele(p);
2998 		return EPERM;
2999 	}
3000 
3001 	proc_list_lock();
3002 	*is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
3003 	proc_rele(p);
3004 	proc_list_unlock();
3005 
3006 	return 0;
3007 }
3008 
3009 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)3010 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
3011 {
3012 	proc_t p = PROC_NULL;
3013 
3014 	if (pid == 0) {
3015 		return EINVAL;
3016 	}
3017 
3018 	/*
3019 	 * Only allow this on the current proc for now.
3020 	 * We can check for privileges and allow targeting another process in the future.
3021 	 */
3022 	p = current_proc();
3023 	if (proc_getpid(p) != pid) {
3024 		return EPERM;
3025 	}
3026 
3027 	proc_list_lock();
3028 	*is_frozen = _memstat_proc_is_frozen(p);
3029 	proc_list_unlock();
3030 
3031 	return 0;
3032 }
3033 
3034 int
memorystatus_set_process_is_freezable(pid_t pid,boolean_t is_freezable)3035 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
3036 {
3037 	proc_t p = PROC_NULL;
3038 
3039 	if (pid == 0) {
3040 		return EINVAL;
3041 	}
3042 
3043 	/*
3044 	 * To enable freezable status, you need to be root or an entitlement.
3045 	 */
3046 	if (is_freezable &&
3047 	    !kauth_cred_issuser(kauth_cred_get()) &&
3048 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3049 		return EPERM;
3050 	}
3051 
3052 	p = proc_find(pid);
3053 	if (!p) {
3054 		return ESRCH;
3055 	}
3056 
3057 	/*
3058 	 * A process can change its own status. A coalition leader can
3059 	 * change the status of coalition members.
3060 	 * An entitled process (or root) can change anyone's status.
3061 	 */
3062 	if (p != current_proc() &&
3063 	    !kauth_cred_issuser(kauth_cred_get()) &&
3064 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3065 		coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
3066 		if (!coalition_is_leader(proc_task(current_proc()), coal)) {
3067 			proc_rele(p);
3068 			return EPERM;
3069 		}
3070 	}
3071 
3072 	proc_list_lock();
3073 	if (is_freezable == FALSE) {
3074 		/* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
3075 		p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
3076 		memorystatus_log_info("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
3077 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3078 	} else {
3079 		p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
3080 		memorystatus_log_info("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
3081 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3082 	}
3083 	proc_rele(p);
3084 	proc_list_unlock();
3085 
3086 	return 0;
3087 }
3088 
3089 /*
3090  * Called when process is created before it is added to a memorystatus bucket.
3091  */
3092 void
memorystatus_freeze_init_proc(proc_t p)3093 memorystatus_freeze_init_proc(proc_t p)
3094 {
3095 	/* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3096 	if (memorystatus_freeze_budget_pages_remaining == 0) {
3097 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3098 	} else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3099 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3100 	} else {
3101 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3102 	}
3103 }
3104 
3105 
3106 static int
3107 sysctl_memorystatus_do_fastwake_warmup_all  SYSCTL_HANDLER_ARGS
3108 {
3109 #pragma unused(oidp, arg1, arg2)
3110 
3111 	if (!req->newptr) {
3112 		return EINVAL;
3113 	}
3114 
3115 	/* Need to be root or have entitlement */
3116 	if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
3117 		return EPERM;
3118 	}
3119 
3120 	if (memorystatus_freeze_enabled == false) {
3121 		return ENOTSUP;
3122 	}
3123 
3124 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3125 		return ENOTSUP;
3126 	}
3127 
3128 	do_fastwake_warmup_all();
3129 
3130 	return 0;
3131 }
3132 
3133 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3134     0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3135 
3136 /*
3137  * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3138  * Takes ownership over the original value of list.
3139  * Assumes that list is protected by the freezer_mutex.
3140  * The caller should not hold any locks.
3141  */
3142 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3143 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3144 {
3145 	errno_t error = 0;
3146 	memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3147 	size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3148 
3149 	/* Validate the user provided list. */
3150 	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3151 		memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3152 		return EINVAL;
3153 	}
3154 
3155 	if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3156 		memorystatus_log_error(
3157 			"memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3158 		return EINVAL;
3159 	}
3160 
3161 	entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3162 	entries_size = buffer_size;
3163 	entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3164 	if (entries == NULL) {
3165 		return ENOMEM;
3166 	}
3167 
3168 	error = copyin(buffer, entries, buffer_size);
3169 	if (error != 0) {
3170 		goto out;
3171 	}
3172 
3173 #if MACH_ASSERT
3174 	for (size_t i = 0; i < entry_count; i++) {
3175 		memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3176 		if (entry->version != 1) {
3177 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3178 			error = EINVAL;
3179 			goto out;
3180 		}
3181 		if (i > 0 && entry->priority >= entries[i - 1].priority) {
3182 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3183 			error = EINVAL;
3184 			goto out;
3185 		}
3186 	}
3187 #endif /* MACH_ASSERT */
3188 
3189 	lck_mtx_lock(&freezer_mutex);
3190 
3191 	tmp_entries = list->mfcl_list;
3192 	tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3193 	list->mfcl_list = entries;
3194 	list->mfcl_length = entry_count;
3195 
3196 	lck_mtx_unlock(&freezer_mutex);
3197 
3198 	entries = tmp_entries;
3199 	entries_size = tmp_size;
3200 
3201 out:
3202 	kfree_data(entries, entries_size);
3203 	return error;
3204 }
3205 
3206 errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer,size_t buffer_size)3207 memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
3208 {
3209 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
3210 }
3211 
3212 errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer,size_t buffer_size)3213 memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
3214 {
3215 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
3216 }
3217 
3218 void
memorystatus_freezer_mark_ui_transition(proc_t p)3219 memorystatus_freezer_mark_ui_transition(proc_t p)
3220 {
3221 	bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3222 	proc_list_lock();
3223 
3224 	if (isSysProc(p)) {
3225 		goto out;
3226 	}
3227 
3228 	frozen = _memstat_proc_is_frozen(p);
3229 	previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3230 	xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3231 	suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3232 	if (!suspended) {
3233 		if (frozen) {
3234 			if (!previous_focal_thaw) {
3235 				p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3236 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3237 				if (xpc_service) {
3238 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3239 				}
3240 			}
3241 		}
3242 		os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3243 	}
3244 
3245 out:
3246 	proc_list_unlock();
3247 }
3248 
3249 #endif /* CONFIG_FREEZE */
3250