xref: /xnu-10063.101.15/bsd/kern/kern_memorystatus_freeze.c (revision 94d3b452840153a99b38a3a9659680b2a006908e)
1 /*
2  * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41 
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/kdebug.h>
53 #include <sys/kdebug_kernel.h>
54 #include <sys/proc.h>
55 #include <sys/proc_info.h>
56 #include <sys/reason.h>
57 #include <sys/signal.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/sysproto.h>
61 #include <sys/wait.h>
62 #include <sys/tree.h>
63 #include <sys/priv.h>
64 #include <vm/vm_pageout.h>
65 #include <vm/vm_protos.h>
66 #include <mach/machine/sdt.h>
67 #include <libkern/coreanalytics/coreanalytics.h>
68 #include <libkern/section_keywords.h>
69 #include <stdatomic.h>
70 
71 #include <IOKit/IOBSD.h>
72 
73 #if CONFIG_FREEZE
74 #include <vm/vm_map.h>
75 #endif /* CONFIG_FREEZE */
76 
77 #include <kern/kern_memorystatus_internal.h>
78 #include <sys/kern_memorystatus.h>
79 #include <sys/kern_memorystatus_freeze.h>
80 #include <sys/kern_memorystatus_notify.h>
81 
82 #if CONFIG_JETSAM
83 
84 extern unsigned int memorystatus_available_pages;
85 extern unsigned int memorystatus_available_pages_pressure;
86 extern unsigned int memorystatus_available_pages_critical;
87 extern unsigned int memorystatus_available_pages_critical_base;
88 extern unsigned int memorystatus_available_pages_critical_idle_offset;
89 
90 #else /* CONFIG_JETSAM */
91 
92 extern uint64_t memorystatus_available_pages;
93 extern uint64_t memorystatus_available_pages_pressure;
94 extern uint64_t memorystatus_available_pages_critical;
95 
96 #endif /* CONFIG_JETSAM */
97 
98 unsigned int memorystatus_frozen_count = 0;
99 unsigned int memorystatus_frozen_count_webcontent = 0;
100 unsigned int memorystatus_frozen_count_xpc_service = 0;
101 unsigned int memorystatus_suspended_count = 0;
102 
103 #if CONFIG_FREEZE
104 
105 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
106 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
107 
108 /* Thresholds */
109 unsigned int memorystatus_freeze_threshold = 0;
110 unsigned int memorystatus_freeze_pages_min = 0;
111 unsigned int memorystatus_freeze_pages_max = 0;
112 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
113 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
114 uint64_t     memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
/*
 * Scaling factor for the daily freezer budget. 100 is the default; values
 * above 100 increase the budget and are rejected on release kernels (see
 * sysctl_memorystatus_freeze_budget_multiplier).
 * NOTE(review): presumably applied as multiplier/100 -- confirm against
 * memorystatus_freeze_calculate_new_budget().
 */
uint64_t     memorystatus_freeze_budget_multiplier = 100;
116 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
117 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
118 
119 unsigned int memorystatus_max_frozen_demotions_daily = 0;
120 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
121 unsigned int memorystatus_min_thaw_refreeze_threshold;
122 
123 #if XNU_TARGET_OS_WATCH
124 #define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT true
125 #else
126 #define FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT false
127 #endif
128 boolean_t memorystatus_freeze_dynamic_thread_delay_enabled = FREEZE_DYNAMIC_THREAD_DELAY_ENABLED_DEFAULT;
129 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_dynamic_thread_delay_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_dynamic_thread_delay_enabled, 0, "");
130 
131 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST 1
132 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW 30
133 #define FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST
134 unsigned int memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_DEFAULT;
135 
/*
 * Default for the "freeze_enabled" tunable: on for iOS and watchOS targets,
 * off everywhere else.
 * NOTE(review): the second operand of && was lost from this line;
 * "!XNU_TARGET_OS_XR" is restored here -- confirm against the upstream tree.
 */
#if (XNU_TARGET_OS_IOS && !XNU_TARGET_OS_XR) || XNU_TARGET_OS_WATCH
#define FREEZE_ENABLED_DEFAULT true
#else
#define FREEZE_ENABLED_DEFAULT false
#endif
141 TUNABLE_WRITEABLE(bool, memorystatus_freeze_enabled, "freeze_enabled", FREEZE_ENABLED_DEFAULT);
142 
143 int memorystatus_freeze_wakeup = 0;
144 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
145 
146 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
147 
148 #ifdef XNU_KERNEL_PRIVATE
149 
150 unsigned int memorystatus_frozen_processes_max = 0;
151 unsigned int memorystatus_frozen_shared_mb = 0;
152 unsigned int memorystatus_frozen_shared_mb_max = 0;
153 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
154 #if XNU_TARGET_OS_WATCH
155 unsigned int memorystatus_freeze_private_shared_pages_ratio = 1; /* Ratio of private:shared pages for a process to be freezer-eligible. */
156 #else
157 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
158 #endif
159 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
160 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
161 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
162 
163 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
164 
165 #endif /* XNU_KERNEL_PRIVATE */
166 
167 static inline boolean_t memorystatus_can_freeze_processes(void);
168 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
169 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
170 static uint32_t memorystatus_freeze_calculate_new_budget(
171 	unsigned int time_since_last_interval_expired_sec,
172 	unsigned int burst_multiple,
173 	unsigned int interval_duration_min,
174 	uint32_t rollover);
175 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
176 
177 static void memorystatus_set_freeze_is_enabled(bool enabled);
178 static void memorystatus_disable_freeze(void);
179 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
180 
181 /* Stats */
182 static uint64_t memorystatus_freeze_pageouts = 0;
183 
184 /* Throttling */
185 #define DEGRADED_WINDOW_MINS    (30)
186 #define NORMAL_WINDOW_MINS      (24 * 60)
187 
188 /* Protected by the freezer_mutex */
189 static throttle_interval_t throttle_intervals[] = {
190 	{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
191 	{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
192 };
193 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
194 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
195 uint32_t memorystatus_freeze_current_interval = 0;
196 static thread_call_t freeze_interval_reset_thread_call;
197 static uint32_t memorystatus_freeze_calculate_new_budget(
198 	unsigned int time_since_last_interval_expired_sec,
199 	unsigned int burst_multiple,
200 	unsigned int interval_duration_min,
201 	uint32_t rollover);
202 
203 struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
204 struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
205 /*
206  * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
207  * in order (as opposed to using the older LRU approach).
208  */
209 #if XNU_TARGET_OS_WATCH
210 #define FREEZER_USE_ORDERED_LIST_DEFAULT 1
211 #else
212 #define FREEZER_USE_ORDERED_LIST_DEFAULT 0
213 #endif
214 int memorystatus_freezer_use_ordered_list = FREEZER_USE_ORDERED_LIST_DEFAULT;
215 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
/*
 * When enabled, demotion candidates are chosen from memorystatus_global_demote_list
 */
219 int memorystatus_freezer_use_demotion_list = 0;
220 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
221 
222 extern uint64_t vm_swap_get_free_space(void);
223 extern boolean_t vm_swap_max_budget(uint64_t *);
224 
225 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
226 static void memorystatus_demote_frozen_processes(bool urgent_mode);
227 
228 static void memorystatus_freeze_handle_error(proc_t p, const freezer_error_code_t freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
229 static void memorystatus_freeze_out_of_slots(void);
230 uint64_t memorystatus_freezer_thread_next_run_ts = 0;
231 
232 /* Sysctls needed for aggd stats */
233 
234 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
235 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
236 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
237 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
238 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
239 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
240 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
241 
242 /*
243  * Force a new interval with the given budget (no rollover).
244  */
245 static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)246 memorystatus_freeze_force_new_interval(uint64_t new_budget)
247 {
248 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
249 	mach_timespec_t now_ts;
250 	clock_sec_t sec;
251 	clock_nsec_t nsec;
252 
253 	clock_get_system_nanotime(&sec, &nsec);
254 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
255 	now_ts.tv_nsec = nsec;
256 	memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
257 	/* Don't carry over any excess pageouts since we're forcing a new budget */
258 	normal_throttle_window->pageouts = 0;
259 	memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
260 }
261 #if DEVELOPMENT || DEBUG
262 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
263 {
264 	#pragma unused(arg1, arg2, oidp)
265 	int error, changed;
266 	uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
267 
268 	lck_mtx_lock(&freezer_mutex);
269 
270 	error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
271 	if (changed) {
272 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
273 			lck_mtx_unlock(&freezer_mutex);
274 			return ENOTSUP;
275 		}
276 		memorystatus_freeze_force_new_interval(new_budget);
277 	}
278 
279 	lck_mtx_unlock(&freezer_mutex);
280 	return error;
281 }
282 
283 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
284 #else /* DEVELOPMENT || DEBUG */
285 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
286 #endif /* DEVELOPMENT || DEBUG */
287 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
288 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
289 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
290 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
291 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
292 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
293 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
294 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
295 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
296 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
297 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
298 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
299 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
300 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
301 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
302 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
303 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
304 
305 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
306 
/*
 * Calculates the hit rate for the freezer.
 * The hit rate is defined as the percentage of procs that are currently in the
 * freezer which we have thawed.
 * A low hit rate means we're freezing bad candidates since they're not re-used.
 *
 * Returns a value in [0, 100]. Returns 100 when frozen_count is 0 or when
 * thaw_count exceeds frozen_count.
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0) {
		return 100;
	}
	if (thaw_count > frozen_count) {
		/*
		 * Both counts are using relaxed atomics & could be out of sync
		 * causing us to see thaw_percentage > 100.
		 */
		return 100;
	}
	if (thaw_count > UINT64_MAX / 100) {
		/*
		 * 100 * thaw_count would wrap. frozen_count >= thaw_count here, so
		 * frozen_count / 100 is non-zero and dividing first is safe.
		 */
		return (int)(thaw_count / (frozen_count / 100));
	}
	return (int)(100 * thaw_count / frozen_count);
}
331 
332 static int
get_thaw_percentage()333 get_thaw_percentage()
334 {
335 	uint64_t processes_frozen, processes_thawed;
336 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
337 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
338 	return calculate_thaw_percentage(processes_frozen, processes_thawed);
339 }
340 
341 static int
342 sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
343 {
344 #pragma unused(arg1, arg2)
345 	int thaw_percentage = get_thaw_percentage();
346 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
347 }
348 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
349 
350 static int
get_thaw_percentage_fg()351 get_thaw_percentage_fg()
352 {
353 	uint64_t processes_frozen, processes_thawed_fg;
354 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
355 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
356 	return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
357 }
358 
359 static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
360 {
361 #pragma unused(arg1, arg2)
362 	int thaw_percentage = get_thaw_percentage_fg();
363 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
364 }
365 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
366 
367 static int
get_thaw_percentage_webcontent()368 get_thaw_percentage_webcontent()
369 {
370 	uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
371 	processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
372 	processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
373 	return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
374 }
375 
376 static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
377 {
378 #pragma unused(arg1, arg2)
379 	int thaw_percentage = get_thaw_percentage_webcontent();
380 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
381 }
382 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
383 
384 
385 static int
get_thaw_percentage_bg()386 get_thaw_percentage_bg()
387 {
388 	uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
389 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
390 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
391 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
392 	return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
393 }
394 
395 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
396 {
397 #pragma unused(arg1, arg2)
398 	int thaw_percentage = get_thaw_percentage_bg();
399 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
400 }
401 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
402 
403 static int
get_thaw_percentage_fg_non_xpc_service()404 get_thaw_percentage_fg_non_xpc_service()
405 {
406 	uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
407 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
408 	processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
409 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
410 	processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
411 	/*
412 	 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
413 	 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
414 	 * Clamp just in case.
415 	 */
416 	processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
417 	processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
418 	return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
419 }
420 
421 static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
422 {
423 #pragma unused(arg1, arg2)
424 	int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
425 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
426 }
427 
428 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
429 
430 #define FREEZER_ERROR_STRING_LENGTH 128
431 
432 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
433 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
434 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
435 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
436 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
437 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
438 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
439 static int
440 sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
441 {
442 #pragma unused(arg1, arg2, oidp, req)
443 	int error = 0, changed = 0;
444 	uint64_t val = memorystatus_freeze_budget_multiplier;
445 	unsigned int new_budget;
446 	clock_sec_t sec;
447 	clock_nsec_t nsec;
448 	mach_timespec_t now_ts;
449 
450 	error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
451 	if (error) {
452 		return error;
453 	}
454 	if (changed) {
455 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
456 			return ENOTSUP;
457 		}
458 #if !(DEVELOPMENT || DEBUG)
459 		if (val > 100) {
460 			/* Can not increase budget on release. */
461 			return EINVAL;
462 		}
463 #endif
464 		lck_mtx_lock(&freezer_mutex);
465 
466 		memorystatus_freeze_budget_multiplier = val;
467 		/* Start a new throttle interval with this budget multiplier */
468 		new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
469 		clock_get_system_nanotime(&sec, &nsec);
470 		now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
471 		now_ts.tv_nsec = nsec;
472 		memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
473 		memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
474 
475 		lck_mtx_unlock(&freezer_mutex);
476 	}
477 	return 0;
478 }
479 EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
480 /*
481  * max. # of frozen process demotions we will allow in our daily cycle.
482  */
483 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
484 
485 /*
486  * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
487  */
488 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
489 
490 /*
491  * min # of global thaws needed for us to consider refreezing these processes.
492  */
493 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_min_thaw_refreeze_threshold, &memorystatus_min_thaw_refreeze_threshold, 0, UINT32_MAX, "");
494 
495 #if DEVELOPMENT || DEBUG
496 
497 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
498 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
499 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
500 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
501 
502 /*
503  * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
504  * "0" means no limit.
505  * Default is 10% of system-wide task limit.
506  */
507 
508 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
509 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
510 
511 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
512 
513 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
514 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
515 
516 /*
517  * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
518  * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
519  */
520 boolean_t memorystatus_freeze_to_memory = FALSE;
521 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
522 
523 #define VM_PAGES_FOR_ALL_PROCS    (2)
524 
525 /*
526  * Manual trigger of freeze and thaw for dev / debug kernels only.
527  */
528 static int
529 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
530 {
531 #pragma unused(arg1, arg2)
532 	int error, pid = 0;
533 	proc_t p;
534 	freezer_error_code_t freezer_error_code = 0;
535 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
536 	int ntasks = 0;
537 	coalition_t coal = COALITION_NULL;
538 
539 	error = sysctl_handle_int(oidp, &pid, 0, req);
540 	if (error || !req->newptr) {
541 		return error;
542 	}
543 
544 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
545 		vm_pageout_anonymous_pages();
546 
547 		return 0;
548 	}
549 
550 	lck_mtx_lock(&freezer_mutex);
551 	if (memorystatus_freeze_enabled == false) {
552 		lck_mtx_unlock(&freezer_mutex);
553 		memorystatus_log("sysctl_freeze: Freeze is DISABLED\n");
554 		return ENOTSUP;
555 	}
556 
557 again:
558 	p = proc_find(pid);
559 	if (p != NULL) {
560 		memorystatus_freezer_stats.mfs_process_considered_count++;
561 		uint32_t purgeable, wired, clean, dirty, shared;
562 		uint32_t max_pages = 0, state = 0;
563 
564 		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
565 			/*
566 			 * Freezer backed by the compressor and swap file(s)
567 			 * will hold compressed data.
568 			 *
569 			 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
570 			 * being swapped out to disk. Note that this disables freezer swap support globally,
571 			 * not just for the process being frozen.
572 			 *
573 			 *
574 			 * We don't care about the global freezer budget or the process's (min/max) budget here.
575 			 * The freeze sysctl is meant to force-freeze a process.
576 			 *
577 			 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
578 			 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
579 			 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
580 			 */
581 			max_pages = memorystatus_freeze_pages_max;
582 		} else {
583 			/*
584 			 * We only have the compressor without any swap.
585 			 */
586 			max_pages = UINT32_MAX - 1;
587 		}
588 
589 		proc_list_lock();
590 		state = p->p_memstat_state;
591 		proc_list_unlock();
592 
593 		/*
594 		 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
595 		 * We simply ensure that jetsam is not already working on the process and that the process has not
596 		 * explicitly disabled freezing.
597 		 */
598 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
599 			memorystatus_log_error("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
600 			    (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
601 			    (state & P_MEMSTAT_LOCKED) ? " locked" : "",
602 			    (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
603 
604 			proc_rele(p);
605 			lck_mtx_unlock(&freezer_mutex);
606 			return EPERM;
607 		}
608 
609 		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, pid, max_pages);
610 		error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
611 		if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
612 			memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
613 		}
614 		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);
615 
616 		if (error) {
617 			memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
618 			if (error == KERN_NO_SPACE) {
619 				/* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
620 				error = ENOSPC;
621 			} else {
622 				error = EIO;
623 			}
624 		} else {
625 			proc_list_lock();
626 			if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
627 				p->p_memstat_state |= P_MEMSTAT_FROZEN;
628 				p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
629 				memorystatus_frozen_count++;
630 				os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
631 				if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
632 					memorystatus_frozen_count_webcontent++;
633 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
634 				}
635 				if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
636 					memorystatus_freeze_out_of_slots();
637 				}
638 			} else {
639 				// This was a re-freeze
640 				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
641 					memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
642 					memorystatus_freezer_stats.mfs_refreeze_count++;
643 				}
644 			}
645 			p->p_memstat_frozen_count++;
646 
647 			if (coal != NULL) {
648 				/* We just froze an xpc service. Mark it as such for telemetry */
649 				p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
650 				memorystatus_frozen_count_xpc_service++;
651 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
652 			}
653 
654 
655 			proc_list_unlock();
656 
657 			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
658 				/*
659 				 * We elevate only if we are going to swap out the data.
660 				 */
661 				error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
662 				    memorystatus_freeze_jetsam_band, TRUE);
663 
664 				if (error) {
665 					memorystatus_log_error("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
666 				}
667 			}
668 		}
669 
670 		if ((error == 0) && (coal == NULL)) {
671 			/*
672 			 * We froze a process and so we check to see if it was
673 			 * a coalition leader and if it has XPC services that
674 			 * might need freezing.
675 			 * Only one leader can be frozen at a time and so we shouldn't
676 			 * enter this block more than once per call. Hence the
677 			 * check that 'coal' has to be NULL. We should make this an
678 			 * assert() or panic() once we have a much more concrete way
679 			 * to detect an app vs a daemon.
680 			 */
681 
682 			task_t          curr_task = NULL;
683 
684 			curr_task = proc_task(p);
685 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
686 			if (coalition_is_leader(curr_task, coal)) {
687 				ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
688 				    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
689 
690 				if (ntasks > MAX_XPC_SERVICE_PIDS) {
691 					ntasks = MAX_XPC_SERVICE_PIDS;
692 				}
693 			}
694 		}
695 
696 		proc_rele(p);
697 
698 		while (ntasks) {
699 			pid = pid_list[--ntasks];
700 			goto again;
701 		}
702 
703 		lck_mtx_unlock(&freezer_mutex);
704 		return error;
705 	} else {
706 		memorystatus_log_error("sysctl_freeze: Invalid process\n");
707 	}
708 
709 
710 	lck_mtx_unlock(&freezer_mutex);
711 	return EINVAL;
712 }
713 
/*
 * Register kern.memorystatus_freeze: a write-only (CTLFLAG_WR), masked,
 * integer-typed sysctl whose requests are serviced by
 * sysctl_memorystatus_freeze().
 */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");
716 
717 /*
718  * Manual trigger of agressive frozen demotion for dev / debug kernels only.
719  */
720 static int
721 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
722 {
723 #pragma unused(arg1, arg2)
724 	int error, val;
725 	/*
726 	 * Only demote on write to prevent demoting during `sysctl -a`.
727 	 * The actual value written doesn't matter.
728 	 */
729 	error = sysctl_handle_int(oidp, &val, 0, req);
730 	if (error || !req->newptr) {
731 		return error;
732 	}
733 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
734 		return ENOTSUP;
735 	}
736 	lck_mtx_lock(&freezer_mutex);
737 	memorystatus_demote_frozen_processes(false);
738 	lck_mtx_unlock(&freezer_mutex);
739 	return 0;
740 }
741 
742 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
743 
744 static int
745 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
746 {
747 #pragma unused(arg1, arg2)
748 
749 	int error, pid = 0;
750 	proc_t p;
751 
752 	if (memorystatus_freeze_enabled == false) {
753 		return ENOTSUP;
754 	}
755 
756 	error = sysctl_handle_int(oidp, &pid, 0, req);
757 	if (error || !req->newptr) {
758 		return error;
759 	}
760 
761 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
762 		do_fastwake_warmup_all();
763 		return 0;
764 	} else {
765 		p = proc_find(pid);
766 		if (p != NULL) {
767 			error = task_thaw(proc_task(p));
768 
769 			if (error) {
770 				error = EIO;
771 			} else {
772 				/*
773 				 * task_thaw() succeeded.
774 				 *
775 				 * We increment memorystatus_frozen_count on the sysctl freeze path.
776 				 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
777 				 * when this process exits.
778 				 *
779 				 * proc_list_lock();
780 				 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
781 				 * proc_list_unlock();
782 				 */
783 			}
784 			proc_rele(p);
785 			return error;
786 		}
787 	}
788 
789 	return EINVAL;
790 }
791 
792 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
793     0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
794 
795 
/*
 * Header of the report produced by memorystatus_freezer_get_status().
 * It is followed in the same buffer by an array of proc_freezable_status_t.
 */
typedef struct _global_freezable_status {
	/* memorystatus_available_pages < memorystatus_freeze_threshold */
	boolean_t       freeze_pages_threshold_crossed;
	/*
	 * Ordered-list mode: frozen count is below the candidate list length.
	 * Otherwise: (suspended - frozen) exceeds memorystatus_freeze_suspended_threshold.
	 */
	boolean_t       freeze_eligible_procs_available;
	/* Current time is earlier than memorystatus_freezer_thread_next_run_ts. */
	boolean_t       freeze_scheduled_in_future;
}global_freezable_status_t;
801 
/*
 * Per-process entry of the report produced by
 * memorystatus_freezer_get_status().
 */
typedef struct _proc_freezable_status {
	/*
	 * TRUE when the process is suspended and none of TERMINATED, LOCKED,
	 * FREEZE_DISABLED or FREEZE_IGNORE is set in its memstat state.
	 */
	boolean_t    freeze_has_memstat_state;
	/* TRUE when the page count meets memorystatus_freeze_pages_min (always TRUE for XPC services). */
	boolean_t    freeze_has_pages_min;
	/* Use probability from the global probabilities table; -1 for XPC services or when there is no table. */
	int        freeze_has_probability;
	/* One of the FREEZE_PROC_LEADER_FREEZABLE_* values. */
	int        freeze_leader_eligible;
	/* TRUE when an eval-only task_freeze() was run for this process. */
	boolean_t    freeze_attempted;
	/* Snapshot of the process' p_memstat_state. */
	uint32_t    p_memstat_state;
	/* Page count of the task; set to -1 (i.e. UINT32_MAX) for XPC services. */
	uint32_t    p_pages;
	/* Freezer error code from a failed evaluation, or FREEZER_ERROR_GENERIC when the leader is not freezable. */
	int        p_freeze_error_code;
	/* Pid of the process described by this entry. */
	int        p_pid;
	/* Pid of the coalition leader for XPC services; 0 when this isn't a coalition driven freeze. */
	int        p_leader_pid;
	/* Process name, copied with strlcpy(). */
	char        p_name[MAXCOMLEN + 1];
}proc_freezable_status_t;
815 
#define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */

/*
 * For coalition based freezing evaluations, we proceed as follows:
 *  - detect that the process is a coalition member and an XPC service
 *  - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be freezable too
 *
 * Once we are done evaluating all processes, we do a quick run through all
 * processes and for a coalition member XPC service we look up the 'freezable'
 * status of its leader and iff:
 *  - the xpc service is freezable i.e. its individual freeze evaluation worked
 *  - and, its leader is also marked freezable
 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */

/* Leader eligibility not resolved yet (XPC service awaiting the second pass). */
#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN   (-1)
/* The process is its own leader, or its leader is frozen / evaluated as freezable. */
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1)
/* The leader was not found in the report or its evaluation did not succeed. */
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2)
835 
836 static int
memorystatus_freezer_get_status(user_addr_t buffer,size_t buffer_size,int32_t * retval)837 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
838 {
839 	uint32_t            proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
840 	global_freezable_status_t    *list_head;
841 	proc_freezable_status_t     *list_entry, *list_entry_start;
842 	size_t                list_size = 0, entry_count = 0;
843 	proc_t                p, leader_proc;
844 	memstat_bucket_t        *bucket;
845 	uint32_t            state = 0, pages = 0;
846 	boolean_t            try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
847 	int                error = 0, probability_of_use = 0;
848 	pid_t              leader_pid = 0;
849 	struct memorystatus_freeze_list_iterator iterator;
850 
851 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
852 		return ENOTSUP;
853 	}
854 
855 	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
856 
857 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
858 
859 	if (buffer_size < list_size) {
860 		return EINVAL;
861 	}
862 
863 	list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
864 	if (list_head == NULL) {
865 		return ENOMEM;
866 	}
867 
868 	list_size = sizeof(global_freezable_status_t);
869 
870 	lck_mtx_lock(&freezer_mutex);
871 	proc_list_lock();
872 
873 	uint64_t curr_time = mach_absolute_time();
874 
875 	list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
876 	if (memorystatus_freezer_use_ordered_list) {
877 		list_head->freeze_eligible_procs_available = memorystatus_frozen_count < memorystatus_global_freeze_list.mfcl_length;
878 	} else {
879 		list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
880 	}
881 	list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
882 
883 	list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
884 	list_entry = list_entry_start;
885 
886 	bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
887 
888 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
889 
890 	if (memorystatus_freezer_use_ordered_list) {
891 		while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
892 			p = memorystatus_freezer_candidate_list_get_proc(
893 				&memorystatus_global_freeze_list,
894 				(iterator.global_freeze_list_index)++,
895 				NULL);
896 			if (p != PROC_NULL) {
897 				break;
898 			}
899 		}
900 	} else {
901 		p = memorystatus_get_first_proc_locked(&band, FALSE);
902 	}
903 
904 	proc_count++;
905 
906 	while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
907 	    (p) &&
908 	    (list_size < buffer_size)) {
909 		if (isSysProc(p)) {
910 			/*
911 			 * Daemon:- We will consider freezing it iff:
912 			 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
913 			 * - its role in the coalition is XPC service.
914 			 *
915 			 * We skip memory size requirements in this case.
916 			 */
917 
918 			coalition_t     coal = COALITION_NULL;
919 			task_t          leader_task = NULL, curr_task = NULL;
920 			int             task_role_in_coalition = 0;
921 
922 			curr_task = proc_task(p);
923 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
924 
925 			if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
926 				/*
927 				 * By default, XPC services without an app
928 				 * will be the leader of their own single-member
929 				 * coalition.
930 				 */
931 				goto skip_ineligible_xpc;
932 			}
933 
934 			leader_task = coalition_get_leader(coal);
935 			if (leader_task == TASK_NULL) {
936 				/*
937 				 * This jetsam coalition is currently leader-less.
938 				 * This could happen if the app died, but XPC services
939 				 * have not yet exited.
940 				 */
941 				goto skip_ineligible_xpc;
942 			}
943 
944 			leader_proc = (proc_t)get_bsdtask_info(leader_task);
945 			task_deallocate(leader_task);
946 
947 			if (leader_proc == PROC_NULL) {
948 				/* leader task is exiting */
949 				goto skip_ineligible_xpc;
950 			}
951 
952 			task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
953 
954 			if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
955 				xpc_skip_size_probability_check = TRUE;
956 				leader_pid = proc_getpid(leader_proc);
957 				goto continue_eval;
958 			}
959 
960 skip_ineligible_xpc:
961 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
962 			proc_count++;
963 			continue;
964 		}
965 
966 continue_eval:
967 		strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
968 
969 		list_entry->p_pid = proc_getpid(p);
970 
971 		state = p->p_memstat_state;
972 
973 		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
974 		    !(state & P_MEMSTAT_SUSPENDED)) {
975 			try_freeze = list_entry->freeze_has_memstat_state = FALSE;
976 		} else {
977 			try_freeze = list_entry->freeze_has_memstat_state = TRUE;
978 		}
979 
980 		list_entry->p_memstat_state = state;
981 
982 		if (xpc_skip_size_probability_check == TRUE) {
983 			/*
984 			 * Assuming the coalition leader is freezable
985 			 * we don't care re. minimum pages and probability
986 			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
987 			 * XPC services have to be explicity opted-out of the disabled
988 			 * state. And we checked that state above.
989 			 */
990 			list_entry->freeze_has_pages_min = TRUE;
991 			list_entry->p_pages = -1;
992 			list_entry->freeze_has_probability = -1;
993 
994 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
995 			list_entry->p_leader_pid = leader_pid;
996 
997 			xpc_skip_size_probability_check = FALSE;
998 		} else {
999 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
1000 			list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
1001 
1002 			memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
1003 			if (pages < memorystatus_freeze_pages_min) {
1004 				try_freeze = list_entry->freeze_has_pages_min = FALSE;
1005 			} else {
1006 				list_entry->freeze_has_pages_min = TRUE;
1007 			}
1008 
1009 			list_entry->p_pages = pages;
1010 
1011 			if (entry_count) {
1012 				uint32_t j = 0;
1013 				for (j = 0; j < entry_count; j++) {
1014 					if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
1015 					    p->p_name,
1016 					    MAXCOMLEN) == 0) {
1017 						probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
1018 						break;
1019 					}
1020 				}
1021 
1022 				list_entry->freeze_has_probability = probability_of_use;
1023 
1024 				try_freeze = ((probability_of_use > 0) && try_freeze);
1025 			} else {
1026 				list_entry->freeze_has_probability = -1;
1027 			}
1028 		}
1029 
1030 		if (try_freeze) {
1031 			uint32_t purgeable, wired, clean, dirty, shared;
1032 			uint32_t max_pages = 0;
1033 			int freezer_error_code = 0;
1034 
1035 			error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
1036 
1037 			if (error) {
1038 				list_entry->p_freeze_error_code = freezer_error_code;
1039 			}
1040 
1041 			list_entry->freeze_attempted = TRUE;
1042 		}
1043 
1044 		list_entry++;
1045 		freeze_eligible_proc_considered++;
1046 
1047 		list_size += sizeof(proc_freezable_status_t);
1048 
1049 		if (memorystatus_freezer_use_ordered_list) {
1050 			p = PROC_NULL;
1051 			while (iterator.global_freeze_list_index < memorystatus_global_freeze_list.mfcl_length) {
1052 				p = memorystatus_freezer_candidate_list_get_proc(
1053 					&memorystatus_global_freeze_list,
1054 					(iterator.global_freeze_list_index)++,
1055 					NULL);
1056 				if (p != PROC_NULL) {
1057 					break;
1058 				}
1059 			}
1060 		} else {
1061 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1062 		}
1063 
1064 		proc_count++;
1065 	}
1066 
1067 	proc_list_unlock();
1068 	lck_mtx_unlock(&freezer_mutex);
1069 
1070 	list_entry = list_entry_start;
1071 
1072 	for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1073 		if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1074 			leader_pid = list_entry[xpc_index].p_leader_pid;
1075 
1076 			leader_proc = proc_find(leader_pid);
1077 
1078 			if (leader_proc) {
1079 				if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
1080 					/*
1081 					 * Leader has already been frozen.
1082 					 */
1083 					list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1084 					proc_rele(leader_proc);
1085 					continue;
1086 				}
1087 				proc_rele(leader_proc);
1088 			}
1089 
1090 			for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1091 				if (list_entry[leader_index].p_pid == leader_pid) {
1092 					if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1093 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1094 					} else {
1095 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1096 						list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1097 					}
1098 					break;
1099 				}
1100 			}
1101 
1102 			/*
1103 			 * Didn't find the leader entry. This might be likely because
1104 			 * the leader never made it down to band 0.
1105 			 */
1106 			if (leader_index == freeze_eligible_proc_considered) {
1107 				list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1108 				list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1109 			}
1110 		}
1111 	}
1112 
1113 	buffer_size = MIN(list_size, INT32_MAX);
1114 
1115 	error = copyout(list_head, buffer, buffer_size);
1116 	if (error == 0) {
1117 		*retval = (int32_t) buffer_size;
1118 	} else {
1119 		*retval = 0;
1120 	}
1121 
1122 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1123 	kfree_data(list_head, list_size);
1124 
1125 	memorystatus_log_debug("memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
1126 
1127 	return error;
1128 }
1129 
1130 #endif /* DEVELOPMENT || DEBUG */
1131 
1132 /*
1133  * Get a list of all processes in the freezer band which are currently frozen.
1134  * Used by powerlog to collect analytics on frozen process.
1135  */
1136 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1137 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1138 {
1139 	global_frozen_procs_t *frozen_procs = NULL;
1140 	uint32_t band = memorystatus_freeze_jetsam_band;
1141 	proc_t p;
1142 	uint32_t state;
1143 	int error;
1144 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1145 		return ENOTSUP;
1146 	}
1147 	if (buffer_size < sizeof(global_frozen_procs_t)) {
1148 		return EINVAL;
1149 	}
1150 	frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1151 	if (frozen_procs == NULL) {
1152 		return ENOMEM;
1153 	}
1154 
1155 	proc_list_lock();
1156 	p = memorystatus_get_first_proc_locked(&band, FALSE);
1157 	while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1158 		state = p->p_memstat_state;
1159 		if (state & P_MEMSTAT_FROZEN) {
1160 			frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1161 			strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1162 			    p->p_name, sizeof(proc_name_t));
1163 			frozen_procs->gfp_num_frozen++;
1164 		}
1165 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1166 	}
1167 	proc_list_unlock();
1168 
1169 	buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1170 	error = copyout(frozen_procs, buffer, buffer_size);
1171 	if (error == 0) {
1172 		*retval = (int32_t) buffer_size;
1173 	} else {
1174 		*retval = 0;
1175 	}
1176 	kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1177 
1178 	return error;
1179 }
1180 
1181 /*
1182  * If dasd is running an experiment that impacts their freezer candidate selection,
1183  * we record that in our telemetry.
1184  */
1185 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1186 
1187 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1188 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1189 {
1190 	memorystatus_freezer_trial_identifiers_v1 identifiers;
1191 	int error = 0;
1192 
1193 	if (buffer_size != sizeof(identifiers)) {
1194 		return EINVAL;
1195 	}
1196 	error = copyin(buffer, &identifiers, sizeof(identifiers));
1197 	if (error != 0) {
1198 		return error;
1199 	}
1200 	if (identifiers.version != 1) {
1201 		return EINVAL;
1202 	}
1203 	dasd_trial_identifiers = identifiers;
1204 	*retval = 0;
1205 	return error;
1206 }
1207 
1208 /*
1209  * Reset the freezer state by wiping out all suspended frozen apps, clearing
1210  * per-process freezer state, and starting a fresh interval.
1211  */
1212 static int
memorystatus_freezer_reset_state(int32_t * retval)1213 memorystatus_freezer_reset_state(int32_t *retval)
1214 {
1215 	uint32_t band = JETSAM_PRIORITY_IDLE;
1216 	/* Don't kill above the frozen band */
1217 	uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
1218 	proc_t next_p = PROC_NULL;
1219 	uint64_t new_budget;
1220 
1221 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1222 		return ENOTSUP;
1223 	}
1224 
1225 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
1226 	if (jetsam_reason == OS_REASON_NULL) {
1227 		memorystatus_log_error("memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
1228 	}
1229 	lck_mtx_lock(&freezer_mutex);
1230 	kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
1231 	proc_list_lock();
1232 
1233 	/*
1234 	 * Clear the considered and skip reason flags on all processes
1235 	 * so we're starting fresh with the new policy.
1236 	 */
1237 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1238 	while (next_p) {
1239 		proc_t p = next_p;
1240 		uint32_t state = p->p_memstat_state;
1241 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1242 
1243 		if (p->p_memstat_effectivepriority > kMaxBand) {
1244 			break;
1245 		}
1246 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1247 			continue;
1248 		}
1249 
1250 		p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
1251 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1252 	}
1253 
1254 	proc_list_unlock();
1255 
1256 	new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1257 	memorystatus_freeze_force_new_interval(new_budget);
1258 
1259 	lck_mtx_unlock(&freezer_mutex);
1260 	*retval = 0;
1261 	return 0;
1262 }
1263 
1264 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1265 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1266 {
1267 	int err = ENOTSUP;
1268 
1269 #if DEVELOPMENT || DEBUG
1270 	if (flags == FREEZER_CONTROL_GET_STATUS) {
1271 		err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1272 	}
1273 #endif /* DEVELOPMENT || DEBUG */
1274 	if (flags == FREEZER_CONTROL_GET_PROCS) {
1275 		err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1276 	} else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1277 		err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1278 	} else if (flags == FREEZER_CONTROL_RESET_STATE) {
1279 		err = memorystatus_freezer_reset_state(retval);
1280 	}
1281 
1282 	return err;
1283 }
1284 
/*
 * VM routines declared here for use by memorystatus_disable_freeze(), which
 * reactivates throttled pages and then requests swap defragmentation /
 * reclaim. Their definitions are not in this part of the file.
 */
extern void        vm_swap_consider_defragmenting(int);
extern void vm_page_reactivate_all_throttled(void);
1287 
1288 static bool
kill_all_frozen_processes(uint64_t max_band,bool suspended_only,os_reason_t jetsam_reason,uint64_t * memory_reclaimed_out)1289 kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
1290 {
1291 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1292 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1293 
1294 	unsigned int band = 0;
1295 	proc_t p = PROC_NULL, next_p = PROC_NULL;
1296 	pid_t pid = 0;
1297 	bool retval = false, killed = false;
1298 	uint32_t state;
1299 	uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
1300 	proc_list_lock();
1301 
1302 	band = JETSAM_PRIORITY_IDLE;
1303 	p = PROC_NULL;
1304 	next_p = PROC_NULL;
1305 
1306 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1307 	while (next_p) {
1308 		p = next_p;
1309 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1310 		state = p->p_memstat_state;
1311 
1312 		if (p->p_memstat_effectivepriority > max_band) {
1313 			break;
1314 		}
1315 
1316 		if (!(state & P_MEMSTAT_FROZEN)) {
1317 			continue;
1318 		}
1319 
1320 		if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
1321 			continue;
1322 		}
1323 
1324 		if (state & P_MEMSTAT_ERROR) {
1325 			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
1326 		}
1327 
1328 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1329 			memorystatus_log("memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
1330 			skips++;
1331 			continue;
1332 		}
1333 
1334 		footprint = get_task_phys_footprint(proc_task(p));
1335 		pid = proc_getpid(p);
1336 		proc_list_unlock();
1337 
1338 		/* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
1339 		os_reason_ref(jetsam_reason);
1340 		retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
1341 		if (retval) {
1342 			killed = true;
1343 			memory_reclaimed += footprint;
1344 		}
1345 		proc_list_lock();
1346 		/*
1347 		 * The bands might have changed when we dropped the proc list lock.
1348 		 * So start from the beginning.
1349 		 * Since we're preventing any further freezing by holding the freezer mutex,
1350 		 * and we skip anything we've already tried to kill this is guaranteed to terminate.
1351 		 */
1352 		band = 0;
1353 		skips = 0;
1354 		next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1355 	}
1356 
1357 	assert(skips <= memorystatus_frozen_count);
1358 #if DEVELOPMENT || DEBUG
1359 	if (!suspended_only && max_band >= JETSAM_PRIORITY_FOREGROUND) {
1360 		/*
1361 		 * Check that we've killed all frozen processes.
1362 		 * Note that they may still be exiting (represented by skips).
1363 		 */
1364 		if (memorystatus_frozen_count - skips > 0) {
1365 			assert(memorystatus_freeze_enabled == false);
1366 
1367 			panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
1368 			    memorystatus_frozen_count);
1369 		}
1370 	}
1371 #endif /* DEVELOPMENT || DEBUG */
1372 	if (memory_reclaimed_out) {
1373 		*memory_reclaimed_out = memory_reclaimed;
1374 	}
1375 	proc_list_unlock();
1376 	return killed;
1377 }
1378 
1379 /*
1380  * Disables the freezer, jetsams all frozen processes,
1381  * and reclaims the swap space immediately.
1382  */
1383 
1384 void
memorystatus_disable_freeze(void)1385 memorystatus_disable_freeze(void)
1386 {
1387 	uint64_t memory_reclaimed = 0;
1388 	bool killed = false;
1389 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1390 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1391 
1392 
1393 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1394 	    memorystatus_available_pages);
1395 	memorystatus_log("memorystatus: Disabling freezer. Will kill all frozen processes\n");
1396 
1397 	/*
1398 	 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1399 	 * and all frozen processes will be killed
1400 	 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1401 	 * ensures that no new processes will be frozen once we release the mutex.
1402 	 *
1403 	 */
1404 	memorystatus_freeze_enabled = false;
1405 
1406 	/*
1407 	 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1408 	 */
1409 	vm_page_reactivate_all_throttled();
1410 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1411 	if (jetsam_reason == OS_REASON_NULL) {
1412 		memorystatus_log_error("memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1413 	}
1414 
1415 	killed = kill_all_frozen_processes(JETSAM_PRIORITY_FOREGROUND, false, jetsam_reason, &memory_reclaimed);
1416 
1417 	if (killed) {
1418 		memorystatus_log_info("memorystatus: Killed all frozen processes.\n");
1419 		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1420 
1421 		proc_list_lock();
1422 		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1423 		    sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1424 		uint64_t timestamp_now = mach_absolute_time();
1425 		memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1426 		memorystatus_jetsam_snapshot->js_gencount++;
1427 		if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1428 		    timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1429 			proc_list_unlock();
1430 			int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1431 			if (!ret) {
1432 				proc_list_lock();
1433 				memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1434 			}
1435 		}
1436 		proc_list_unlock();
1437 	} else {
1438 		memorystatus_log_info("memorystatus: No frozen processes to kill.\n");
1439 	}
1440 
1441 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1442 	    memorystatus_available_pages, memory_reclaimed);
1443 
1444 	return;
1445 }
1446 
1447 static void
memorystatus_set_freeze_is_enabled(bool enabled)1448 memorystatus_set_freeze_is_enabled(bool enabled)
1449 {
1450 	lck_mtx_lock(&freezer_mutex);
1451 	if (enabled != memorystatus_freeze_enabled) {
1452 		if (enabled) {
1453 			memorystatus_freeze_enabled = true;
1454 		} else {
1455 			memorystatus_disable_freeze();
1456 		}
1457 	}
1458 	lck_mtx_unlock(&freezer_mutex);
1459 }
1460 
1461 
1462 static int
1463 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1464 {
1465 #pragma unused(arg1, arg2)
1466 	int error, val = memorystatus_freeze_enabled ? 1 : 0;
1467 
1468 	error = sysctl_handle_int(oidp, &val, 0, req);
1469 	if (error || !req->newptr) {
1470 		return error;
1471 	}
1472 
1473 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1474 		memorystatus_log_error("memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1475 		return EINVAL;
1476 	}
1477 
1478 	memorystatus_set_freeze_is_enabled(val);
1479 
1480 	return 0;
1481 }
1482 
1483 SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1484 
1485 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1486 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1487 {
1488 	uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1489 	uint64_t interval_expiration_absolutetime;
1490 	nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1491 	memorystatus_log_info("memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1492 
1493 	thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1494 }
1495 
/*
 * Trial identifiers declared here but defined outside this part of the file.
 * The freezer_interval CoreAnalytics event below has fields with the same
 * names (presumably populated from these; confirm in the recording code).
 */
extern uuid_string_t trial_treatment_id;
extern uuid_string_t trial_experiment_id;
extern int trial_deployment_id;
1499 
/*
 * CoreAnalytics event describing one freezer interval. It is allocated and
 * filled in by memorystatus_freeze_record_interval_analytics(). All fields
 * are integers except the four trial/experiment identifiers, which are
 * fixed-size strings of CA_UUID_LEN.
 */
CA_EVENT(freezer_interval,
    CA_INT, budget_remaining,
    CA_INT, error_below_min_pages,
    CA_INT, error_excess_shared_memory,
    CA_INT, error_low_private_shared_ratio,
    CA_INT, error_no_compressor_space,
    CA_INT, error_no_swap_space,
    CA_INT, error_low_probability_of_use,
    CA_INT, error_elevated,
    CA_INT, error_other,
    CA_INT, frozen_count,
    CA_INT, pageouts,
    CA_INT, refreeze_average,
    CA_INT, skipped_full,
    CA_INT, skipped_shared_mb_high,
    CA_INT, swapusage,
    CA_INT, thaw_count,
    CA_INT, thaw_percentage,
    CA_INT, thaws_per_gb,
    CA_INT, trial_deployment_id,
    CA_INT, dasd_trial_deployment_id,
    CA_INT, budget_exhaustion_duration_remaining,
    CA_INT, thaw_percentage_webcontent,
    CA_INT, thaw_percentage_fg,
    CA_INT, thaw_percentage_bg,
    CA_INT, thaw_percentage_fg_non_xpc_service,
    CA_INT, fg_resume_count,
    CA_INT, unique_freeze_count,
    CA_INT, unique_thaw_count,
    CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1533 
1534 extern uint64_t vm_swap_get_total_space(void);
1535 extern uint64_t vm_swap_get_free_space(void);
1536 
1537 /*
1538  * Record statistics from the expiring interval
1539  * via core analytics.
1540  */
1541 static void
memorystatus_freeze_record_interval_analytics(void)1542 memorystatus_freeze_record_interval_analytics(void)
1543 {
1544 	ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
1545 	CA_EVENT_TYPE(freezer_interval) * e = event->data;
1546 	e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
1547 	uint64_t process_considered_count, refrozen_count, below_threshold_count;
1548 	memory_object_size_t swap_size;
1549 	process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
1550 	if (process_considered_count != 0) {
1551 		e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
1552 		e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
1553 		e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
1554 		e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
1555 		e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
1556 		e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
1557 		e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
1558 		e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
1559 	}
1560 	e->frozen_count = memorystatus_frozen_count;
1561 	e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
1562 	refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
1563 	if (refrozen_count != 0) {
1564 		e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
1565 	}
1566 	below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
1567 	if (below_threshold_count != 0) {
1568 		e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
1569 		e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
1570 	}
1571 	if (VM_CONFIG_SWAP_IS_PRESENT) {
1572 		swap_size = vm_swap_get_total_space();
1573 		if (swap_size) {
1574 			e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
1575 		}
1576 	}
1577 	e->thaw_count = memorystatus_thaw_count;
1578 	e->thaw_percentage = get_thaw_percentage();
1579 	e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
1580 	e->thaw_percentage_fg = get_thaw_percentage_fg();
1581 	e->thaw_percentage_bg = get_thaw_percentage_bg();
1582 	e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();
1583 
1584 	if (e->pageouts / (1UL << 10) != 0) {
1585 		e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
1586 	}
1587 	e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
1588 	e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
1589 	e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1590 	e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
1591 
1592 	/*
1593 	 * Record any xnu or dasd experiment information
1594 	 */
1595 	strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
1596 	strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
1597 	e->trial_deployment_id = trial_deployment_id;
1598 	strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
1599 	strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
1600 	e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;
1601 
1602 	CA_EVENT_SEND(event);
1603 }
1604 
1605 static void
memorystatus_freeze_reset_interval(void * arg0,void * arg1)1606 memorystatus_freeze_reset_interval(void *arg0, void *arg1)
1607 {
1608 #pragma unused(arg0, arg1)
1609 	struct throttle_interval_t *interval = NULL;
1610 	clock_sec_t sec;
1611 	clock_nsec_t nsec;
1612 	mach_timespec_t now_ts;
1613 	uint32_t budget_rollover = 0;
1614 
1615 	clock_get_system_nanotime(&sec, &nsec);
1616 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
1617 	now_ts.tv_nsec = nsec;
1618 	interval = normal_throttle_window;
1619 
1620 	/* Record analytics from the old interval before resetting. */
1621 	memorystatus_freeze_record_interval_analytics();
1622 
1623 	lck_mtx_lock(&freezer_mutex);
1624 	/* How long has it been since the previous interval expired? */
1625 	mach_timespec_t expiration_period_ts = now_ts;
1626 	SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
1627 	/* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
1628 	budget_rollover = interval->pageouts > interval->max_pageouts ?
1629 	    0 : interval->max_pageouts - interval->pageouts;
1630 
1631 	memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
1632 		    expiration_period_ts.tv_sec, interval->burst_multiple,
1633 		    interval->mins, budget_rollover),
1634 	    now_ts);
1635 	memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;
1636 
1637 	if (!memorystatus_freezer_use_demotion_list) {
1638 		memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
1639 	}
1640 	lck_mtx_unlock(&freezer_mutex);
1641 }
1642 
1643 
1644 proc_t
memorystatus_get_coalition_leader_and_role(proc_t p,int * role_in_coalition)1645 memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1646 {
1647 	coalition_t     coal = COALITION_NULL;
1648 	task_t          leader_task = NULL, curr_task = NULL;
1649 	proc_t          leader_proc = PROC_NULL;
1650 
1651 	curr_task = proc_task(p);
1652 	coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1653 
1654 	if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1655 		return p;
1656 	}
1657 
1658 	leader_task = coalition_get_leader(coal);
1659 	if (leader_task == TASK_NULL) {
1660 		/*
1661 		 * This jetsam coalition is currently leader-less.
1662 		 * This could happen if the app died, but XPC services
1663 		 * have not yet exited.
1664 		 */
1665 		return PROC_NULL;
1666 	}
1667 
1668 	leader_proc = (proc_t)get_bsdtask_info(leader_task);
1669 	task_deallocate(leader_task);
1670 
1671 	if (leader_proc == PROC_NULL) {
1672 		/* leader task is exiting */
1673 		return PROC_NULL;
1674 	}
1675 
1676 	*role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1677 
1678 	return leader_proc;
1679 }
1680 
1681 bool
memorystatus_freeze_process_is_recommended(const proc_t p)1682 memorystatus_freeze_process_is_recommended(const proc_t p)
1683 {
1684 	assert(!memorystatus_freezer_use_ordered_list);
1685 	int probability_of_use = 0;
1686 
1687 	size_t entry_count = 0, i = 0;
1688 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1689 	if (entry_count == 0) {
1690 		/*
1691 		 * If dasd hasn't supplied a table yet, we default to every app being eligible
1692 		 * for the freezer.
1693 		 */
1694 		return true;
1695 	}
1696 	for (i = 0; i < entry_count; i++) {
1697 		/*
1698 		 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1699 		 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1700 		 * MAXCOMLEN bytes here since the name in the probabilities table could
1701 		 * be truncated from the proc_t's p_name.
1702 		 */
1703 		if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1704 		    p->p_name,
1705 		    MAXCOMLEN) == 0) {
1706 			probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1707 			break;
1708 		}
1709 	}
1710 	return probability_of_use > 0;
1711 }
1712 
1713 __private_extern__ void
memorystatus_freeze_init(void)1714 memorystatus_freeze_init(void)
1715 {
1716 	kern_return_t result;
1717 	thread_t thread;
1718 
1719 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1720 		/*
1721 		 * This is just the default value if the underlying
1722 		 * storage device doesn't have any specific budget.
1723 		 * We check with the storage layer in memorystatus_freeze_update_throttle()
1724 		 * before we start our freezing the first time.
1725 		 */
1726 		memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1727 
1728 		result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1729 		if (result == KERN_SUCCESS) {
1730 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1731 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1732 			thread_set_thread_name(thread, "VM_freezer");
1733 
1734 			thread_deallocate(thread);
1735 		} else {
1736 			panic("Could not create memorystatus_freeze_thread");
1737 		}
1738 
1739 		freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
1740 		/* Start a new interval */
1741 
1742 		lck_mtx_lock(&freezer_mutex);
1743 		uint32_t budget;
1744 		budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1745 		memorystatus_freeze_force_new_interval(budget);
1746 		lck_mtx_unlock(&freezer_mutex);
1747 	} else {
1748 		memorystatus_freeze_budget_pages_remaining = 0;
1749 	}
1750 }
1751 
1752 void
memorystatus_freeze_configure_for_swap()1753 memorystatus_freeze_configure_for_swap()
1754 {
1755 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1756 		return;
1757 	}
1758 
1759 	assert(memorystatus_swap_all_apps);
1760 
1761 	/*
1762 	 * We expect both a larger working set and larger individual apps
1763 	 * in this mode, so tune up the freezer accordingly.
1764 	 */
1765 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED;
1766 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED;
1767 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED;
1768 
1769 	/*
1770 	 * We don't have a budget when running with full app swap.
1771 	 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1772 	 * unlimited budget.
1773 	 */
1774 	lck_mtx_lock(&freezer_mutex);
1775 	uint32_t budget;
1776 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1777 	memorystatus_freeze_force_new_interval(budget);
1778 	lck_mtx_unlock(&freezer_mutex);
1779 }
1780 
1781 void
memorystatus_freeze_disable_swap()1782 memorystatus_freeze_disable_swap()
1783 {
1784 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1785 		return;
1786 	}
1787 
1788 	assert(!memorystatus_swap_all_apps);
1789 
1790 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX;
1791 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS;
1792 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
1793 
1794 	/*
1795 	 * Calculate a new budget now that we're constrained by our daily write budget again.
1796 	 */
1797 	lck_mtx_lock(&freezer_mutex);
1798 	uint32_t budget;
1799 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1800 	memorystatus_freeze_force_new_interval(budget);
1801 	lck_mtx_unlock(&freezer_mutex);
1802 }
1803 
1804 /*
1805  * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
1806  */
1807 static int
memorystatus_freeze_process(proc_t p,coalition_t * coal,pid_t * coalition_list,unsigned int * coalition_list_length)1808 memorystatus_freeze_process(
1809 	proc_t p,
1810 	coalition_t *coal, /* IN / OUT */
1811 	pid_t *coalition_list, /* OUT */
1812 	unsigned int *coalition_list_length /* OUT */)
1813 {
1814 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1815 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1816 
1817 	kern_return_t kr;
1818 	uint32_t purgeable, wired, clean, dirty, shared;
1819 	uint64_t max_pages = 0;
1820 	freezer_error_code_t freezer_error_code = 0;
1821 	bool is_refreeze = false;
1822 	task_t curr_task = TASK_NULL;
1823 
1824 	pid_t aPid = proc_getpid(p);
1825 
1826 	is_refreeze = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
1827 
1828 	/* Ensure the process is eligible for (re-)freezing */
1829 	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
1830 		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
1831 		return EINVAL;
1832 	}
1833 	if (is_refreeze) {
1834 		/*
1835 		 * Not currently being looked at for something.
1836 		 */
1837 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1838 			return EBUSY;
1839 		}
1840 
1841 		/*
1842 		 * We are going to try and refreeze and so re-evaluate
1843 		 * the process. We don't want to double count the shared
1844 		 * memory. So deduct the old snapshot here.
1845 		 */
1846 		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
1847 		p->p_memstat_freeze_sharedanon_pages = 0;
1848 
1849 		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1850 		memorystatus_refreeze_eligible_count--;
1851 	} else {
1852 		if (!memorystatus_is_process_eligible_for_freeze(p)) {
1853 			return EINVAL;
1854 		}
1855 		if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
1856 			memorystatus_freeze_handle_error(p, FREEZER_ERROR_NO_SLOTS, is_refreeze, aPid, (coal ? *coal : NULL), "memorystatus_freeze_process");
1857 			return ENOSPC;
1858 		}
1859 	}
1860 
1861 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1862 		/*
1863 		 * Freezer backed by the compressor and swap file(s)
1864 		 * will hold compressed data.
1865 		 */
1866 
1867 		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1868 	} else {
1869 		/*
1870 		 * We only have the compressor pool.
1871 		 */
1872 		max_pages = UINT32_MAX - 1;
1873 	}
1874 
1875 	/* Mark as locked temporarily to avoid kill */
1876 	p->p_memstat_state |= P_MEMSTAT_LOCKED;
1877 
1878 	p = proc_ref(p, true);
1879 	if (!p) {
1880 		memorystatus_freezer_stats.mfs_error_other_count++;
1881 		return EBUSY;
1882 	}
1883 
1884 	proc_list_unlock();
1885 
1886 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, aPid, max_pages);
1887 
1888 	max_pages = MIN(max_pages, UINT32_MAX);
1889 	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1890 	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1891 		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1892 	}
1893 
1894 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);
1895 
1896 	memorystatus_log_debug("memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
1897 	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %llu, shared %d",
1898 	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1899 	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1900 
1901 	proc_list_lock();
1902 
1903 	/* Success? */
1904 	if (KERN_SUCCESS == kr) {
1905 		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1906 
1907 		p->p_memstat_freeze_sharedanon_pages += shared;
1908 
1909 		memorystatus_frozen_shared_mb += shared;
1910 
1911 		if (!is_refreeze) {
1912 			p->p_memstat_state |= P_MEMSTAT_FROZEN;
1913 			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1914 			memorystatus_frozen_count++;
1915 			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1916 			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
1917 				memorystatus_frozen_count_webcontent++;
1918 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
1919 			}
1920 			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
1921 				memorystatus_freeze_out_of_slots();
1922 			}
1923 		} else {
1924 			// This was a re-freeze
1925 			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1926 				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
1927 				memorystatus_freezer_stats.mfs_refreeze_count++;
1928 			}
1929 		}
1930 
1931 		p->p_memstat_frozen_count++;
1932 
1933 		/*
1934 		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1935 		 * to its higher jetsam band.
1936 		 */
1937 		proc_list_unlock();
1938 
1939 		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1940 
1941 		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1942 			int ret;
1943 			unsigned int i;
1944 			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);
1945 
1946 			if (ret) {
1947 				memorystatus_log_error("Elevating the frozen process failed with %d\n", ret);
1948 				/* not fatal */
1949 			}
1950 
1951 			/* Update stats */
1952 			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1953 				throttle_intervals[i].pageouts += dirty;
1954 			}
1955 		}
1956 		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1957 		memorystatus_log("memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
1958 		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
1959 		    memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);
1960 
1961 		proc_list_lock();
1962 
1963 		memorystatus_freeze_pageouts += dirty;
1964 
1965 		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1966 			/*
1967 			 * Add some eviction logic here? At some point should we
1968 			 * jetsam a process to get back its swap space so that we
1969 			 * can freeze a more eligible process at this moment in time?
1970 			 */
1971 		}
1972 
1973 		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
1974 		if (coal != NULL && *coal == NULL) {
1975 			curr_task = proc_task(p);
1976 			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1977 			if (coalition_is_leader(curr_task, *coal)) {
1978 				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
1979 				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);
1980 
1981 				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
1982 					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
1983 				}
1984 			}
1985 		} else {
1986 			/* We just froze an xpc service. Mark it as such for telemetry */
1987 			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
1988 			memorystatus_frozen_count_xpc_service++;
1989 			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
1990 		}
1991 
1992 		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1993 		wakeup(&p->p_memstat_state);
1994 		proc_rele(p);
1995 		return 0;
1996 	} else {
1997 		if (is_refreeze) {
1998 			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
1999 			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
2000 				/*
2001 				 * Keeping this prior-frozen process in this high band when
2002 				 * we failed to re-freeze it due to bad shared memory usage
2003 				 * could cause excessive pressure on the lower bands.
2004 				 * We need to demote it for now. It'll get re-evaluated next
2005 				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
2006 				 * bit.
2007 				 */
2008 
2009 				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2010 				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2011 				memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
2012 			}
2013 		} else {
2014 			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
2015 		}
2016 		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_process");
2017 
2018 		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
2019 		wakeup(&p->p_memstat_state);
2020 		proc_rele(p);
2021 
2022 		return EINVAL;
2023 	}
2024 }
2025 
2026 /*
2027  * Synchronously freeze the passed proc. Called with a reference to the proc held.
2028  *
2029  * Doesn't deal with:
2030  * - re-freezing because this is called on a specific process and
2031  *   not by the freezer thread. If that changes, we'll have to teach it about
2032  *   refreezing a frozen process.
2033  *
2034  * - grouped/coalition freezing because we are hoping to deprecate this
2035  *   interface as it was used by user-space to freeze particular processes. But
2036  *   we have moved away from that approach to having the kernel choose the optimal
2037  *   candidates to be frozen.
2038  *
2039  * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
2040  * returns EINVAL or the value returned by task_freeze().
2041  */
2042 int
memorystatus_freeze_process_sync(proc_t p)2043 memorystatus_freeze_process_sync(proc_t p)
2044 {
2045 	int ret = EINVAL;
2046 	boolean_t memorystatus_freeze_swap_low = FALSE;
2047 
2048 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2049 		return ENOTSUP;
2050 	}
2051 
2052 	lck_mtx_lock(&freezer_mutex);
2053 
2054 	if (p == NULL) {
2055 		memorystatus_log_error("memorystatus_freeze_process_sync: Invalid process\n");
2056 		goto exit;
2057 	}
2058 
2059 	if (memorystatus_freeze_enabled == false) {
2060 		memorystatus_log_error("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
2061 		goto exit;
2062 	}
2063 
2064 	if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2065 		memorystatus_log_info("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
2066 		goto exit;
2067 	}
2068 
2069 	memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2070 	if (!memorystatus_freeze_budget_pages_remaining) {
2071 		memorystatus_log_info("memorystatus_freeze_process_sync: exit with NO available budget\n");
2072 		goto exit;
2073 	}
2074 
2075 	proc_list_lock();
2076 
2077 	ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2078 
2079 exit:
2080 	lck_mtx_unlock(&freezer_mutex);
2081 
2082 	return ret;
2083 }
2084 
2085 proc_t
memorystatus_freezer_candidate_list_get_proc(struct memorystatus_freezer_candidate_list * list,size_t index,uint64_t * pid_mismatch_counter)2086 memorystatus_freezer_candidate_list_get_proc(
2087 	struct memorystatus_freezer_candidate_list *list,
2088 	size_t index,
2089 	uint64_t *pid_mismatch_counter)
2090 {
2091 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2092 	if (list->mfcl_list == NULL || list->mfcl_length <= index) {
2093 		return NULL;
2094 	}
2095 	memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
2096 	if (entry->pid == NO_PID) {
2097 		/* Entry has been removed. */
2098 		return NULL;
2099 	}
2100 
2101 	proc_t p = proc_find_locked(entry->pid);
2102 	if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2103 		/*
2104 		 * We grab a reference when we are about to freeze the process. So drop
2105 		 * the reference that proc_find_locked() grabbed for us.
2106 		 * We also have the proc_list_lock so this process is stable.
2107 		 */
2108 		proc_rele(p);
2109 		return p;
2110 	} else {
2111 		if (p) {
2112 			/* pid rollover. */
2113 			proc_rele(p);
2114 		}
2115 		/*
2116 		 * The proc has exited since we received this list.
2117 		 * It may have re-launched with a new pid, so we go looking for it.
2118 		 */
2119 		unsigned int band = JETSAM_PRIORITY_IDLE;
2120 		p = memorystatus_get_first_proc_locked(&band, TRUE);
2121 		while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
2122 			if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2123 				if (pid_mismatch_counter != NULL) {
2124 					(*pid_mismatch_counter)++;
2125 				}
2126 				/* Stash the pid for faster lookup next time. */
2127 				entry->pid = proc_getpid(p);
2128 				return p;
2129 			}
2130 			p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2131 		}
2132 		/* No match. */
2133 		return NULL;
2134 	}
2135 }
2136 
2137 static size_t
memorystatus_freeze_pid_list(pid_t * pid_list,unsigned int num_pids)2138 memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2139 {
2140 	int ret = 0;
2141 	size_t num_frozen = 0;
2142 	while (num_pids > 0 &&
2143 	    memorystatus_frozen_count < memorystatus_frozen_processes_max) {
2144 		pid_t pid = pid_list[--num_pids];
2145 		proc_t p = proc_find_locked(pid);
2146 		if (p) {
2147 			proc_rele(p);
2148 			ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2149 			if (ret != 0) {
2150 				break;
2151 			}
2152 			num_frozen++;
2153 		}
2154 	}
2155 	return num_frozen;
2156 }
2157 
2158 /*
2159  * Attempt to freeze the best candidate process.
2160  * Keep trying until we freeze something or run out of candidates.
2161  * Returns the number of processes frozen (including coalition members).
2162  */
2163 static size_t
memorystatus_freeze_top_process(void)2164 memorystatus_freeze_top_process(void)
2165 {
2166 	int freeze_ret;
2167 	size_t num_frozen = 0;
2168 	coalition_t coal = COALITION_NULL;
2169 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
2170 	unsigned int ntasks = 0;
2171 	struct memorystatus_freeze_list_iterator iterator;
2172 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2173 
2174 	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
2175 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages);
2176 
2177 	proc_list_lock();
2178 	while (true) {
2179 		proc_t p = memorystatus_freeze_pick_process(&iterator);
2180 		if (p == PROC_NULL) {
2181 			/* Nothing left to freeze */
2182 			break;
2183 		}
2184 		freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
2185 		if (freeze_ret == 0) {
2186 			num_frozen = 1;
2187 			/*
2188 			 * We froze a process successfully.
2189 			 * If it's a coalition head, freeze the coalition.
2190 			 * Then we're done for now.
2191 			 */
2192 			if (coal != NULL) {
2193 				num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
2194 			}
2195 			break;
2196 		} else {
2197 			if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
2198 				break;
2199 			}
2200 			/*
2201 			 * Freeze failed but we're not out of space.
2202 			 * Keep trying to find a good candidate,
2203 			 * memorystatus_freeze_pick_process will not return this proc again until
2204 			 * we reset the iterator.
2205 			 */
2206 		}
2207 	}
2208 	proc_list_unlock();
2209 
2210 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages);
2211 
2212 	return num_frozen;
2213 }
2214 
#if DEVELOPMENT || DEBUG
/*
 * For testing memorystatus_freeze_top_process.
 *
 * Any write triggers one call to memorystatus_freeze_top_process() under the
 * freezer_mutex. Returns ENOTSUP when VM_CONFIG_FREEZER_SWAP_IS_ACTIVE is
 * false and ESRCH when nothing was frozen.
 */
static int
sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	/*
	 * val is handed to sysctl_handle_int() and must be initialized so
	 * that a read of this sysctl never sees stack garbage.
	 */
	int error, val = 0, ret = 0;
	size_t num_frozen;
	/*
	 * Only freeze on write to prevent freezing during `sysctl -a`.
	 * The actual value written doesn't matter.
	 */
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	lck_mtx_lock(&freezer_mutex);
	num_frozen = memorystatus_freeze_top_process();
	lck_mtx_unlock(&freezer_mutex);

	if (num_frozen == 0) {
		ret = ESRCH;
	}
	return ret;
}
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
#endif /* DEVELOPMENT || DEBUG */
2248 
2249 static inline boolean_t
memorystatus_can_freeze_processes(void)2250 memorystatus_can_freeze_processes(void)
2251 {
2252 	boolean_t ret;
2253 
2254 	proc_list_lock();
2255 
2256 	if (memorystatus_suspended_count) {
2257 		memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2258 
2259 		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2260 			ret = TRUE;
2261 		} else {
2262 			ret = FALSE;
2263 		}
2264 	} else {
2265 		ret = FALSE;
2266 	}
2267 
2268 	proc_list_unlock();
2269 
2270 	return ret;
2271 }
2272 
2273 static boolean_t
memorystatus_can_freeze(boolean_t * memorystatus_freeze_swap_low)2274 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2275 {
2276 	boolean_t can_freeze = TRUE;
2277 
2278 	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
2279 	*  after boot,  and is generally is a no-op once we've reached steady state. */
2280 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2281 		return FALSE;
2282 	}
2283 
2284 	/* Check minimum suspended process threshold. */
2285 	if (!memorystatus_can_freeze_processes()) {
2286 		return FALSE;
2287 	}
2288 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2289 
2290 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2291 		/*
2292 		 * In-core compressor used for freezing WITHOUT on-disk swap support.
2293 		 */
2294 		if (vm_compressor_low_on_space()) {
2295 			if (*memorystatus_freeze_swap_low) {
2296 				*memorystatus_freeze_swap_low = TRUE;
2297 			}
2298 
2299 			can_freeze = FALSE;
2300 		} else {
2301 			if (*memorystatus_freeze_swap_low) {
2302 				*memorystatus_freeze_swap_low = FALSE;
2303 			}
2304 
2305 			can_freeze = TRUE;
2306 		}
2307 	} else {
2308 		/*
2309 		 * Freezing WITH on-disk swap support.
2310 		 *
2311 		 * In-core compressor fronts the swap.
2312 		 */
2313 		if (vm_swap_low_on_space()) {
2314 			if (*memorystatus_freeze_swap_low) {
2315 				*memorystatus_freeze_swap_low = TRUE;
2316 			}
2317 
2318 			can_freeze = FALSE;
2319 		}
2320 	}
2321 
2322 	return can_freeze;
2323 }
2324 
2325 /*
2326  * Demote the given frozen process.
2327  * Caller must hold the proc_list_lock & it will be held on return.
2328  */
2329 static void
memorystatus_demote_frozen_process(proc_t p,bool urgent_mode __unused)2330 memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
2331 {
2332 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2333 
2334 	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
2335 	int maxpriority = JETSAM_PRIORITY_IDLE;
2336 	p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2337 	memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2338 
2339 	maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
2340 	memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
2341 #if DEVELOPMENT || DEBUG
2342 	memorystatus_log("memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
2343 	    (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2344 #endif /* DEVELOPMENT || DEBUG */
2345 
2346 	/*
2347 	 * The freezer thread will consider this a normal app to be frozen
2348 	 * because it is in the IDLE band. So we don't need the
2349 	 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2350 	 * we'll correctly count it as eligible for re-freeze again.
2351 	 *
2352 	 * We don't drop the frozen count because this process still has
2353 	 * state on disk. So there's a chance it gets resumed and then it
2354 	 * should land in the higher jetsam band. For that it needs to
2355 	 * remain marked frozen.
2356 	 */
2357 	if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2358 		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2359 		memorystatus_refreeze_eligible_count--;
2360 	}
2361 }
2362 
2363 static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)2364 memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
2365 {
2366 	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
2367 	unsigned int demoted_proc_count = 0;
2368 	proc_t p = PROC_NULL, next_p = PROC_NULL;
2369 	proc_list_lock();
2370 
2371 	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2372 	while (next_p) {
2373 		p = next_p;
2374 		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2375 
2376 		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
2377 			continue;
2378 		}
2379 
2380 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2381 			continue;
2382 		}
2383 
2384 		if (urgent_mode) {
2385 			if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2386 				/*
2387 				 * This process hasn't been thawed recently and so most of
2388 				 * its state sits on NAND and so we skip it -- jetsamming it
2389 				 * won't help with memory pressure.
2390 				 */
2391 				continue;
2392 			}
2393 		} else {
2394 			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2395 				/*
2396 				 * This process has met / exceeded our thaw count demotion threshold
2397 				 * and so we let it live in the higher bands.
2398 				 */
2399 				continue;
2400 			}
2401 		}
2402 
2403 		memorystatus_demote_frozen_process(p, urgent_mode);
2404 		demoted_proc_count++;
2405 		if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2406 			break;
2407 		}
2408 	}
2409 
2410 	proc_list_unlock();
2411 	return demoted_proc_count;
2412 }
2413 
2414 static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)2415 memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2416 {
2417 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2418 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2419 	assert(memorystatus_freezer_use_demotion_list);
2420 	unsigned int demoted_proc_count = 0;
2421 
2422 	proc_list_lock();
2423 	for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2424 		proc_t p = memorystatus_freezer_candidate_list_get_proc(
2425 			&memorystatus_global_demote_list,
2426 			i,
2427 			&memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2428 		if (p != NULL && memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2429 			memorystatus_demote_frozen_process(p, urgent_mode);
2430 			/* Remove this entry now that it's been demoted. */
2431 			memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2432 			demoted_proc_count++;
2433 			/*
2434 			 * We only demote one proc at a time in this mode.
2435 			 * This gives jetsam a chance to kill the recently demoted processes.
2436 			 */
2437 			break;
2438 		}
2439 	}
2440 
2441 	proc_list_unlock();
2442 	return demoted_proc_count;
2443 }
2444 
2445 /*
2446  * This function evaluates if the currently frozen processes deserve
2447  * to stay in the higher jetsam band. There are 2 modes:
2448  * - 'force one == TRUE': (urgent mode)
2449  *	We are out of budget and can't refreeze a process. The process's
2450  * state, if it was resumed, will stay in compressed memory. If we let it
2451  * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
2452  * the lower bands. So we force-demote the least-recently-used-and-thawed
2453  * process.
2454  *
2455  * - 'force_one == FALSE': (normal mode)
2456  *      If the # of thaws of a process is below our threshold, then we
2457  * will demote that process into the IDLE band.
2458  * We don't immediately kill the process here because it  already has
2459  * state on disk and so it might be worth giving it another shot at
2460  * getting thawed/resumed and used.
2461  */
2462 static void
memorystatus_demote_frozen_processes(bool urgent_mode)2463 memorystatus_demote_frozen_processes(bool urgent_mode)
2464 {
2465 	unsigned int demoted_proc_count = 0;
2466 
2467 	if (memorystatus_freeze_enabled == false) {
2468 		/*
2469 		 * Freeze has been disabled likely to
2470 		 * reclaim swap space. So don't change
2471 		 * any state on the frozen processes.
2472 		 */
2473 		return;
2474 	}
2475 
2476 	/*
2477 	 * We have two demotion policies which can be toggled by userspace.
2478 	 * In non-urgent mode, the ordered list policy will
2479 	 * choose a demotion candidate using the list provided by dasd.
2480 	 * The thaw count policy will demote the oldest process that hasn't been
2481 	 * thawed more than memorystatus_thaw_count_demotion_threshold times.
2482 	 *
2483 	 * If urgent_mode is set, both policies will only consider demoting
2484 	 * processes that are re-freeze eligible. But the ordering is different.
2485 	 * The ordered list policy will scan in the order given by dasd.
2486 	 * The thaw count policy will scan through the frozen band.
2487 	 */
2488 	if (memorystatus_freezer_use_demotion_list) {
2489 		demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);
2490 
2491 		if (demoted_proc_count == 0 && urgent_mode) {
2492 			/*
2493 			 * We're out of budget and the demotion list doesn't contain any valid
2494 			 * candidates. We still need to demote something. Fall back to scanning
2495 			 * the frozen band.
2496 			 */
2497 			memorystatus_demote_frozen_processes_using_thaw_count(true);
2498 		}
2499 	} else {
2500 		demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
2501 	}
2502 }
2503 
2504 /*
2505  * Calculate a new freezer budget.
2506  * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2507  * @param burst_multiple The burst_multiple for the new period
2508  * @param interval_duration_min How many minutes will the new interval be?
2509  * @param rollover The amount to rollover from the previous budget.
2510  *
2511  * @return A budget for the new interval.
2512  */
2513 static uint32_t
memorystatus_freeze_calculate_new_budget(unsigned int time_since_last_interval_expired_sec,unsigned int burst_multiple,unsigned int interval_duration_min,uint32_t rollover)2514 memorystatus_freeze_calculate_new_budget(
2515 	unsigned int time_since_last_interval_expired_sec,
2516 	unsigned int burst_multiple,
2517 	unsigned int interval_duration_min,
2518 	uint32_t rollover)
2519 {
2520 	uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2521 	const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2522 	/* Precision factor for days_missed. 2 decimal points. */
2523 	const static unsigned int kFixedPointFactor = 100;
2524 	unsigned int days_missed;
2525 
2526 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2527 		return 0;
2528 	}
2529 	if (memorystatus_swap_all_apps) {
2530 		/*
2531 		 * We effectively have an unlimited budget when app swap is enabled.
2532 		 */
2533 		memorystatus_freeze_daily_mb_max = UINT32_MAX;
2534 		return UINT32_MAX;
2535 	}
2536 
2537 	/* Get the daily budget from the storage layer */
2538 	if (vm_swap_max_budget(&freeze_daily_budget)) {
2539 		freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2540 		assert(freeze_daily_budget_mb <= UINT32_MAX);
2541 		memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2542 		memorystatus_log_info("memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2543 	}
2544 	/* Calculate the daily pageout budget */
2545 	freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2546 	/* Multiply by memorystatus_freeze_budget_multiplier */
2547 	freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;
2548 
2549 	daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2550 
2551 	/*
2552 	 * Add additional budget for time since the interval expired.
2553 	 * For example, if the interval expired n days ago, we should get an additional n days
2554 	 * of budget since we didn't use any budget during those n days.
2555 	 */
2556 	days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2557 	budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2558 	new_budget = rollover + daily_budget_pageouts + budget_missed;
2559 	return (uint32_t) MIN(new_budget, UINT32_MAX);
2560 }
2561 
2562 /*
2563  * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2564  * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2565  * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2566  * it gets set for new processes.
2567  * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2568  */
2569 static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason,bool locked)2570 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2571 {
2572 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2573 	LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2574 	unsigned int band = JETSAM_PRIORITY_IDLE;
2575 	proc_t p;
2576 
2577 	if (!locked) {
2578 		proc_list_lock();
2579 	}
2580 	p = memorystatus_get_first_proc_locked(&band, FALSE);
2581 	while (p) {
2582 		assert(p->p_memstat_effectivepriority == (int32_t) band);
2583 		if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
2584 			assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2585 			p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2586 		}
2587 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2588 	}
2589 	if (!locked) {
2590 		proc_list_unlock();
2591 	}
2592 }
2593 
2594 /*
2595  * Called after we fail to freeze a process.
2596  * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2597  */
2598 static void
memorystatus_freeze_handle_error(proc_t p,const freezer_error_code_t freezer_error_code,bool was_refreeze,pid_t pid,const coalition_t coalition,const char * log_prefix)2599 memorystatus_freeze_handle_error(
2600 	proc_t p,
2601 	const freezer_error_code_t freezer_error_code,
2602 	bool was_refreeze,
2603 	pid_t pid,
2604 	const coalition_t coalition,
2605 	const char* log_prefix)
2606 {
2607 	const char *reason;
2608 	memorystatus_freeze_skip_reason_t skip_reason;
2609 
2610 	switch (freezer_error_code) {
2611 	case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2612 		memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2613 		reason = "too much shared memory";
2614 		skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2615 		break;
2616 	case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2617 		memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2618 		reason = "private-shared pages ratio";
2619 		skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2620 		break;
2621 	case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2622 		memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2623 		reason = "no compressor space";
2624 		skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2625 		break;
2626 	case FREEZER_ERROR_NO_SWAP_SPACE:
2627 		memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2628 		reason = "no swap space";
2629 		skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2630 		break;
2631 	case FREEZER_ERROR_NO_SLOTS:
2632 		memorystatus_freezer_stats.mfs_skipped_full_count++;
2633 		reason = "no slots";
2634 		skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2635 		break;
2636 	default:
2637 		reason = "unknown error";
2638 		skip_reason = kMemorystatusFreezeSkipReasonOther;
2639 	}
2640 
2641 	p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2642 
2643 	memorystatus_log("%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2644 	    log_prefix, was_refreeze ? "re" : "",
2645 	    (coalition == NULL ? "general" : "coalition-driven"), pid,
2646 	    ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2647 }
2648 
2649 /*
2650  * Start a new normal throttle interval with the given budget.
2651  * Caller must hold the freezer mutex
2652  */
2653 static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget,mach_timespec_t start_ts)2654 memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2655 {
2656 	unsigned int band;
2657 	proc_t p, next_p;
2658 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2659 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2660 
2661 	normal_throttle_window->max_pageouts = new_budget;
2662 	normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2663 	normal_throttle_window->ts.tv_nsec = 0;
2664 	ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2665 	/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2666 	if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2667 		normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2668 	} else {
2669 		normal_throttle_window->pageouts = 0;
2670 	}
2671 	/* Ensure the normal window is now active. */
2672 	memorystatus_freeze_degradation = FALSE;
2673 
2674 	/*
2675 	 * Reset interval statistics.
2676 	 */
2677 	memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2678 	memorystatus_freezer_stats.mfs_process_considered_count = 0;
2679 	memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
2680 	memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
2681 	memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
2682 	memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
2683 	memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
2684 	memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
2685 	memorystatus_freezer_stats.mfs_error_elevated_count = 0;
2686 	memorystatus_freezer_stats.mfs_error_other_count = 0;
2687 	memorystatus_freezer_stats.mfs_refreeze_count = 0;
2688 	memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
2689 	memorystatus_freezer_stats.mfs_below_threshold_count = 0;
2690 	memorystatus_freezer_stats.mfs_skipped_full_count = 0;
2691 	memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
2692 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
2693 	memorystatus_thaw_count = 0;
2694 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
2695 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
2696 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
2697 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
2698 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
2699 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
2700 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
2701 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
2702 	os_atomic_inc(&memorystatus_freeze_current_interval, release);
2703 
2704 	/* Clear the focal thaw bit */
2705 	proc_list_lock();
2706 	band = JETSAM_PRIORITY_IDLE;
2707 	p = PROC_NULL;
2708 	next_p = PROC_NULL;
2709 
2710 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
2711 	while (next_p) {
2712 		p = next_p;
2713 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2714 
2715 		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
2716 			break;
2717 		}
2718 		p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
2719 	}
2720 	proc_list_unlock();
2721 
2722 	schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
2723 }
2724 
#if DEVELOPMENT || DEBUG

/*
 * Test-only sysctl (vm.memorystatus_freeze_calculate_new_budget).
 *
 * The value written is interpreted as the number of seconds since the last
 * interval expired. The handler computes the budget a new normal interval
 * would get (burst multiple 1, NORMAL_WINDOW_MINS long, no rollover) and
 * copies it out to the caller's old-value buffer.
 *
 * Returns the sysctl_handle_int() result when it fails or when no new value
 * was supplied, ENOTSUP when freezer swap is not active, and otherwise the
 * result of the copyout.
 */
static int
sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error = 0;
	unsigned int time_since_last_interval_expired_sec = 0;
	unsigned int new_budget;

	error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}
	new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
	/*
	 * Bound the copy by the length of the caller's buffer (req->oldlen),
	 * not by the size of the length field itself, so we never write past a
	 * buffer that is shorter than the result.
	 */
	return copyout(&new_budget, req->oldptr, MIN(req->oldlen, sizeof(new_budget)));
}

SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");

#endif /* DEVELOPMENT || DEBUG */
2751 
2752 /*
2753  * Called when we first run out of budget in an interval.
2754  * Marks idle processes as not frozen due to lack of budget.
2755  * NB: It might be worth having a CA event here.
2756  */
2757 static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t * interval)2758 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2759 {
2760 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2761 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2762 
2763 	mach_timespec_t time_left = {0, 0};
2764 	mach_timespec_t now_ts;
2765 	clock_sec_t sec;
2766 	clock_nsec_t nsec;
2767 
2768 	time_left.tv_sec = interval->ts.tv_sec;
2769 	time_left.tv_nsec = 0;
2770 	clock_get_system_nanotime(&sec, &nsec);
2771 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2772 	now_ts.tv_nsec = nsec;
2773 
2774 	SUB_MACH_TIMESPEC(&time_left, &now_ts);
2775 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2776 	memorystatus_log(
2777 		"memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2778 		time_left.tv_sec / 60, memorystatus_frozen_count);
2779 
2780 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2781 }
2782 
2783 /*
2784  * Called when we cross over the threshold of maximum frozen processes allowed.
2785  * Marks remaining idle processes as not frozen due to lack of slots.
2786  */
2787 static void
memorystatus_freeze_out_of_slots(void)2788 memorystatus_freeze_out_of_slots(void)
2789 {
2790 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2791 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2792 	assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
2793 
2794 	memorystatus_log(
2795 		"memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
2796 		memorystatus_frozen_count);
2797 
2798 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
2799 }
2800 
2801 /*
2802  * This function will do 4 things:
2803  *
2804  * 1) check to see if we are currently in a degraded freezer mode, and if so:
2805  *    - check to see if our window has expired and we should exit this mode, OR,
2806  *    - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2807  *
2808  * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2809  *
2810  * 3) check what the current normal window allows for a budget.
2811  *
2812  * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2813  *    what we would normally expect, then we are running low on our daily budget and need to enter
2814  *    degraded perf. mode.
2815  *
2816  *    Caller must hold the freezer mutex
2817  *    Caller must not hold the proc_list lock
2818  */
2819 
2820 static void
memorystatus_freeze_update_throttle(uint64_t * budget_pages_allowed)2821 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2822 {
2823 	clock_sec_t sec;
2824 	clock_nsec_t nsec;
2825 	mach_timespec_t now_ts;
2826 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2827 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2828 
2829 	unsigned int freeze_daily_pageouts_max = 0;
2830 	bool started_with_budget = (*budget_pages_allowed > 0);
2831 
2832 #if DEVELOPMENT || DEBUG
2833 	if (!memorystatus_freeze_throttle_enabled) {
2834 		/*
2835 		 * No throttling...we can use the full budget everytime.
2836 		 */
2837 		*budget_pages_allowed = UINT64_MAX;
2838 		return;
2839 	}
2840 #endif
2841 
2842 	clock_get_system_nanotime(&sec, &nsec);
2843 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2844 	now_ts.tv_nsec = nsec;
2845 
2846 	struct throttle_interval_t *interval = NULL;
2847 
2848 	if (memorystatus_freeze_degradation == TRUE) {
2849 		interval = degraded_throttle_window;
2850 
2851 		if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2852 			interval->pageouts = 0;
2853 			interval->max_pageouts = 0;
2854 		} else {
2855 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2856 		}
2857 	}
2858 
2859 	interval = normal_throttle_window;
2860 
2861 	/*
2862 	 * Current throttle window.
2863 	 * Deny freezing if we have no budget left.
2864 	 * Try graceful degradation if we are within 25% of:
2865 	 * - the daily budget, and
2866 	 * - the current budget left is below our normal budget expectations.
2867 	 */
2868 
2869 	if (memorystatus_freeze_degradation == FALSE) {
2870 		if (interval->pageouts >= interval->max_pageouts) {
2871 			*budget_pages_allowed = 0;
2872 			if (started_with_budget) {
2873 				memorystatus_freeze_out_of_budget(interval);
2874 			}
2875 		} else {
2876 			int budget_left = interval->max_pageouts - interval->pageouts;
2877 			int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2878 
2879 			mach_timespec_t time_left = {0, 0};
2880 
2881 			time_left.tv_sec = interval->ts.tv_sec;
2882 			time_left.tv_nsec = 0;
2883 
2884 			SUB_MACH_TIMESPEC(&time_left, &now_ts);
2885 
2886 			if (budget_left <= budget_threshold) {
2887 				/*
2888 				 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2889 				 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2890 				 * daily pageout budget.
2891 				 */
2892 
2893 				unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2894 				unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2895 
2896 				/*
2897 				 * The current rate of pageouts is below what we would expect for
2898 				 * the normal rate i.e. we have below normal budget left and so...
2899 				 */
2900 
2901 				if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2902 					memorystatus_freeze_degradation = TRUE;
2903 					degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2904 					degraded_throttle_window->pageouts = 0;
2905 
2906 					/*
2907 					 * Switch over to the degraded throttle window so the budget
2908 					 * doled out is based on that window.
2909 					 */
2910 					interval = degraded_throttle_window;
2911 				}
2912 			}
2913 
2914 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2915 		}
2916 	}
2917 
2918 	memorystatus_log_debug(
2919 		"memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining\n",
2920 		interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60);
2921 }
2922 
2923 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_apps_idle_delay_multiplier, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_apps_idle_delay_multiplier, 0, "");
2924 
2925 bool memorystatus_freeze_thread_init = false;
2926 static void
memorystatus_freeze_thread(void * param __unused,wait_result_t wr __unused)2927 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2928 {
2929 	static boolean_t memorystatus_freeze_swap_low = FALSE;
2930 	size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;
2931 
2932 	if (!memorystatus_freeze_thread_init) {
2933 #if CONFIG_THREAD_GROUPS
2934 		thread_group_vm_add();
2935 #endif
2936 		memorystatus_freeze_thread_init = true;
2937 	}
2938 
2939 	max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();
2940 
2941 	lck_mtx_lock(&freezer_mutex);
2942 	if (memorystatus_freeze_enabled) {
2943 		if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
2944 			memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
2945 		}
2946 		while (num_frozen < max_to_freeze &&
2947 		    memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
2948 		    ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2949 		    (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold))) {
2950 			/* Only freeze if we've not exceeded our pageout budgets.*/
2951 			memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2952 
2953 			if (memorystatus_freeze_budget_pages_remaining) {
2954 				num_frozen_this_iteration = memorystatus_freeze_top_process();
2955 				if (num_frozen_this_iteration == 0) {
2956 					/* Nothing left to freeze. */
2957 					break;
2958 				}
2959 				num_frozen += num_frozen_this_iteration;
2960 			} else {
2961 				memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
2962 				break;
2963 			}
2964 		}
2965 	}
2966 
2967 	/*
2968 	 * Give applications currently in the aging band a chance to age out into the idle band before
2969 	 * running the freezer again.
2970 	 */
2971 	if (memorystatus_freeze_dynamic_thread_delay_enabled) {
2972 		if ((num_frozen > 0) || (memorystatus_frozen_count == 0)) {
2973 			memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_FAST;
2974 		} else {
2975 			memorystatus_freeze_apps_idle_delay_multiplier = FREEZE_APPS_IDLE_DELAY_MULTIPLIER_SLOW;
2976 		}
2977 	}
2978 	memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + (memorystatus_apps_idle_delay_time * memorystatus_freeze_apps_idle_delay_multiplier);
2979 
2980 	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2981 	lck_mtx_unlock(&freezer_mutex);
2982 
2983 	thread_block((thread_continue_t) memorystatus_freeze_thread);
2984 }
2985 
2986 int
memorystatus_get_process_is_freezable(pid_t pid,int * is_freezable)2987 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2988 {
2989 	proc_t p = PROC_NULL;
2990 
2991 	if (pid == 0) {
2992 		return EINVAL;
2993 	}
2994 
2995 	p = proc_find(pid);
2996 	if (!p) {
2997 		return ESRCH;
2998 	}
2999 
3000 	/*
3001 	 * Only allow this on the current proc for now.
3002 	 * We can check for privileges and allow targeting another process in the future.
3003 	 */
3004 	if (p != current_proc()) {
3005 		proc_rele(p);
3006 		return EPERM;
3007 	}
3008 
3009 	proc_list_lock();
3010 	*is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
3011 	proc_rele(p);
3012 	proc_list_unlock();
3013 
3014 	return 0;
3015 }
3016 
3017 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)3018 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
3019 {
3020 	proc_t p = PROC_NULL;
3021 
3022 	if (pid == 0) {
3023 		return EINVAL;
3024 	}
3025 
3026 	/*
3027 	 * Only allow this on the current proc for now.
3028 	 * We can check for privileges and allow targeting another process in the future.
3029 	 */
3030 	p = current_proc();
3031 	if (proc_getpid(p) != pid) {
3032 		return EPERM;
3033 	}
3034 
3035 	proc_list_lock();
3036 	*is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3037 	proc_list_unlock();
3038 
3039 	return 0;
3040 }
3041 
3042 int
memorystatus_set_process_is_freezable(pid_t pid,boolean_t is_freezable)3043 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
3044 {
3045 	proc_t p = PROC_NULL;
3046 
3047 	if (pid == 0) {
3048 		return EINVAL;
3049 	}
3050 
3051 	/*
3052 	 * To enable freezable status, you need to be root or an entitlement.
3053 	 */
3054 	if (is_freezable &&
3055 	    !kauth_cred_issuser(kauth_cred_get()) &&
3056 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3057 		return EPERM;
3058 	}
3059 
3060 	p = proc_find(pid);
3061 	if (!p) {
3062 		return ESRCH;
3063 	}
3064 
3065 	/*
3066 	 * A process can change its own status. A coalition leader can
3067 	 * change the status of coalition members.
3068 	 * An entitled process (or root) can change anyone's status.
3069 	 */
3070 	if (p != current_proc() &&
3071 	    !kauth_cred_issuser(kauth_cred_get()) &&
3072 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3073 		coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
3074 		if (!coalition_is_leader(proc_task(current_proc()), coal)) {
3075 			proc_rele(p);
3076 			return EPERM;
3077 		}
3078 	}
3079 
3080 	proc_list_lock();
3081 	if (is_freezable == FALSE) {
3082 		/* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
3083 		p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
3084 		memorystatus_log_info("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
3085 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3086 	} else {
3087 		p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
3088 		memorystatus_log_info("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
3089 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3090 	}
3091 	proc_rele(p);
3092 	proc_list_unlock();
3093 
3094 	return 0;
3095 }
3096 
3097 /*
3098  * Called when process is created before it is added to a memorystatus bucket.
3099  */
3100 void
memorystatus_freeze_init_proc(proc_t p)3101 memorystatus_freeze_init_proc(proc_t p)
3102 {
3103 	/* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3104 	if (memorystatus_freeze_budget_pages_remaining == 0) {
3105 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3106 	} else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3107 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3108 	} else {
3109 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3110 	}
3111 }
3112 
3113 
3114 static int
3115 sysctl_memorystatus_do_fastwake_warmup_all  SYSCTL_HANDLER_ARGS
3116 {
3117 #pragma unused(oidp, arg1, arg2)
3118 
3119 	if (!req->newptr) {
3120 		return EINVAL;
3121 	}
3122 
3123 	/* Need to be root or have entitlement */
3124 	if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
3125 		return EPERM;
3126 	}
3127 
3128 	if (memorystatus_freeze_enabled == false) {
3129 		return ENOTSUP;
3130 	}
3131 
3132 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3133 		return ENOTSUP;
3134 	}
3135 
3136 	do_fastwake_warmup_all();
3137 
3138 	return 0;
3139 }
3140 
3141 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3142     0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3143 
3144 /*
3145  * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3146  * Takes ownership over the original value of list.
3147  * Assumes that list is protected by the freezer_mutex.
3148  * The caller should not hold any locks.
3149  */
3150 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3151 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3152 {
3153 	errno_t error = 0;
3154 	memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3155 	size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3156 
3157 	/* Validate the user provided list. */
3158 	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3159 		memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3160 		return EINVAL;
3161 	}
3162 
3163 	if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3164 		memorystatus_log_error(
3165 			"memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3166 		return EINVAL;
3167 	}
3168 
3169 	entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3170 	entries_size = buffer_size;
3171 	entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3172 	if (entries == NULL) {
3173 		return ENOMEM;
3174 	}
3175 
3176 	error = copyin(buffer, entries, buffer_size);
3177 	if (error != 0) {
3178 		goto out;
3179 	}
3180 
3181 #if MACH_ASSERT
3182 	for (size_t i = 0; i < entry_count; i++) {
3183 		memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3184 		if (entry->version != 1) {
3185 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3186 			error = EINVAL;
3187 			goto out;
3188 		}
3189 		if (i > 0 && entry->priority >= entries[i - 1].priority) {
3190 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3191 			error = EINVAL;
3192 			goto out;
3193 		}
3194 	}
3195 #endif /* MACH_ASSERT */
3196 
3197 	lck_mtx_lock(&freezer_mutex);
3198 
3199 	tmp_entries = list->mfcl_list;
3200 	tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3201 	list->mfcl_list = entries;
3202 	list->mfcl_length = entry_count;
3203 
3204 	lck_mtx_unlock(&freezer_mutex);
3205 
3206 	entries = tmp_entries;
3207 	entries_size = tmp_size;
3208 
3209 out:
3210 	kfree_data(entries, entries_size);
3211 	return error;
3212 }
3213 
3214 errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer,size_t buffer_size)3215 memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
3216 {
3217 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
3218 }
3219 
3220 errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer,size_t buffer_size)3221 memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
3222 {
3223 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
3224 }
3225 
3226 void
memorystatus_freezer_mark_ui_transition(proc_t p)3227 memorystatus_freezer_mark_ui_transition(proc_t p)
3228 {
3229 	bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3230 	proc_list_lock();
3231 
3232 	if (isSysProc(p)) {
3233 		goto out;
3234 	}
3235 
3236 	frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3237 	previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3238 	xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3239 	suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3240 	if (!suspended) {
3241 		if (frozen) {
3242 			if (!previous_focal_thaw) {
3243 				p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3244 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3245 				if (xpc_service) {
3246 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3247 				}
3248 			}
3249 		}
3250 		os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3251 	}
3252 
3253 out:
3254 	proc_list_unlock();
3255 }
3256 
3257 #endif /* CONFIG_FREEZE */
3258