xref: /xnu-10002.61.3/bsd/kern/kern_memorystatus_freeze.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41 
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/kdebug.h>
53 #include <sys/kdebug_kernel.h>
54 #include <sys/proc.h>
55 #include <sys/proc_info.h>
56 #include <sys/reason.h>
57 #include <sys/signal.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/sysproto.h>
61 #include <sys/wait.h>
62 #include <sys/tree.h>
63 #include <sys/priv.h>
64 #include <vm/vm_pageout.h>
65 #include <vm/vm_protos.h>
66 #include <mach/machine/sdt.h>
67 #include <libkern/coreanalytics/coreanalytics.h>
68 #include <libkern/section_keywords.h>
69 #include <stdatomic.h>
70 
71 #include <IOKit/IOBSD.h>
72 
73 #if CONFIG_FREEZE
74 #include <vm/vm_map.h>
75 #endif /* CONFIG_FREEZE */
76 
77 #include <kern/kern_memorystatus_internal.h>
78 #include <sys/kern_memorystatus.h>
79 #include <sys/kern_memorystatus_freeze.h>
80 #include <sys/kern_memorystatus_notify.h>
81 
82 #if CONFIG_JETSAM
83 
84 extern unsigned int memorystatus_available_pages;
85 extern unsigned int memorystatus_available_pages_pressure;
86 extern unsigned int memorystatus_available_pages_critical;
87 extern unsigned int memorystatus_available_pages_critical_base;
88 extern unsigned int memorystatus_available_pages_critical_idle_offset;
89 
90 #else /* CONFIG_JETSAM */
91 
92 extern uint64_t memorystatus_available_pages;
93 extern uint64_t memorystatus_available_pages_pressure;
94 extern uint64_t memorystatus_available_pages_critical;
95 
96 #endif /* CONFIG_JETSAM */
97 
98 unsigned int memorystatus_frozen_count = 0;
99 unsigned int memorystatus_frozen_count_webcontent = 0;
100 unsigned int memorystatus_frozen_count_xpc_service = 0;
101 unsigned int memorystatus_suspended_count = 0;
102 
103 #if CONFIG_FREEZE
104 
105 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
106 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
107 
108 /* Thresholds */
109 unsigned int memorystatus_freeze_threshold = 0;
110 unsigned int memorystatus_freeze_pages_min = 0;
111 unsigned int memorystatus_freeze_pages_max = 0;
112 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
113 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
114 uint64_t     memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
115 uint64_t     memorystatus_freeze_budget_multiplier = 100; /* Multiplies the daily budget by 100/multiplier */
116 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
117 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
118 
119 unsigned int memorystatus_max_frozen_demotions_daily = 0;
120 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
121 unsigned int memorystatus_min_thaw_refreeze_threshold;
122 
123 boolean_t memorystatus_freeze_enabled = FALSE;
124 int memorystatus_freeze_wakeup = 0;
125 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
126 
127 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
128 
129 #ifdef XNU_KERNEL_PRIVATE
130 
131 unsigned int memorystatus_frozen_processes_max = 0;
132 unsigned int memorystatus_frozen_shared_mb = 0;
133 unsigned int memorystatus_frozen_shared_mb_max = 0;
134 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
135 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
136 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
137 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
138 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
139 
140 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
141 
142 #endif /* XNU_KERNEL_PRIVATE */
143 
144 static inline boolean_t memorystatus_can_freeze_processes(void);
145 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
146 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
147 static uint32_t memorystatus_freeze_calculate_new_budget(
148 	unsigned int time_since_last_interval_expired_sec,
149 	unsigned int burst_multiple,
150 	unsigned int interval_duration_min,
151 	uint32_t rollover);
152 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
153 
154 static void memorystatus_set_freeze_is_enabled(bool enabled);
155 static void memorystatus_disable_freeze(void);
156 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
157 
158 /* Stats */
159 static uint64_t memorystatus_freeze_pageouts = 0;
160 
161 /* Throttling */
162 #define DEGRADED_WINDOW_MINS    (30)
163 #define NORMAL_WINDOW_MINS      (24 * 60)
164 
165 /* Protected by the freezer_mutex */
166 static throttle_interval_t throttle_intervals[] = {
167 	{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
168 	{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
169 };
170 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
171 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
172 uint32_t memorystatus_freeze_current_interval = 0;
173 static thread_call_t freeze_interval_reset_thread_call;
174 static uint32_t memorystatus_freeze_calculate_new_budget(
175 	unsigned int time_since_last_interval_expired_sec,
176 	unsigned int burst_multiple,
177 	unsigned int interval_duration_min,
178 	uint32_t rollover);
179 
180 struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
181 struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
182 /*
183  * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
184  * in order (as opposed to using the older LRU approach).
185  */
186 int memorystatus_freezer_use_ordered_list = 0;
187 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
188 /*
189  * When enabled, demotion candidates are chosen from memorystatus_global_demotion_list
190  */
191 int memorystatus_freezer_use_demotion_list = 0;
192 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
193 
194 extern uint64_t vm_swap_get_free_space(void);
195 extern boolean_t vm_swap_max_budget(uint64_t *);
196 
197 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
198 static void memorystatus_demote_frozen_processes(bool urgent_mode);
199 
200 static void memorystatus_freeze_handle_error(proc_t p, const freezer_error_code_t freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
201 static void memorystatus_freeze_out_of_slots(void);
202 uint64_t memorystatus_freezer_thread_next_run_ts = 0;
203 
204 /* Sysctls needed for aggd stats */
205 
206 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
207 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
208 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
209 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
210 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
211 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
212 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
213 
214 /*
215  * Force a new interval with the given budget (no rollover).
216  */
static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)
{
	/* Caller must already hold the freezer mutex; we mutate the throttle window. */
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	mach_timespec_t now_ts;
	clock_sec_t sec;
	clock_nsec_t nsec;

	/* Stamp the new interval with the current system uptime (seconds clamped to 32 bits). */
	clock_get_system_nanotime(&sec, &nsec);
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;
	/* Budget is also clamped to 32 bits to match the throttle window's pageout counters. */
	memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
	/* Don't carry over any excess pageouts since we're forcing a new budget */
	normal_throttle_window->pageouts = 0;
	memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
}
233 #if DEVELOPMENT || DEBUG
/*
 * DEVELOPMENT/DEBUG handler for kern.memorystatus_freeze_budget_pages_remaining.
 * Reads report the remaining page budget; writes force a brand-new throttle
 * interval with the written budget (requires freezer swap to be active).
 */
static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
{
	#pragma unused(arg1, arg2, oidp)
	int error, changed;
	uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;

	/* Serialize against the freezer thread and other budget updates. */
	lck_mtx_lock(&freezer_mutex);

	error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
	if (changed) {
		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/* Budgets are only meaningful when the freezer can swap to disk. */
			lck_mtx_unlock(&freezer_mutex);
			return ENOTSUP;
		}
		/* Start a fresh interval with the requested budget (no pageout rollover). */
		memorystatus_freeze_force_new_interval(new_budget);
	}

	lck_mtx_unlock(&freezer_mutex);
	return error;
}
254 
255 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
256 #else /* DEVELOPMENT || DEBUG */
257 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
258 #endif /* DEVELOPMENT || DEBUG */
259 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
260 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
261 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
262 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
263 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
264 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
265 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
266 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
267 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
268 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
269 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
270 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
271 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
272 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
273 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
274 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
275 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
276 
277 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
278 
279 /*
280  * Calculates the hit rate for the freezer.
281  * The hit rate is defined as the percentage of procs that are currently in the
282  * freezer which we have thawed.
283  * A low hit rate means we're freezing bad candidates since they're not re-used.
284  */
/*
 * Compute the freezer hit rate: the percentage of frozen processes that
 * have been thawed (i.e. re-used). A low value means we are freezing bad
 * candidates that never get used again. Returns 100 when nothing is
 * frozen, and clamps to 100 when the relaxed-atomic counters are
 * momentarily out of sync.
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0) {
		return 100;
	}
	if (thaw_count > frozen_count) {
		/*
		 * Both counters are maintained with relaxed atomics and can be
		 * read out of sync, which would make the ratio exceed 100%.
		 */
		return 100;
	}
	return (int)((100 * thaw_count) / frozen_count);
}
303 
304 static int
get_thaw_percentage()305 get_thaw_percentage()
306 {
307 	uint64_t processes_frozen, processes_thawed;
308 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
309 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
310 	return calculate_thaw_percentage(processes_frozen, processes_thawed);
311 }
312 
313 static int
314 sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
315 {
316 #pragma unused(arg1, arg2)
317 	int thaw_percentage = get_thaw_percentage();
318 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
319 }
320 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
321 
322 static int
get_thaw_percentage_fg()323 get_thaw_percentage_fg()
324 {
325 	uint64_t processes_frozen, processes_thawed_fg;
326 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
327 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
328 	return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
329 }
330 
331 static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
332 {
333 #pragma unused(arg1, arg2)
334 	int thaw_percentage = get_thaw_percentage_fg();
335 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
336 }
337 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
338 
339 static int
get_thaw_percentage_webcontent()340 get_thaw_percentage_webcontent()
341 {
342 	uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
343 	processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
344 	processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
345 	return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
346 }
347 
348 static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
349 {
350 #pragma unused(arg1, arg2)
351 	int thaw_percentage = get_thaw_percentage_webcontent();
352 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
353 }
354 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
355 
356 
357 static int
get_thaw_percentage_bg()358 get_thaw_percentage_bg()
359 {
360 	uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
361 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
362 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
363 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
364 	return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
365 }
366 
367 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
368 {
369 #pragma unused(arg1, arg2)
370 	int thaw_percentage = get_thaw_percentage_bg();
371 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
372 }
373 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
374 
375 static int
get_thaw_percentage_fg_non_xpc_service()376 get_thaw_percentage_fg_non_xpc_service()
377 {
378 	uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
379 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
380 	processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
381 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
382 	processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
383 	/*
384 	 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
385 	 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
386 	 * Clamp just in case.
387 	 */
388 	processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
389 	processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
390 	return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
391 }
392 
393 static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
394 {
395 #pragma unused(arg1, arg2)
396 	int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
397 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
398 }
399 
400 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
401 
402 #define FREEZER_ERROR_STRING_LENGTH 128
403 
404 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
405 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
406 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
407 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
408 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
409 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
410 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
411 static int
412 sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
413 {
414 #pragma unused(arg1, arg2, oidp, req)
415 	int error = 0, changed = 0;
416 	uint64_t val = memorystatus_freeze_budget_multiplier;
417 	unsigned int new_budget;
418 	clock_sec_t sec;
419 	clock_nsec_t nsec;
420 	mach_timespec_t now_ts;
421 
422 	error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
423 	if (error) {
424 		return error;
425 	}
426 	if (changed) {
427 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
428 			return ENOTSUP;
429 		}
430 #if !(DEVELOPMENT || DEBUG)
431 		if (val > 100) {
432 			/* Can not increase budget on release. */
433 			return EINVAL;
434 		}
435 #endif
436 		lck_mtx_lock(&freezer_mutex);
437 
438 		memorystatus_freeze_budget_multiplier = val;
439 		/* Start a new throttle interval with this budget multiplier */
440 		new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
441 		clock_get_system_nanotime(&sec, &nsec);
442 		now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
443 		now_ts.tv_nsec = nsec;
444 		memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
445 		memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
446 
447 		lck_mtx_unlock(&freezer_mutex);
448 	}
449 	return 0;
450 }
451 EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
452 /*
453  * max. # of frozen process demotions we will allow in our daily cycle.
454  */
455 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
456 
457 /*
458  * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
459  */
460 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
461 
462 /*
463  * min # of global thaws needed for us to consider refreezing these processes.
464  */
465 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_min_thaw_refreeze_threshold, &memorystatus_min_thaw_refreeze_threshold, 0, UINT32_MAX, "");
466 
467 #if DEVELOPMENT || DEBUG
468 
469 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
470 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
471 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
472 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
473 
474 /*
475  * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
476  * "0" means no limit.
477  * Default is 10% of system-wide task limit.
478  */
479 
480 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
481 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
482 
483 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
484 
485 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
486 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
487 
488 /*
489  * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
490  * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
491  */
492 boolean_t memorystatus_freeze_to_memory = FALSE;
493 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
494 
495 #define VM_PAGES_FOR_ALL_PROCS    (2)
496 
497 /*
498  * Manual trigger of freeze and thaw for dev / debug kernels only.
499  */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error, pid = 0;
	proc_t p;
	freezer_error_code_t freezer_error_code = 0;
	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
	int ntasks = 0;
	coalition_t coal = COALITION_NULL;

	/* The value written to the sysctl is the pid of the process to freeze. */
	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr) {
		/* Read-only access (or handler error): nothing to do. */
		return error;
	}

	if (pid == VM_PAGES_FOR_ALL_PROCS) {
		/* Sentinel pid: page out anonymous memory system-wide instead of freezing. */
		vm_pageout_anonymous_pages();

		return 0;
	}

	/* Held across the whole freeze attempt, including the `again` re-entry below. */
	lck_mtx_lock(&freezer_mutex);
	if (memorystatus_freeze_enabled == FALSE) {
		lck_mtx_unlock(&freezer_mutex);
		memorystatus_log("sysctl_freeze: Freeze is DISABLED\n");
		return ENOTSUP;
	}

again:
	p = proc_find(pid);
	if (p != NULL) {
		memorystatus_freezer_stats.mfs_process_considered_count++;
		uint32_t purgeable, wired, clean, dirty, shared;
		uint32_t max_pages = 0, state = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * Freezer backed by the compressor and swap file(s)
			 * will hold compressed data.
			 *
			 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
			 * being swapped out to disk. Note that this disables freezer swap support globally,
			 * not just for the process being frozen.
			 *
			 *
			 * We don't care about the global freezer budget or the process's (min/max) budget here.
			 * The freeze sysctl is meant to force-freeze a process.
			 *
			 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
			 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
			 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
			 */
			max_pages = memorystatus_freeze_pages_max;
		} else {
			/*
			 * We only have the compressor without any swap.
			 */
			max_pages = UINT32_MAX - 1;
		}

		/* Snapshot the memstat state under the proc list lock. */
		proc_list_lock();
		state = p->p_memstat_state;
		proc_list_unlock();

		/*
		 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
		 * We simply ensure that jetsam is not already working on the process and that the process has not
		 * explicitly disabled freezing.
		 */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
			memorystatus_log_error("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
			    (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
			    (state & P_MEMSTAT_LOCKED) ? " locked" : "",
			    (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");

			proc_rele(p);
			lck_mtx_unlock(&freezer_mutex);
			return EPERM;
		}

		/* Attempt the actual freeze; the out-params report page-category counts. */
		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, pid, max_pages);
		error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
		if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
			memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
		}
		KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);

		if (error) {
			memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
			if (error == KERN_NO_SPACE) {
				/* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
				error = ENOSPC;
			} else {
				error = EIO;
			}
		} else {
			/* Freeze succeeded: update the process's memstat bookkeeping. */
			proc_list_lock();
			if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
				/* First freeze of this process. */
				p->p_memstat_state |= P_MEMSTAT_FROZEN;
				p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
				memorystatus_frozen_count++;
				os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
				if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
					memorystatus_frozen_count_webcontent++;
					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
				}
				if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
					/* All freezer slots are now in use. */
					memorystatus_freeze_out_of_slots();
				}
			} else {
				// This was a re-freeze
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
					memorystatus_freezer_stats.mfs_refreeze_count++;
				}
			}
			p->p_memstat_frozen_count++;

			if (coal != NULL) {
				/* We just froze an xpc service. Mark it as such for telemetry */
				p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
				memorystatus_frozen_count_xpc_service++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
			}


			proc_list_unlock();

			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				/*
				 * We elevate only if we are going to swap out the data.
				 */
				error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
				    memorystatus_freeze_jetsam_band, TRUE);

				if (error) {
					memorystatus_log_error("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
				}
			}
		}

		if ((error == 0) && (coal == NULL)) {
			/*
			 * We froze a process and so we check to see if it was
			 * a coalition leader and if it has XPC services that
			 * might need freezing.
			 * Only one leader can be frozen at a time and so we shouldn't
			 * enter this block more than once per call. Hence the
			 * check that 'coal' has to be NULL. We should make this an
			 * assert() or panic() once we have a much more concrete way
			 * to detect an app vs a daemon.
			 */

			task_t          curr_task = NULL;

			curr_task = proc_task(p);
			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, coal)) {
				ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);

				if (ntasks > MAX_XPC_SERVICE_PIDS) {
					/* The coalition has more XPC services than we track; cap it. */
					ntasks = MAX_XPC_SERVICE_PIDS;
				}
			}
		}

		proc_rele(p);

		/*
		 * Pop the next XPC-service pid (if any) and re-run the freeze path
		 * for it. The goto exits the loop body immediately, so this `while`
		 * behaves like an `if`; the freezer mutex stays held across the jump.
		 */
		while (ntasks) {
			pid = pid_list[--ntasks];
			goto again;
		}

		lck_mtx_unlock(&freezer_mutex);
		return error;
	} else {
		memorystatus_log_error("sysctl_freeze: Invalid process\n");
	}


	lck_mtx_unlock(&freezer_mutex);
	return EINVAL;
}
685 
/* kern.memorystatus_freeze: write-only (masked) debug trigger handled by sysctl_memorystatus_freeze above. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");
688 
689 /*
690  * Manual trigger of agressive frozen demotion for dev / debug kernels only.
691  */
692 static int
693 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
694 {
695 #pragma unused(arg1, arg2)
696 	int error, val;
697 	/*
698 	 * Only demote on write to prevent demoting during `sysctl -a`.
699 	 * The actual value written doesn't matter.
700 	 */
701 	error = sysctl_handle_int(oidp, &val, 0, req);
702 	if (error || !req->newptr) {
703 		return error;
704 	}
705 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
706 		return ENOTSUP;
707 	}
708 	lck_mtx_lock(&freezer_mutex);
709 	memorystatus_demote_frozen_processes(false);
710 	lck_mtx_unlock(&freezer_mutex);
711 	return 0;
712 }
713 
714 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
715 
716 static int
717 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
718 {
719 #pragma unused(arg1, arg2)
720 
721 	int error, pid = 0;
722 	proc_t p;
723 
724 	if (memorystatus_freeze_enabled == FALSE) {
725 		return ENOTSUP;
726 	}
727 
728 	error = sysctl_handle_int(oidp, &pid, 0, req);
729 	if (error || !req->newptr) {
730 		return error;
731 	}
732 
733 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
734 		do_fastwake_warmup_all();
735 		return 0;
736 	} else {
737 		p = proc_find(pid);
738 		if (p != NULL) {
739 			error = task_thaw(proc_task(p));
740 
741 			if (error) {
742 				error = EIO;
743 			} else {
744 				/*
745 				 * task_thaw() succeeded.
746 				 *
747 				 * We increment memorystatus_frozen_count on the sysctl freeze path.
748 				 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
749 				 * when this process exits.
750 				 *
751 				 * proc_list_lock();
752 				 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
753 				 * proc_list_unlock();
754 				 */
755 			}
756 			proc_rele(p);
757 			return error;
758 		}
759 	}
760 
761 	return EINVAL;
762 }
763 
/* kern.memorystatus_thaw: write-only (masked) trigger handled by sysctl_memorystatus_available_pages_thaw. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
766 
767 
/*
 * Global freezer preconditions reported at the head of the
 * FREEZER_CONTROL_GET_STATUS buffer (copied out to userspace;
 * do not change the layout).
 */
typedef struct _global_freezable_status {
	boolean_t       freeze_pages_threshold_crossed;  /* available pages dropped below the freeze threshold */
	boolean_t       freeze_eligible_procs_available; /* suspended-but-not-frozen count exceeds the suspended threshold */
	boolean_t       freeze_scheduled_in_future;      /* freezer thread's next run timestamp is still ahead of now */
}global_freezable_status_t;
773 
/*
 * Per-process freezability report entry for FREEZER_CONTROL_GET_STATUS
 * (copied out to userspace; do not change the layout).
 */
typedef struct _proc_freezable_status {
	boolean_t    freeze_has_memstat_state;  /* memstat flags allow freezing (suspended, not terminated/locked/disabled/ignored) */
	boolean_t    freeze_has_pages_min;      /* resident pages >= memorystatus_freeze_pages_min (always TRUE for XPC services) */
	int        freeze_has_probability;      /* probability-of-use from the global table; -1 if no table / not applicable */
	int        freeze_leader_eligible;      /* FREEZE_PROC_LEADER_FREEZABLE_* verdict for the coalition leader */
	boolean_t    freeze_attempted;          /* an eval-only task_freeze() was attempted */
	uint32_t    p_memstat_state;            /* snapshot of p_memstat_state at evaluation time */
	uint32_t    p_pages;                    /* resident page count ((uint32_t)-1 for XPC services, where size is ignored) */
	int        p_freeze_error_code;         /* freezer error code from the eval-only freeze attempt, 0 if none */
	int        p_pid;                       /* pid of the evaluated process */
	int        p_leader_pid;                /* coalition leader pid for XPC services; 0 for non-coalition-driven entries */
	char        p_name[MAXCOMLEN + 1];      /* process name */
}proc_freezable_status_t;
787 
#define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */

/*
 * For coalition based freezing evaluations, we proceed as follows:
 *  - detect that the process is a coalition member and a XPC service
 *  - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be freezable too
 *
 * Once we are done evaluating all processes, we do a quick run through all
 * processes and for a coalition member XPC service we look up the 'freezable'
 * status of its leader and iff:
 *  - the xpc service is freezable i.e. its individual freeze evaluation worked
 *  - and, its leader is also marked freezable
 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */

#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN   (-1) /* leader verdict pending; resolved in the second pass */
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1) /* leader (or self) is freezable */
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2) /* leader failed evaluation or was never seen in band 0 */
807 
808 static int
memorystatus_freezer_get_status(user_addr_t buffer,size_t buffer_size,int32_t * retval)809 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
810 {
811 	uint32_t            proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
812 	global_freezable_status_t    *list_head;
813 	proc_freezable_status_t     *list_entry, *list_entry_start;
814 	size_t                list_size = 0, entry_count = 0;
815 	proc_t                p, leader_proc;
816 	memstat_bucket_t        *bucket;
817 	uint32_t            state = 0, pages = 0;
818 	boolean_t            try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
819 	int                error = 0, probability_of_use = 0;
820 	pid_t              leader_pid = 0;
821 
822 
823 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
824 		return ENOTSUP;
825 	}
826 
827 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
828 
829 	if (buffer_size < list_size) {
830 		return EINVAL;
831 	}
832 
833 	list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
834 	if (list_head == NULL) {
835 		return ENOMEM;
836 	}
837 
838 	list_size = sizeof(global_freezable_status_t);
839 
840 	proc_list_lock();
841 
842 	uint64_t curr_time = mach_absolute_time();
843 
844 	list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
845 	list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
846 	list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
847 
848 	list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
849 	list_entry = list_entry_start;
850 
851 	bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
852 
853 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
854 
855 	p = memorystatus_get_first_proc_locked(&band, FALSE);
856 	proc_count++;
857 
858 	while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
859 	    (p) &&
860 	    (list_size < buffer_size)) {
861 		if (isSysProc(p)) {
862 			/*
863 			 * Daemon:- We will consider freezing it iff:
864 			 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
865 			 * - its role in the coalition is XPC service.
866 			 *
867 			 * We skip memory size requirements in this case.
868 			 */
869 
870 			coalition_t     coal = COALITION_NULL;
871 			task_t          leader_task = NULL, curr_task = NULL;
872 			int             task_role_in_coalition = 0;
873 
874 			curr_task = proc_task(p);
875 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
876 
877 			if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
878 				/*
879 				 * By default, XPC services without an app
880 				 * will be the leader of their own single-member
881 				 * coalition.
882 				 */
883 				goto skip_ineligible_xpc;
884 			}
885 
886 			leader_task = coalition_get_leader(coal);
887 			if (leader_task == TASK_NULL) {
888 				/*
889 				 * This jetsam coalition is currently leader-less.
890 				 * This could happen if the app died, but XPC services
891 				 * have not yet exited.
892 				 */
893 				goto skip_ineligible_xpc;
894 			}
895 
896 			leader_proc = (proc_t)get_bsdtask_info(leader_task);
897 			task_deallocate(leader_task);
898 
899 			if (leader_proc == PROC_NULL) {
900 				/* leader task is exiting */
901 				goto skip_ineligible_xpc;
902 			}
903 
904 			task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
905 
906 			if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
907 				xpc_skip_size_probability_check = TRUE;
908 				leader_pid = proc_getpid(leader_proc);
909 				goto continue_eval;
910 			}
911 
912 skip_ineligible_xpc:
913 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
914 			proc_count++;
915 			continue;
916 		}
917 
918 continue_eval:
919 		strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
920 
921 		list_entry->p_pid = proc_getpid(p);
922 
923 		state = p->p_memstat_state;
924 
925 		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
926 		    !(state & P_MEMSTAT_SUSPENDED)) {
927 			try_freeze = list_entry->freeze_has_memstat_state = FALSE;
928 		} else {
929 			try_freeze = list_entry->freeze_has_memstat_state = TRUE;
930 		}
931 
932 		list_entry->p_memstat_state = state;
933 
934 		if (xpc_skip_size_probability_check == TRUE) {
935 			/*
936 			 * Assuming the coalition leader is freezable
937 			 * we don't care re. minimum pages and probability
938 			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
939 			 * XPC services have to be explicity opted-out of the disabled
940 			 * state. And we checked that state above.
941 			 */
942 			list_entry->freeze_has_pages_min = TRUE;
943 			list_entry->p_pages = -1;
944 			list_entry->freeze_has_probability = -1;
945 
946 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
947 			list_entry->p_leader_pid = leader_pid;
948 
949 			xpc_skip_size_probability_check = FALSE;
950 		} else {
951 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
952 			list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
953 
954 			memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
955 			if (pages < memorystatus_freeze_pages_min) {
956 				try_freeze = list_entry->freeze_has_pages_min = FALSE;
957 			} else {
958 				list_entry->freeze_has_pages_min = TRUE;
959 			}
960 
961 			list_entry->p_pages = pages;
962 
963 			if (entry_count) {
964 				uint32_t j = 0;
965 				for (j = 0; j < entry_count; j++) {
966 					if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
967 					    p->p_name,
968 					    MAXCOMLEN) == 0) {
969 						probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
970 						break;
971 					}
972 				}
973 
974 				list_entry->freeze_has_probability = probability_of_use;
975 
976 				try_freeze = ((probability_of_use > 0) && try_freeze);
977 			} else {
978 				list_entry->freeze_has_probability = -1;
979 			}
980 		}
981 
982 		if (try_freeze) {
983 			uint32_t purgeable, wired, clean, dirty, shared;
984 			uint32_t max_pages = 0;
985 			int freezer_error_code = 0;
986 
987 			error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
988 
989 			if (error) {
990 				list_entry->p_freeze_error_code = freezer_error_code;
991 			}
992 
993 			list_entry->freeze_attempted = TRUE;
994 		}
995 
996 		list_entry++;
997 		freeze_eligible_proc_considered++;
998 
999 		list_size += sizeof(proc_freezable_status_t);
1000 
1001 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1002 		proc_count++;
1003 	}
1004 
1005 	proc_list_unlock();
1006 
1007 	list_entry = list_entry_start;
1008 
1009 	for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1010 		if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1011 			leader_pid = list_entry[xpc_index].p_leader_pid;
1012 
1013 			leader_proc = proc_find(leader_pid);
1014 
1015 			if (leader_proc) {
1016 				if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
1017 					/*
1018 					 * Leader has already been frozen.
1019 					 */
1020 					list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1021 					proc_rele(leader_proc);
1022 					continue;
1023 				}
1024 				proc_rele(leader_proc);
1025 			}
1026 
1027 			for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1028 				if (list_entry[leader_index].p_pid == leader_pid) {
1029 					if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1030 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1031 					} else {
1032 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1033 						list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1034 					}
1035 					break;
1036 				}
1037 			}
1038 
1039 			/*
1040 			 * Didn't find the leader entry. This might be likely because
1041 			 * the leader never made it down to band 0.
1042 			 */
1043 			if (leader_index == freeze_eligible_proc_considered) {
1044 				list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1045 				list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1046 			}
1047 		}
1048 	}
1049 
1050 	buffer_size = MIN(list_size, INT32_MAX);
1051 
1052 	error = copyout(list_head, buffer, buffer_size);
1053 	if (error == 0) {
1054 		*retval = (int32_t) buffer_size;
1055 	} else {
1056 		*retval = 0;
1057 	}
1058 
1059 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1060 	kfree_data(list_head, list_size);
1061 
1062 	memorystatus_log_debug("memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
1063 
1064 	return error;
1065 }
1066 
1067 #endif /* DEVELOPMENT || DEBUG */
1068 
1069 /*
1070  * Get a list of all processes in the freezer band which are currently frozen.
1071  * Used by powerlog to collect analytics on frozen process.
1072  */
1073 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1074 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1075 {
1076 	global_frozen_procs_t *frozen_procs = NULL;
1077 	uint32_t band = memorystatus_freeze_jetsam_band;
1078 	proc_t p;
1079 	uint32_t state;
1080 	int error;
1081 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1082 		return ENOTSUP;
1083 	}
1084 	if (buffer_size < sizeof(global_frozen_procs_t)) {
1085 		return EINVAL;
1086 	}
1087 	frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1088 	if (frozen_procs == NULL) {
1089 		return ENOMEM;
1090 	}
1091 
1092 	proc_list_lock();
1093 	p = memorystatus_get_first_proc_locked(&band, FALSE);
1094 	while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1095 		state = p->p_memstat_state;
1096 		if (state & P_MEMSTAT_FROZEN) {
1097 			frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1098 			strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1099 			    p->p_name, sizeof(proc_name_t));
1100 			frozen_procs->gfp_num_frozen++;
1101 		}
1102 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1103 	}
1104 	proc_list_unlock();
1105 
1106 	buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1107 	error = copyout(frozen_procs, buffer, buffer_size);
1108 	if (error == 0) {
1109 		*retval = (int32_t) buffer_size;
1110 	} else {
1111 		*retval = 0;
1112 	}
1113 	kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1114 
1115 	return error;
1116 }
1117 
1118 /*
1119  * If dasd is running an experiment that impacts their freezer candidate selection,
1120  * we record that in our telemetry.
1121  */
1122 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1123 
1124 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1125 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1126 {
1127 	memorystatus_freezer_trial_identifiers_v1 identifiers;
1128 	int error = 0;
1129 
1130 	if (buffer_size != sizeof(identifiers)) {
1131 		return EINVAL;
1132 	}
1133 	error = copyin(buffer, &identifiers, sizeof(identifiers));
1134 	if (error != 0) {
1135 		return error;
1136 	}
1137 	if (identifiers.version != 1) {
1138 		return EINVAL;
1139 	}
1140 	dasd_trial_identifiers = identifiers;
1141 	*retval = 0;
1142 	return error;
1143 }
1144 
1145 /*
1146  * Reset the freezer state by wiping out all suspended frozen apps, clearing
1147  * per-process freezer state, and starting a fresh interval.
1148  */
1149 static int
memorystatus_freezer_reset_state(int32_t * retval)1150 memorystatus_freezer_reset_state(int32_t *retval)
1151 {
1152 	uint32_t band = JETSAM_PRIORITY_IDLE;
1153 	/* Don't kill above the frozen band */
1154 	uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
1155 	proc_t next_p = PROC_NULL;
1156 	uint64_t new_budget;
1157 
1158 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1159 		return ENOTSUP;
1160 	}
1161 
1162 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
1163 	if (jetsam_reason == OS_REASON_NULL) {
1164 		memorystatus_log_error("memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
1165 	}
1166 	lck_mtx_lock(&freezer_mutex);
1167 	kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
1168 	proc_list_lock();
1169 
1170 	/*
1171 	 * Clear the considered and skip reason flags on all processes
1172 	 * so we're starting fresh with the new policy.
1173 	 */
1174 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1175 	while (next_p) {
1176 		proc_t p = next_p;
1177 		uint32_t state = p->p_memstat_state;
1178 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1179 
1180 		if (p->p_memstat_effectivepriority > kMaxBand) {
1181 			break;
1182 		}
1183 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1184 			continue;
1185 		}
1186 
1187 		p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
1188 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1189 	}
1190 
1191 	proc_list_unlock();
1192 
1193 	new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1194 	memorystatus_freeze_force_new_interval(new_budget);
1195 
1196 	lck_mtx_unlock(&freezer_mutex);
1197 	*retval = 0;
1198 	return 0;
1199 }
1200 
1201 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1202 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1203 {
1204 	int err = ENOTSUP;
1205 
1206 #if DEVELOPMENT || DEBUG
1207 	if (flags == FREEZER_CONTROL_GET_STATUS) {
1208 		err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1209 	}
1210 #endif /* DEVELOPMENT || DEBUG */
1211 	if (flags == FREEZER_CONTROL_GET_PROCS) {
1212 		err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1213 	} else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1214 		err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1215 	} else if (flags == FREEZER_CONTROL_RESET_STATE) {
1216 		err = memorystatus_freezer_reset_state(retval);
1217 	}
1218 
1219 	return err;
1220 }
1221 
1222 extern void        vm_swap_consider_defragmenting(int);
1223 extern void vm_page_reactivate_all_throttled(void);
1224 
/*
 * Kill every frozen process in bands [JETSAM_PRIORITY_IDLE .. max_band].
 *
 * Preconditions: freezer_mutex held (prevents new freezes while we work);
 * the proc list lock must NOT be held (it is taken and dropped internally
 * around each kill).
 *
 * max_band:             highest jetsam priority band to consider.
 * suspended_only:       if true, only kill frozen processes that are also suspended.
 * jetsam_reason:        kill reason; a reference is taken per kill because
 *                       memorystatus_kill_with_jetsam_reason_sync drops one.
 * memory_reclaimed_out: optional out-param; accumulates the phys footprint
 *                       of every process successfully killed.
 *
 * Returns true if at least one process was killed.
 */
static bool
kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	unsigned int band = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	pid_t pid = 0;
	bool retval = false, killed = false;
	uint32_t state;
	/* skips counts frozen processes we bypassed because they're already exiting. */
	uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
	proc_list_lock();

	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
		state = p->p_memstat_state;

		/* Bands are walked in ascending priority order; stop past max_band. */
		if (p->p_memstat_effectivepriority > max_band) {
			break;
		}

		if (!(state & P_MEMSTAT_FROZEN)) {
			continue;
		}

		if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
			continue;
		}

		/* Clear any stale error flag before attempting the kill. */
		if (state & P_MEMSTAT_ERROR) {
			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
		}

		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
			memorystatus_log("memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
			skips++;
			continue;
		}

		/* Capture footprint before the kill; it is gone afterwards. */
		footprint = get_task_phys_footprint(proc_task(p));
		pid = proc_getpid(p);
		proc_list_unlock();

		/* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
		os_reason_ref(jetsam_reason);
		retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
		if (retval) {
			killed = true;
			memory_reclaimed += footprint;
		}
		proc_list_lock();
		/*
		 * The bands might have changed when we dropped the proc list lock.
		 * So start from the beginning.
		 * Since we're preventing any further freezing by holding the freezer mutex,
		 * and we skip anything we've already tried to kill this is guaranteed to terminate.
		 */
		band = 0;
		skips = 0;
		next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	}

	assert(skips <= memorystatus_frozen_count);
#if DEVELOPMENT || DEBUG
	if (!suspended_only && max_band >= JETSAM_PRIORITY_FOREGROUND) {
		/*
		 * Check that we've killed all frozen processes.
		 * Note that they may still be exiting (represented by skips).
		 */
		if (memorystatus_frozen_count - skips > 0) {
			assert(memorystatus_freeze_enabled == FALSE);

			panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
			    memorystatus_frozen_count);
		}
	}
#endif /* DEVELOPMENT || DEBUG */
	if (memory_reclaimed_out) {
		*memory_reclaimed_out = memory_reclaimed;
	}
	proc_list_unlock();
	return killed;
}
1315 
1316 /*
1317  * Disables the freezer, jetsams all frozen processes,
1318  * and reclaims the swap space immediately.
1319  */
1320 
1321 void
memorystatus_disable_freeze(void)1322 memorystatus_disable_freeze(void)
1323 {
1324 	uint64_t memory_reclaimed = 0;
1325 	bool killed = false;
1326 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1327 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1328 
1329 
1330 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1331 	    memorystatus_available_pages);
1332 	memorystatus_log("memorystatus: Disabling freezer. Will kill all frozen processes\n");
1333 
1334 	/*
1335 	 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1336 	 * and all frozen processes will be killed
1337 	 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1338 	 * ensures that no new processes will be frozen once we release the mutex.
1339 	 *
1340 	 */
1341 	memorystatus_freeze_enabled = FALSE;
1342 
1343 	/*
1344 	 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1345 	 */
1346 	vm_page_reactivate_all_throttled();
1347 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1348 	if (jetsam_reason == OS_REASON_NULL) {
1349 		memorystatus_log_error("memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1350 	}
1351 
1352 	killed = kill_all_frozen_processes(JETSAM_PRIORITY_FOREGROUND, false, jetsam_reason, &memory_reclaimed);
1353 
1354 	if (killed) {
1355 		memorystatus_log_info("memorystatus: Killed all frozen processes.\n");
1356 		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1357 
1358 		proc_list_lock();
1359 		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1360 		    sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1361 		uint64_t timestamp_now = mach_absolute_time();
1362 		memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1363 		memorystatus_jetsam_snapshot->js_gencount++;
1364 		if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1365 		    timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1366 			proc_list_unlock();
1367 			int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1368 			if (!ret) {
1369 				proc_list_lock();
1370 				memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1371 			}
1372 		}
1373 		proc_list_unlock();
1374 	} else {
1375 		memorystatus_log_info("memorystatus: No frozen processes to kill.\n");
1376 	}
1377 
1378 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1379 	    memorystatus_available_pages, memory_reclaimed);
1380 
1381 	return;
1382 }
1383 
1384 static void
memorystatus_set_freeze_is_enabled(bool enabled)1385 memorystatus_set_freeze_is_enabled(bool enabled)
1386 {
1387 	lck_mtx_lock(&freezer_mutex);
1388 	if (enabled != memorystatus_freeze_enabled) {
1389 		if (enabled) {
1390 			memorystatus_freeze_enabled = true;
1391 		} else {
1392 			memorystatus_disable_freeze();
1393 		}
1394 	}
1395 	lck_mtx_unlock(&freezer_mutex);
1396 }
1397 
1398 
1399 static int
1400 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1401 {
1402 #pragma unused(arg1, arg2)
1403 	int error, val = memorystatus_freeze_enabled ? 1 : 0;
1404 
1405 	error = sysctl_handle_int(oidp, &val, 0, req);
1406 	if (error || !req->newptr) {
1407 		return error;
1408 	}
1409 
1410 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1411 		memorystatus_log_error("memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1412 		return EINVAL;
1413 	}
1414 
1415 	memorystatus_set_freeze_is_enabled(val);
1416 
1417 	return 0;
1418 }
1419 
1420 SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1421 
1422 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1423 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1424 {
1425 	uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1426 	uint64_t interval_expiration_absolutetime;
1427 	nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1428 	memorystatus_log_info("memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1429 
1430 	thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1431 }
1432 
1433 extern uuid_string_t trial_treatment_id;
1434 extern uuid_string_t trial_experiment_id;
1435 extern int trial_deployment_id;
1436 
/*
 * CoreAnalytics event schema for per-interval freezer telemetry, populated
 * by memorystatus_freeze_record_interval_analytics() when an interval expires.
 */
CA_EVENT(freezer_interval,
    CA_INT, budget_remaining,
    CA_INT, error_below_min_pages,
    CA_INT, error_excess_shared_memory,
    CA_INT, error_low_private_shared_ratio,
    CA_INT, error_no_compressor_space,
    CA_INT, error_no_swap_space,
    CA_INT, error_low_probability_of_use,
    CA_INT, error_elevated,
    CA_INT, error_other,
    CA_INT, frozen_count,
    CA_INT, pageouts,
    CA_INT, refreeze_average,
    CA_INT, skipped_full,
    CA_INT, skipped_shared_mb_high,
    CA_INT, swapusage,
    CA_INT, thaw_count,
    CA_INT, thaw_percentage,
    CA_INT, thaws_per_gb,
    CA_INT, trial_deployment_id,
    CA_INT, dasd_trial_deployment_id,
    CA_INT, budget_exhaustion_duration_remaining,
    CA_INT, thaw_percentage_webcontent,
    CA_INT, thaw_percentage_fg,
    CA_INT, thaw_percentage_bg,
    CA_INT, thaw_percentage_fg_non_xpc_service,
    CA_INT, fg_resume_count,
    CA_INT, unique_freeze_count,
    CA_INT, unique_thaw_count,
    CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1470 
1471 extern uint64_t vm_swap_get_total_space(void);
1472 extern uint64_t vm_swap_get_free_space(void);
1473 
/*
 * Record statistics from the expiring interval
 * via core analytics.
 *
 * Reads the counters accumulated over the interval
 * (memorystatus_freezer_stats, normal_throttle_window, the frozen/thaw
 * counts) and sends them as a single CA_EVENT_TYPE(freezer_interval)
 * event. Most raw counters are normalized first: error/skip counters
 * become percentages, page counts become MB.
 */
static void
memorystatus_freeze_record_interval_analytics(void)
{
	ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
	CA_EVENT_TYPE(freezer_interval) * e = event->data;
	/* Budget is tracked in pages; report it in MB (pages * PAGE_SIZE / 2^20). */
	e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
	uint64_t process_considered_count, refrozen_count, below_threshold_count;
	memory_object_size_t swap_size;
	process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
	if (process_considered_count != 0) {
		/* Each freeze-failure reason as a percentage of all candidates considered. */
		e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
		e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
		e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
		e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
		e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
		e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
		e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
		e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
	}
	e->frozen_count = memorystatus_frozen_count;
	/* Pageouts are tracked in pages; report them in MB. */
	e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
	refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
	if (refrozen_count != 0) {
		/* Average MB written per re-freeze over the interval. */
		e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
	}
	below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
	if (below_threshold_count != 0) {
		/* Skip reasons as a percentage of times we were below the memory threshold. */
		e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
		e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
	}
	if (VM_CONFIG_SWAP_IS_PRESENT) {
		swap_size = vm_swap_get_total_space();
		if (swap_size) {
			/*
			 * NOTE(review): this records *free* swap as a percentage of
			 * total swap, although the field is named "swapusage" —
			 * confirm whether used space was intended.
			 */
			e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
		}
	}
	e->thaw_count = memorystatus_thaw_count;
	e->thaw_percentage = get_thaw_percentage();
	e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
	e->thaw_percentage_fg = get_thaw_percentage_fg();
	e->thaw_percentage_bg = get_thaw_percentage_bg();
	e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();

	/* e->pageouts is already in MB here, so dividing by 2^10 yields GB written. */
	if (e->pageouts / (1UL << 10) != 0) {
		e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
	}
	e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
	e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
	e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
	e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);

	/*
	 * Record any xnu or dasd experiment information
	 */
	strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
	strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
	e->trial_deployment_id = trial_deployment_id;
	strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
	strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
	e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;

	CA_EVENT_SEND(event);
}
1541 
1542 static void
memorystatus_freeze_reset_interval(void * arg0,void * arg1)1543 memorystatus_freeze_reset_interval(void *arg0, void *arg1)
1544 {
1545 #pragma unused(arg0, arg1)
1546 	struct throttle_interval_t *interval = NULL;
1547 	clock_sec_t sec;
1548 	clock_nsec_t nsec;
1549 	mach_timespec_t now_ts;
1550 	uint32_t budget_rollover = 0;
1551 
1552 	clock_get_system_nanotime(&sec, &nsec);
1553 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
1554 	now_ts.tv_nsec = nsec;
1555 	interval = normal_throttle_window;
1556 
1557 	/* Record analytics from the old interval before resetting. */
1558 	memorystatus_freeze_record_interval_analytics();
1559 
1560 	lck_mtx_lock(&freezer_mutex);
1561 	/* How long has it been since the previous interval expired? */
1562 	mach_timespec_t expiration_period_ts = now_ts;
1563 	SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
1564 	/* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
1565 	budget_rollover = interval->pageouts > interval->max_pageouts ?
1566 	    0 : interval->max_pageouts - interval->pageouts;
1567 
1568 	memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
1569 		    expiration_period_ts.tv_sec, interval->burst_multiple,
1570 		    interval->mins, budget_rollover),
1571 	    now_ts);
1572 	memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;
1573 
1574 	if (!memorystatus_freezer_use_demotion_list) {
1575 		memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
1576 	}
1577 	lck_mtx_unlock(&freezer_mutex);
1578 }
1579 
1580 
1581 proc_t
memorystatus_get_coalition_leader_and_role(proc_t p,int * role_in_coalition)1582 memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1583 {
1584 	coalition_t     coal = COALITION_NULL;
1585 	task_t          leader_task = NULL, curr_task = NULL;
1586 	proc_t          leader_proc = PROC_NULL;
1587 
1588 	curr_task = proc_task(p);
1589 	coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1590 
1591 	if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1592 		return p;
1593 	}
1594 
1595 	leader_task = coalition_get_leader(coal);
1596 	if (leader_task == TASK_NULL) {
1597 		/*
1598 		 * This jetsam coalition is currently leader-less.
1599 		 * This could happen if the app died, but XPC services
1600 		 * have not yet exited.
1601 		 */
1602 		return PROC_NULL;
1603 	}
1604 
1605 	leader_proc = (proc_t)get_bsdtask_info(leader_task);
1606 	task_deallocate(leader_task);
1607 
1608 	if (leader_proc == PROC_NULL) {
1609 		/* leader task is exiting */
1610 		return PROC_NULL;
1611 	}
1612 
1613 	*role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1614 
1615 	return leader_proc;
1616 }
1617 
1618 bool
memorystatus_freeze_process_is_recommended(const proc_t p)1619 memorystatus_freeze_process_is_recommended(const proc_t p)
1620 {
1621 	assert(!memorystatus_freezer_use_ordered_list);
1622 	int probability_of_use = 0;
1623 
1624 	size_t entry_count = 0, i = 0;
1625 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1626 	if (entry_count == 0) {
1627 		/*
1628 		 * If dasd hasn't supplied a table yet, we default to every app being eligible
1629 		 * for the freezer.
1630 		 */
1631 		return true;
1632 	}
1633 	for (i = 0; i < entry_count; i++) {
1634 		/*
1635 		 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1636 		 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1637 		 * MAXCOMLEN bytes here since the name in the probabilities table could
1638 		 * be truncated from the proc_t's p_name.
1639 		 */
1640 		if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1641 		    p->p_name,
1642 		    MAXCOMLEN) == 0) {
1643 			probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1644 			break;
1645 		}
1646 	}
1647 	return probability_of_use > 0;
1648 }
1649 
1650 __private_extern__ void
memorystatus_freeze_init(void)1651 memorystatus_freeze_init(void)
1652 {
1653 	kern_return_t result;
1654 	thread_t thread;
1655 
1656 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1657 		/*
1658 		 * This is just the default value if the underlying
1659 		 * storage device doesn't have any specific budget.
1660 		 * We check with the storage layer in memorystatus_freeze_update_throttle()
1661 		 * before we start our freezing the first time.
1662 		 */
1663 		memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1664 
1665 		result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1666 		if (result == KERN_SUCCESS) {
1667 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1668 			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1669 			thread_set_thread_name(thread, "VM_freezer");
1670 
1671 			thread_deallocate(thread);
1672 		} else {
1673 			panic("Could not create memorystatus_freeze_thread");
1674 		}
1675 
1676 		freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
1677 		/* Start a new interval */
1678 
1679 		lck_mtx_lock(&freezer_mutex);
1680 		uint32_t budget;
1681 		budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1682 		memorystatus_freeze_force_new_interval(budget);
1683 		lck_mtx_unlock(&freezer_mutex);
1684 	} else {
1685 		memorystatus_freeze_budget_pages_remaining = 0;
1686 	}
1687 }
1688 
1689 void
memorystatus_freeze_configure_for_swap()1690 memorystatus_freeze_configure_for_swap()
1691 {
1692 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1693 		return;
1694 	}
1695 
1696 	assert(memorystatus_swap_all_apps);
1697 
1698 	/*
1699 	 * We expect both a larger working set and larger individual apps
1700 	 * in this mode, so tune up the freezer accordingly.
1701 	 */
1702 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED;
1703 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED;
1704 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED;
1705 
1706 	/*
1707 	 * We don't have a budget when running with full app swap.
1708 	 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1709 	 * unlimited budget.
1710 	 */
1711 	lck_mtx_lock(&freezer_mutex);
1712 	uint32_t budget;
1713 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1714 	memorystatus_freeze_force_new_interval(budget);
1715 	lck_mtx_unlock(&freezer_mutex);
1716 }
1717 
1718 void
memorystatus_freeze_disable_swap()1719 memorystatus_freeze_disable_swap()
1720 {
1721 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1722 		return;
1723 	}
1724 
1725 	assert(!memorystatus_swap_all_apps);
1726 
1727 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX;
1728 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS;
1729 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
1730 
1731 	/*
1732 	 * Calculate a new budget now that we're constrained by our daily write budget again.
1733 	 */
1734 	lck_mtx_lock(&freezer_mutex);
1735 	uint32_t budget;
1736 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1737 	memorystatus_freeze_force_new_interval(budget);
1738 	lck_mtx_unlock(&freezer_mutex);
1739 }
1740 
/*
 * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
 *
 * Freeze (or re-freeze) a single process via task_freeze().
 *
 * p                      - candidate process (caller holds no extra ref; one is taken here)
 * coal                   - IN/OUT: NULL for plain freezes; for coalition-driven freezes
 *                          *coal is filled with p's jetsam coalition if p is its leader
 * coalition_list         - OUT: pids of the leader's XPC services to freeze next
 * coalition_list_length  - OUT: number of valid entries in coalition_list
 *
 * Returns 0 on success; EINVAL/EBUSY/ENOSPC on ineligibility or failure.
 * Note: proc_list_lock is dropped around task_freeze() and retaken, so
 * memorystatus state may have changed across that window.
 */
static int
memorystatus_freeze_process(
	proc_t p,
	coalition_t *coal, /* IN / OUT */
	pid_t *coalition_list, /* OUT */
	unsigned int *coalition_list_length /* OUT */)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	kern_return_t kr;
	uint32_t purgeable, wired, clean, dirty, shared;
	uint64_t max_pages = 0;
	freezer_error_code_t freezer_error_code = 0;
	bool is_refreeze = false;
	task_t curr_task = TASK_NULL;

	pid_t aPid = proc_getpid(p);

	is_refreeze = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;

	/* Ensure the process is eligible for (re-)freezing */
	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
		return EINVAL;
	}
	if (is_refreeze) {
		/*
		 * Not currently being looked at for something.
		 */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			return EBUSY;
		}

		/*
		 * We are going to try and refreeze and so re-evaluate
		 * the process. We don't want to double count the shared
		 * memory. So deduct the old snapshot here.
		 */
		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
		p->p_memstat_freeze_sharedanon_pages = 0;

		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	} else {
		if (!memorystatus_is_process_eligible_for_freeze(p)) {
			return EINVAL;
		}
		/* First-time freeze needs a free slot in the frozen-process table. */
		if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
			memorystatus_freeze_handle_error(p, FREEZER_ERROR_NO_SLOTS, is_refreeze, aPid, (coal ? *coal : NULL), "memorystatus_freeze_process");
			return ENOSPC;
		}
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * Freezer backed by the compressor and swap file(s)
		 * will hold compressed data.
		 */

		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
	} else {
		/*
		 * We only have the compressor pool.
		 */
		max_pages = UINT32_MAX - 1;
	}

	/* Mark as locked temporarily to avoid kill */
	p->p_memstat_state |= P_MEMSTAT_LOCKED;

	p = proc_ref(p, true);
	if (!p) {
		/* Could not take a reference; presumably the proc is exiting. */
		memorystatus_freezer_stats.mfs_error_other_count++;
		return EBUSY;
	}

	/* Drop the proc_list_lock across the (potentially long) task_freeze() call. */
	proc_list_unlock();

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, memorystatus_available_pages, aPid, max_pages);

	max_pages = MIN(max_pages, UINT32_MAX);
	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
	}

	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, purgeable, wired, clean, dirty);

	memorystatus_log_debug("memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %llu, shared %d",
	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);

	proc_list_lock();

	/* Success? */
	if (KERN_SUCCESS == kr) {
		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

		p->p_memstat_freeze_sharedanon_pages += shared;

		memorystatus_frozen_shared_mb += shared;

		if (!is_refreeze) {
			/* First-time freeze: account the new frozen process. */
			p->p_memstat_state |= P_MEMSTAT_FROZEN;
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
			memorystatus_frozen_count++;
			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
				memorystatus_frozen_count_webcontent++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
			}
			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
				memorystatus_freeze_out_of_slots();
			}
		} else {
			// This was a re-freeze
			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
				memorystatus_freezer_stats.mfs_refreeze_count++;
			}
		}

		p->p_memstat_frozen_count++;

		/*
		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
		 * to its higher jetsam band.
		 */
		proc_list_unlock();

		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			int ret;
			unsigned int i;
			/* Elevate the frozen process to the dedicated freezer jetsam band. */
			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);

			if (ret) {
				memorystatus_log_error("Elevating the frozen process failed with %d\n", ret);
				/* not fatal */
			}

			/* Update stats */
			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}
		}
		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
		memorystatus_log("memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
		    memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);

		proc_list_lock();

		memorystatus_freeze_pageouts += dirty;

		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
			/*
			 * Add some eviction logic here? At some point should we
			 * jetsam a process to get back its swap space so that we
			 * can freeze a more eligible process at this moment in time?
			 */
		}

		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
		if (coal != NULL && *coal == NULL) {
			curr_task = proc_task(p);
			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, *coal)) {
				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);

				/* Clamp: coalition_get_pid_list reports the full count even if it exceeds the buffer. */
				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
				}
			}
		} else {
			/* We just froze an xpc service. Mark it as such for telemetry */
			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
			memorystatus_frozen_count_xpc_service++;
			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
		}

		/* Done: release the temporary kill-shield and our proc reference. */
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);
		return 0;
	} else {
		if (is_refreeze) {
			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
				/*
				 * Keeping this prior-frozen process in this high band when
				 * we failed to re-freeze it due to bad shared memory usage
				 * could cause excessive pressure on the lower bands.
				 * We need to demote it for now. It'll get re-evaluated next
				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
				 * bit.
				 */

				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
				memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
			}
		} else {
			/* First-time failure: don't reconsider this process for freezing. */
			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
		}
		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_process");

		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);

		return EINVAL;
	}
}
1962 
1963 /*
1964  * Synchronously freeze the passed proc. Called with a reference to the proc held.
1965  *
1966  * Doesn't deal with:
1967  * - re-freezing because this is called on a specific process and
1968  *   not by the freezer thread. If that changes, we'll have to teach it about
1969  *   refreezing a frozen process.
1970  *
1971  * - grouped/coalition freezing because we are hoping to deprecate this
1972  *   interface as it was used by user-space to freeze particular processes. But
1973  *   we have moved away from that approach to having the kernel choose the optimal
1974  *   candidates to be frozen.
1975  *
1976  * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
1977  * returns EINVAL or the value returned by task_freeze().
1978  */
1979 int
memorystatus_freeze_process_sync(proc_t p)1980 memorystatus_freeze_process_sync(proc_t p)
1981 {
1982 	int ret = EINVAL;
1983 	boolean_t memorystatus_freeze_swap_low = FALSE;
1984 
1985 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1986 		return ENOTSUP;
1987 	}
1988 
1989 	lck_mtx_lock(&freezer_mutex);
1990 
1991 	if (p == NULL) {
1992 		memorystatus_log_error("memorystatus_freeze_process_sync: Invalid process\n");
1993 		goto exit;
1994 	}
1995 
1996 	if (memorystatus_freeze_enabled == FALSE) {
1997 		memorystatus_log_error("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
1998 		goto exit;
1999 	}
2000 
2001 	if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2002 		memorystatus_log_info("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
2003 		goto exit;
2004 	}
2005 
2006 	memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2007 	if (!memorystatus_freeze_budget_pages_remaining) {
2008 		memorystatus_log_info("memorystatus_freeze_process_sync: exit with NO available budget\n");
2009 		goto exit;
2010 	}
2011 
2012 	proc_list_lock();
2013 
2014 	ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2015 
2016 exit:
2017 	lck_mtx_unlock(&freezer_mutex);
2018 
2019 	return ret;
2020 }
2021 
2022 proc_t
memorystatus_freezer_candidate_list_get_proc(struct memorystatus_freezer_candidate_list * list,size_t index,uint64_t * pid_mismatch_counter)2023 memorystatus_freezer_candidate_list_get_proc(
2024 	struct memorystatus_freezer_candidate_list *list,
2025 	size_t index,
2026 	uint64_t *pid_mismatch_counter)
2027 {
2028 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2029 	if (list->mfcl_list == NULL || list->mfcl_length <= index) {
2030 		return NULL;
2031 	}
2032 	memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
2033 	if (entry->pid == NO_PID) {
2034 		/* Entry has been removed. */
2035 		return NULL;
2036 	}
2037 
2038 	proc_t p = proc_find_locked(entry->pid);
2039 	if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2040 		/*
2041 		 * We grab a reference when we are about to freeze the process. So drop
2042 		 * the reference that proc_find_locked() grabbed for us.
2043 		 * We also have the proc_list_lock so this process is stable.
2044 		 */
2045 		proc_rele(p);
2046 		return p;
2047 	} else {
2048 		if (p) {
2049 			/* pid rollover. */
2050 			proc_rele(p);
2051 		}
2052 		/*
2053 		 * The proc has exited since we received this list.
2054 		 * It may have re-launched with a new pid, so we go looking for it.
2055 		 */
2056 		unsigned int band = JETSAM_PRIORITY_IDLE;
2057 		p = memorystatus_get_first_proc_locked(&band, TRUE);
2058 		while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
2059 			if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
2060 				(*pid_mismatch_counter)++;
2061 				/* Stash the pid for faster lookup next time. */
2062 				entry->pid = proc_getpid(p);
2063 				return p;
2064 			}
2065 			p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2066 		}
2067 		/* No match. */
2068 		return NULL;
2069 	}
2070 }
2071 
2072 static size_t
memorystatus_freeze_pid_list(pid_t * pid_list,unsigned int num_pids)2073 memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2074 {
2075 	int ret = 0;
2076 	size_t num_frozen = 0;
2077 	while (num_pids > 0 &&
2078 	    memorystatus_frozen_count < memorystatus_frozen_processes_max) {
2079 		pid_t pid = pid_list[--num_pids];
2080 		proc_t p = proc_find_locked(pid);
2081 		if (p) {
2082 			proc_rele(p);
2083 			ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2084 			if (ret != 0) {
2085 				break;
2086 			}
2087 			num_frozen++;
2088 		}
2089 	}
2090 	return num_frozen;
2091 }
2092 
2093 /*
2094  * Attempt to freeze the best candidate process.
2095  * Keep trying until we freeze something or run out of candidates.
2096  * Returns the number of processes frozen (including coalition members).
2097  */
2098 static size_t
memorystatus_freeze_top_process(void)2099 memorystatus_freeze_top_process(void)
2100 {
2101 	int freeze_ret;
2102 	size_t num_frozen = 0;
2103 	coalition_t coal = COALITION_NULL;
2104 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
2105 	unsigned int ntasks = 0;
2106 	struct memorystatus_freeze_list_iterator iterator;
2107 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2108 
2109 	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
2110 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages);
2111 
2112 	proc_list_lock();
2113 	while (true) {
2114 		proc_t p = memorystatus_freeze_pick_process(&iterator);
2115 		if (p == PROC_NULL) {
2116 			/* Nothing left to freeze */
2117 			break;
2118 		}
2119 		freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
2120 		if (freeze_ret == 0) {
2121 			num_frozen = 1;
2122 			/*
2123 			 * We froze a process successfully.
2124 			 * If it's a coalition head, freeze the coalition.
2125 			 * Then we're done for now.
2126 			 */
2127 			if (coal != NULL) {
2128 				num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
2129 			}
2130 			break;
2131 		} else {
2132 			if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
2133 				break;
2134 			}
2135 			/*
2136 			 * Freeze failed but we're not out of space.
2137 			 * Keep trying to find a good candidate,
2138 			 * memorystatus_freeze_pick_process will not return this proc again until
2139 			 * we reset the iterator.
2140 			 */
2141 		}
2142 	}
2143 	proc_list_unlock();
2144 
2145 	KDBG(MEMSTAT_CODE(BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages);
2146 
2147 	return num_frozen;
2148 }
2149 
2150 #if DEVELOPMENT || DEBUG
2151 /* For testing memorystatus_freeze_top_process */
2152 static int
2153 sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
2154 {
2155 #pragma unused(arg1, arg2)
2156 	int error, val, ret = 0;
2157 	size_t num_frozen;
2158 	/*
2159 	 * Only freeze on write to prevent freezing during `sysctl -a`.
2160 	 * The actual value written doesn't matter.
2161 	 */
2162 	error = sysctl_handle_int(oidp, &val, 0, req);
2163 	if (error || !req->newptr) {
2164 		return error;
2165 	}
2166 
2167 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2168 		return ENOTSUP;
2169 	}
2170 
2171 	lck_mtx_lock(&freezer_mutex);
2172 	num_frozen = memorystatus_freeze_top_process();
2173 	lck_mtx_unlock(&freezer_mutex);
2174 
2175 	if (num_frozen == 0) {
2176 		ret = ESRCH;
2177 	}
2178 	return ret;
2179 }
2180 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2181     0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
2182 #endif /* DEVELOPMENT || DEBUG */
2183 
2184 static inline boolean_t
memorystatus_can_freeze_processes(void)2185 memorystatus_can_freeze_processes(void)
2186 {
2187 	boolean_t ret;
2188 
2189 	proc_list_lock();
2190 
2191 	if (memorystatus_suspended_count) {
2192 		memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2193 
2194 		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2195 			ret = TRUE;
2196 		} else {
2197 			ret = FALSE;
2198 		}
2199 	} else {
2200 		ret = FALSE;
2201 	}
2202 
2203 	proc_list_unlock();
2204 
2205 	return ret;
2206 }
2207 
2208 static boolean_t
memorystatus_can_freeze(boolean_t * memorystatus_freeze_swap_low)2209 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2210 {
2211 	boolean_t can_freeze = TRUE;
2212 
2213 	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
2214 	*  after boot,  and is generally is a no-op once we've reached steady state. */
2215 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2216 		return FALSE;
2217 	}
2218 
2219 	/* Check minimum suspended process threshold. */
2220 	if (!memorystatus_can_freeze_processes()) {
2221 		return FALSE;
2222 	}
2223 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2224 
2225 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2226 		/*
2227 		 * In-core compressor used for freezing WITHOUT on-disk swap support.
2228 		 */
2229 		if (vm_compressor_low_on_space()) {
2230 			if (*memorystatus_freeze_swap_low) {
2231 				*memorystatus_freeze_swap_low = TRUE;
2232 			}
2233 
2234 			can_freeze = FALSE;
2235 		} else {
2236 			if (*memorystatus_freeze_swap_low) {
2237 				*memorystatus_freeze_swap_low = FALSE;
2238 			}
2239 
2240 			can_freeze = TRUE;
2241 		}
2242 	} else {
2243 		/*
2244 		 * Freezing WITH on-disk swap support.
2245 		 *
2246 		 * In-core compressor fronts the swap.
2247 		 */
2248 		if (vm_swap_low_on_space()) {
2249 			if (*memorystatus_freeze_swap_low) {
2250 				*memorystatus_freeze_swap_low = TRUE;
2251 			}
2252 
2253 			can_freeze = FALSE;
2254 		}
2255 	}
2256 
2257 	return can_freeze;
2258 }
2259 
2260 /*
2261  * Demote the given frozen process.
2262  * Caller must hold the proc_list_lock & it will be held on return.
2263  */
2264 static void
memorystatus_demote_frozen_process(proc_t p,bool urgent_mode __unused)2265 memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
2266 {
2267 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2268 
2269 	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
2270 	int maxpriority = JETSAM_PRIORITY_IDLE;
2271 	p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2272 	memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2273 
2274 	maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
2275 	memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
2276 #if DEVELOPMENT || DEBUG
2277 	memorystatus_log("memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
2278 	    (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2279 #endif /* DEVELOPMENT || DEBUG */
2280 
2281 	/*
2282 	 * The freezer thread will consider this a normal app to be frozen
2283 	 * because it is in the IDLE band. So we don't need the
2284 	 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2285 	 * we'll correctly count it as eligible for re-freeze again.
2286 	 *
2287 	 * We don't drop the frozen count because this process still has
2288 	 * state on disk. So there's a chance it gets resumed and then it
2289 	 * should land in the higher jetsam band. For that it needs to
2290 	 * remain marked frozen.
2291 	 */
2292 	if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2293 		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2294 		memorystatus_refreeze_eligible_count--;
2295 	}
2296 }
2297 
/*
 * Scan the frozen jetsam band and demote frozen processes back toward IDLE.
 *
 * urgent_mode == true:  demote exactly one re-freeze-eligible process
 *                       (caller is out of budget and needs relief now).
 * urgent_mode == false: demote processes whose thaw count is below
 *                       memorystatus_thaw_count_demotion_threshold, up to
 *                       memorystatus_max_frozen_demotions_daily of them.
 *
 * Returns the number of processes demoted.
 */
static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
{
	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
	unsigned int demoted_proc_count = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	proc_list_lock();

	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);

		/* Only frozen processes are demotion candidates. */
		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
			continue;
		}

		/* Skip busy processes (P_MEMSTAT_LOCKED) — presumably mid freeze/thaw; confirm. */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			continue;
		}

		if (urgent_mode) {
			if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
				/*
				 * This process hasn't been thawed recently and so most of
				 * its state sits on NAND and so we skip it -- jetsamming it
				 * won't help with memory pressure.
				 */
				continue;
			}
		} else {
			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
				/*
				 * This process has met / exceeded our thaw count demotion threshold
				 * and so we let it live in the higher bands.
				 */
				continue;
			}
		}

		memorystatus_demote_frozen_process(p, urgent_mode);
		demoted_proc_count++;
		/* Urgent mode demotes a single proc; normal mode honors the daily cap. */
		if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
			break;
		}
	}

	proc_list_unlock();
	return demoted_proc_count;
}
2348 
/*
 * Demote at most one frozen process, chosen from the userspace-provided
 * (dasd) demotion candidate list. Only re-freeze-eligible processes are
 * considered. Returns the number of processes demoted (0 or 1).
 */
static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
	assert(memorystatus_freezer_use_demotion_list);
	unsigned int demoted_proc_count = 0;

	proc_list_lock();
	for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
		/* NULL when the entry's pid no longer matches (counted in mfs_demote_pid_mismatches). */
		proc_t p = memorystatus_freezer_candidate_list_get_proc(
			&memorystatus_global_demote_list,
			i,
			&memorystatus_freezer_stats.mfs_demote_pid_mismatches);
		if (p != NULL && memorystatus_freeze_proc_is_refreeze_eligible(p)) {
			memorystatus_demote_frozen_process(p, urgent_mode);
			/* Remove this entry now that it's been demoted. */
			memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
			demoted_proc_count++;
			/*
			 * We only demote one proc at a time in this mode.
			 * This gives jetsam a chance to kill the recently demoted processes.
			 */
			break;
		}
	}

	proc_list_unlock();
	return demoted_proc_count;
}
2379 
/*
 * This function evaluates if the currently frozen processes deserve
 * to stay in the higher jetsam band. There are 2 modes:
 * - 'urgent_mode == true': (urgent mode)
 *	We are out of budget and can't refreeze a process. The process's
 * state, if it was resumed, will stay in compressed memory. If we let it
 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
 * the lower bands. So we force-demote the least-recently-used-and-thawed
 * process.
 *
 * - 'urgent_mode == false': (normal mode)
 *      If the # of thaws of a process is below our threshold, then we
 * will demote that process into the IDLE band.
 * We don't immediately kill the process here because it already has
 * state on disk and so it might be worth giving it another shot at
 * getting thawed/resumed and used.
 */
static void
memorystatus_demote_frozen_processes(bool urgent_mode)
{
	unsigned int demoted_proc_count = 0;

	if (memorystatus_freeze_enabled == FALSE) {
		/*
		 * Freeze has been disabled likely to
		 * reclaim swap space. So don't change
		 * any state on the frozen processes.
		 */
		return;
	}

	/*
	 * We have two demotion policies which can be toggled by userspace.
	 * In non-urgent mode, the ordered list policy will
	 * choose a demotion candidate using the list provided by dasd.
	 * The thaw count policy will demote the oldest process that hasn't been
	 * thawed more than memorystatus_thaw_count_demotion_threshold times.
	 *
	 * If urgent_mode is set, both policies will only consider demoting
	 * processes that are re-freeze eligible. But the ordering is different.
	 * The ordered list policy will scan in the order given by dasd.
	 * The thaw count policy will scan through the frozen band.
	 */
	if (memorystatus_freezer_use_demotion_list) {
		/* demoted_proc_count is only consulted for the urgent fallback below. */
		demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);

		if (demoted_proc_count == 0 && urgent_mode) {
			/*
			 * We're out of budget and the demotion list doesn't contain any valid
			 * candidates. We still need to demote something. Fall back to scanning
			 * the frozen band.
			 */
			memorystatus_demote_frozen_processes_using_thaw_count(true);
		}
	} else {
		demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
	}
}
2438 
2439 /*
2440  * Calculate a new freezer budget.
2441  * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2442  * @param burst_multiple The burst_multiple for the new period
2443  * @param interval_duration_min How many minutes will the new interval be?
2444  * @param rollover The amount to rollover from the previous budget.
2445  *
2446  * @return A budget for the new interval.
2447  */
static uint32_t
memorystatus_freeze_calculate_new_budget(
	unsigned int time_since_last_interval_expired_sec,
	unsigned int burst_multiple,
	unsigned int interval_duration_min,
	uint32_t rollover)
{
	uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
	const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
	/* Precision factor for days_missed. 2 decimal points. */
	const static unsigned int kFixedPointFactor = 100;
	unsigned int days_missed;

	/* No freezer-backed swap means no budget at all. */
	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return 0;
	}
	if (memorystatus_swap_all_apps) {
		/*
		 * We effectively have an unlimited budget when app swap is enabled.
		 */
		memorystatus_freeze_daily_mb_max = UINT32_MAX;
		return UINT32_MAX;
	}

	/* Get the daily budget from the storage layer */
	if (vm_swap_max_budget(&freeze_daily_budget)) {
		freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
		assert(freeze_daily_budget_mb <= UINT32_MAX);
		memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
		memorystatus_log_info("memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
	}
	/* Calculate the daily pageout budget (MB -> pages). */
	freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
	/* Multiply by memorystatus_freeze_budget_multiplier (a percentage; fixed-point to keep 2 decimals). */
	freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;

	/* Pro-rate the daily max over the interval length, scaled by the burst multiple. */
	daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));

	/*
	 * Add additional budget for time since the interval expired.
	 * For example, if the interval expired n days ago, we should get an additional n days
	 * of budget since we didn't use any budget during those n days.
	 */
	days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
	budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
	new_budget = rollover + daily_budget_pageouts + budget_missed;
	/* Saturate rather than wrap when truncating the 64-bit sum to 32 bits. */
	return (uint32_t) MIN(new_budget, UINT32_MAX);
}
2496 
2497 /*
2498  * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2499  * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2500  * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2501  * it gets set for new processes.
2502  * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2503  */
static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	/* 'locked' tells us whether the caller already holds the proc list lock. */
	LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
	unsigned int band = JETSAM_PRIORITY_IDLE;
	proc_t p;

	if (!locked) {
		proc_list_lock();
	}
	/* Walk the IDLE band only (the assert below confirms every proc stays in-band). */
	p = memorystatus_get_first_proc_locked(&band, FALSE);
	while (p) {
		assert(p->p_memstat_effectivepriority == (int32_t) band);
		/* Tag every not-yet-frozen, freeze-eligible process with the skip reason. */
		if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
			assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
			p->p_memstat_freeze_skip_reason = (uint8_t) reason;
		}
		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
	}
	if (!locked) {
		proc_list_unlock();
	}
}
2528 
2529 /*
2530  * Called after we fail to freeze a process.
2531  * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2532  */
2533 static void
memorystatus_freeze_handle_error(proc_t p,const freezer_error_code_t freezer_error_code,bool was_refreeze,pid_t pid,const coalition_t coalition,const char * log_prefix)2534 memorystatus_freeze_handle_error(
2535 	proc_t p,
2536 	const freezer_error_code_t freezer_error_code,
2537 	bool was_refreeze,
2538 	pid_t pid,
2539 	const coalition_t coalition,
2540 	const char* log_prefix)
2541 {
2542 	const char *reason;
2543 	memorystatus_freeze_skip_reason_t skip_reason;
2544 
2545 	switch (freezer_error_code) {
2546 	case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2547 		memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2548 		reason = "too much shared memory";
2549 		skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2550 		break;
2551 	case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2552 		memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2553 		reason = "private-shared pages ratio";
2554 		skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2555 		break;
2556 	case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2557 		memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2558 		reason = "no compressor space";
2559 		skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2560 		break;
2561 	case FREEZER_ERROR_NO_SWAP_SPACE:
2562 		memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2563 		reason = "no swap space";
2564 		skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2565 		break;
2566 	case FREEZER_ERROR_NO_SLOTS:
2567 		memorystatus_freezer_stats.mfs_skipped_full_count++;
2568 		reason = "no slots";
2569 		skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2570 		break;
2571 	default:
2572 		reason = "unknown error";
2573 		skip_reason = kMemorystatusFreezeSkipReasonOther;
2574 	}
2575 
2576 	p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2577 
2578 	memorystatus_log("%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2579 	    log_prefix, was_refreeze ? "re" : "",
2580 	    (coalition == NULL ? "general" : "coalition-driven"), pid,
2581 	    ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2582 }
2583 
2584 /*
2585  * Start a new normal throttle interval with the given budget.
2586  * Caller must hold the freezer mutex
2587  */
static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
{
	unsigned int band;
	proc_t p, next_p;
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	/* Program the window: budget and absolute expiry time (start + window length). */
	normal_throttle_window->max_pageouts = new_budget;
	normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
	normal_throttle_window->ts.tv_nsec = 0;
	ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
	/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
	if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
		normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
	} else {
		normal_throttle_window->pageouts = 0;
	}
	/* Ensure the normal window is now active. */
	memorystatus_freeze_degradation = FALSE;

	/*
	 * Reset interval statistics.
	 */
	memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
	memorystatus_freezer_stats.mfs_process_considered_count = 0;
	memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
	memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
	memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
	memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
	memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
	memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
	memorystatus_freezer_stats.mfs_error_elevated_count = 0;
	memorystatus_freezer_stats.mfs_error_other_count = 0;
	memorystatus_freezer_stats.mfs_refreeze_count = 0;
	memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
	memorystatus_freezer_stats.mfs_below_threshold_count = 0;
	memorystatus_freezer_stats.mfs_skipped_full_count = 0;
	memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
	memorystatus_thaw_count = 0;
	/* Thaw counters start at 0; frozen counters are re-seeded from the current totals. */
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
	os_atomic_inc(&memorystatus_freeze_current_interval, release);

	/* Clear the focal thaw bit */
	proc_list_lock();
	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	/* TRUE — presumably continues the walk across bands; the loop stops past FOREGROUND. */
	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);

		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
			break;
		}
		p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
	}
	proc_list_unlock();

	/* Arm the timer that rolls over to the next interval when this one expires. */
	schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
}
2659 
2660 #if DEVELOPMENT || DEBUG
2661 
2662 static int
2663 sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
2664 {
2665 #pragma unused(arg1, arg2)
2666 	int error = 0;
2667 	unsigned int time_since_last_interval_expired_sec = 0;
2668 	unsigned int new_budget;
2669 
2670 	error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
2671 	if (error || !req->newptr) {
2672 		return error;
2673 	}
2674 
2675 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2676 		return ENOTSUP;
2677 	}
2678 	new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
2679 	return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
2680 }
2681 
2682 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2683     0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
2684 
2685 #endif /* DEVELOPMENT || DEBUG */
2686 
2687 /*
2688  * Called when we first run out of budget in an interval.
2689  * Marks idle processes as not frozen due to lack of budget.
2690  * NB: It might be worth having a CA event here.
2691  */
static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	mach_timespec_t time_left = {0, 0};
	mach_timespec_t now_ts;
	clock_sec_t sec;
	clock_nsec_t nsec;

	/* Time remaining in the interval = interval expiry time - now. */
	time_left.tv_sec = interval->ts.tv_sec;
	time_left.tv_nsec = 0;
	clock_get_system_nanotime(&sec, &nsec);
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;

	SUB_MACH_TIMESPEC(&time_left, &now_ts);
	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
	memorystatus_log(
		"memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
		time_left.tv_sec / 60, memorystatus_frozen_count);

	/* false: we do not hold the proc list lock here (asserted above). */
	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
}
2717 
2718 /*
2719  * Called when we cross over the threshold of maximum frozen processes allowed.
2720  * Marks remaining idle processes as not frozen due to lack of slots.
2721  */
static void
memorystatus_freeze_out_of_slots(void)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	/* Only called at exactly the moment the freezer fills up. */
	assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);

	memorystatus_log(
		"memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
		memorystatus_frozen_count);

	/* true: caller already holds the proc list lock (asserted above). */
	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
}
2735 
2736 /*
2737  * This function will do 4 things:
2738  *
2739  * 1) check to see if we are currently in a degraded freezer mode, and if so:
2740  *    - check to see if our window has expired and we should exit this mode, OR,
2741  *    - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2742  *
2743  * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2744  *
2745  * 3) check what the current normal window allows for a budget.
2746  *
2747  * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2748  *    what we would normally expect, then we are running low on our daily budget and need to enter
2749  *    degraded perf. mode.
2750  *
2751  *    Caller must hold the freezer mutex
2752  *    Caller must not hold the proc_list lock
2753  */
2754 
static void
memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
{
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t now_ts;
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * NOTE(review): freeze_daily_pageouts_max is never reassigned below, so
	 * budget_threshold and normal_budget_rate_allowed both evaluate to 0 and
	 * the graceful-degradation branch appears unreachable. Confirm whether it
	 * should be derived from memorystatus_freeze_daily_mb_max, as done in
	 * memorystatus_freeze_calculate_new_budget.
	 */
	unsigned int freeze_daily_pageouts_max = 0;
	bool started_with_budget = (*budget_pages_allowed > 0);

#if DEVELOPMENT || DEBUG
	if (!memorystatus_freeze_throttle_enabled) {
		/*
		 * No throttling...we can use the full budget everytime.
		 */
		*budget_pages_allowed = UINT64_MAX;
		return;
	}
#endif

	clock_get_system_nanotime(&sec, &nsec);
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;

	struct throttle_interval_t *interval = NULL;

	if (memorystatus_freeze_degradation == TRUE) {
		interval = degraded_throttle_window;

		/* Degraded window expired: clear it (the normal-window logic below takes over). */
		if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
			interval->pageouts = 0;
			interval->max_pageouts = 0;
		} else {
			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
		}
	}

	interval = normal_throttle_window;

	/*
	 * Current throttle window.
	 * Deny freezing if we have no budget left.
	 * Try graceful degradation if we are within 25% of:
	 * - the daily budget, and
	 * - the current budget left is below our normal budget expectations.
	 */

	if (memorystatus_freeze_degradation == FALSE) {
		if (interval->pageouts >= interval->max_pageouts) {
			*budget_pages_allowed = 0;
			/* Only announce exhaustion on the transition from budget > 0. */
			if (started_with_budget) {
				memorystatus_freeze_out_of_budget(interval);
			}
		} else {
			int budget_left = interval->max_pageouts - interval->pageouts;
			int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;

			mach_timespec_t time_left = {0, 0};

			time_left.tv_sec = interval->ts.tv_sec;
			time_left.tv_nsec = 0;

			SUB_MACH_TIMESPEC(&time_left, &now_ts);

			if (budget_left <= budget_threshold) {
				/*
				 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
				 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
				 * daily pageout budget.
				 */

				/* NOTE(review): time_left.tv_sec could be 0 here (interval just expired) — division by zero risk; confirm. */
				unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
				unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;

				/*
				 * The current rate of pageouts is below what we would expect for
				 * the normal rate i.e. we have below normal budget left and so...
				 */

				if (current_budget_rate_allowed < normal_budget_rate_allowed) {
					memorystatus_freeze_degradation = TRUE;
					degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
					degraded_throttle_window->pageouts = 0;

					/*
					 * Switch over to the degraded throttle window so the budget
					 * doled out is based on that window.
					 */
					interval = degraded_throttle_window;
				}
			}

			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
		}
	}

	memorystatus_log_debug(
		"memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining\n",
		interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60);
}
2857 
bool memorystatus_freeze_thread_init = false;

/*
 * Body of the dedicated freezer thread. Each wakeup: optionally runs normal-mode
 * demotion, then freezes processes until the per-wakeup count, swap space, slot
 * limit, or pageout budget runs out; on budget exhaustion it force-demotes one
 * process. Finally re-arms the wait and blocks on this same continuation.
 */
static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
	static boolean_t memorystatus_freeze_swap_low = FALSE;
	size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;

	/* One-time setup: join the VM thread group — presumably for scheduler classification; confirm. */
	if (!memorystatus_freeze_thread_init) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif
		memorystatus_freeze_thread_init = true;
	}

	max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();

	lck_mtx_lock(&freezer_mutex);
	if (memorystatus_freeze_enabled) {
		if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
			memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
		}
		/* Keep freezing while: under the wakeup quota, swap isn't low, and
		 * either a slot is free or enough procs are re-freeze eligible. */
		while (num_frozen < max_to_freeze &&
		    memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
		    ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
		    (memorystatus_refreeze_eligible_count >= memorystatus_min_thaw_refreeze_threshold))) {
			/* Only freeze if we've not exceeded our pageout budgets.*/
			memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);

			if (memorystatus_freeze_budget_pages_remaining) {
				num_frozen_this_iteration = memorystatus_freeze_top_process();
				if (num_frozen_this_iteration == 0) {
					/* Nothing left to freeze. */
					break;
				}
				num_frozen += num_frozen_this_iteration;
			} else {
				memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
				break;
			}
		}
	}

	/*
	 * Give applications currently in the aging band a chance to age out into the idle band before
	 * running the freezer again.
	 */
	memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;

	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
	lck_mtx_unlock(&freezer_mutex);

	thread_block((thread_continue_t) memorystatus_freeze_thread);
}
2911 
/*
 * Report (via *is_freezable) whether the P_MEMSTAT_FREEZE_DISABLED bit is
 * clear for the given pid. Returns 0, EINVAL (pid 0), ESRCH (no such proc),
 * or EPERM (querying a process other than the caller).
 */
int
memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
{
	proc_t p = PROC_NULL;

	if (pid == 0) {
		return EINVAL;
	}

	p = proc_find(pid);
	if (!p) {
		return ESRCH;
	}

	/*
	 * Only allow this on the current proc for now.
	 * We can check for privileges and allow targeting another process in the future.
	 */
	if (p != current_proc()) {
		proc_rele(p);
		return EPERM;
	}

	proc_list_lock();
	*is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
	/* NOTE(review): proc_rele is invoked while still holding the proc list lock — confirm intended lock ordering. */
	proc_rele(p);
	proc_list_unlock();

	return 0;
}
2942 
2943 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)2944 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
2945 {
2946 	proc_t p = PROC_NULL;
2947 
2948 	if (pid == 0) {
2949 		return EINVAL;
2950 	}
2951 
2952 	/*
2953 	 * Only allow this on the current proc for now.
2954 	 * We can check for privileges and allow targeting another process in the future.
2955 	 */
2956 	p = current_proc();
2957 	if (proc_getpid(p) != pid) {
2958 		return EPERM;
2959 	}
2960 
2961 	proc_list_lock();
2962 	*is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
2963 	proc_list_unlock();
2964 
2965 	return 0;
2966 }
2967 
/*
 * Set or clear the P_MEMSTAT_FREEZE_DISABLED bit for a process.
 * is_freezable == FALSE disables freezing; TRUE re-enables it (privileged).
 * Returns 0, EINVAL (pid 0), ESRCH (no such proc), or EPERM.
 */
int
memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
{
	proc_t p = PROC_NULL;

	if (pid == 0) {
		return EINVAL;
	}

	/*
	 * To enable freezable status, you need to be root or an entitlement.
	 */
	if (is_freezable &&
	    !kauth_cred_issuser(kauth_cred_get()) &&
	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
		return EPERM;
	}

	p = proc_find(pid);
	if (!p) {
		return ESRCH;
	}

	/*
	 * A process can change its own status. A coalition leader can
	 * change the status of coalition members.
	 * An entitled process (or root) can change anyone's status.
	 */
	if (p != current_proc() &&
	    !kauth_cred_issuser(kauth_cred_get()) &&
	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
		coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
		if (!coalition_is_leader(proc_task(current_proc()), coal)) {
			proc_rele(p);
			return EPERM;
		}
	}

	proc_list_lock();
	if (is_freezable == FALSE) {
		/* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
		p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
		memorystatus_log_info("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
	} else {
		p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
		memorystatus_log_info("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
	}
	/* NOTE(review): proc_rele is invoked while still holding the proc list lock — confirm intended lock ordering. */
	proc_rele(p);
	proc_list_unlock();

	return 0;
}
3022 
3023 /*
3024  * Called when process is created before it is added to a memorystatus bucket.
3025  */
3026 void
memorystatus_freeze_init_proc(proc_t p)3027 memorystatus_freeze_init_proc(proc_t p)
3028 {
3029 	/* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3030 	if (memorystatus_freeze_budget_pages_remaining == 0) {
3031 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3032 	} else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3033 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3034 	} else {
3035 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3036 	}
3037 }
3038 
3039 
/*
 * Privileged sysctl: trigger a fastwake warmup pass for all processes.
 * Write-only; requires root or the memorystatus entitlement, and an active
 * freezer-swap configuration.
 */
static int
sysctl_memorystatus_do_fastwake_warmup_all  SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)

	/* Write-only sysctl: a new value must be supplied. */
	if (!req->newptr) {
		return EINVAL;
	}

	/* Need to be root or have entitlement */
	if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
		return EPERM;
	}

	if (memorystatus_freeze_enabled == FALSE) {
		return ENOTSUP;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	do_fastwake_warmup_all();

	return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3069 
3070 /*
3071  * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3072  * Takes ownership over the original value of list.
3073  * Assumes that list is protected by the freezer_mutex.
3074  * The caller should not hold any locks.
3075  */
3076 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3077 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3078 {
3079 	errno_t error = 0;
3080 	memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3081 	size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3082 
3083 	/* Validate the user provided list. */
3084 	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3085 		memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3086 		return EINVAL;
3087 	}
3088 
3089 	if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3090 		memorystatus_log_error(
3091 			"memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3092 		return EINVAL;
3093 	}
3094 
3095 	entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3096 	entries_size = buffer_size;
3097 	entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3098 	if (entries == NULL) {
3099 		return ENOMEM;
3100 	}
3101 
3102 	error = copyin(buffer, entries, buffer_size);
3103 	if (error != 0) {
3104 		goto out;
3105 	}
3106 
3107 #if MACH_ASSERT
3108 	for (size_t i = 0; i < entry_count; i++) {
3109 		memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3110 		if (entry->version != 1) {
3111 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3112 			error = EINVAL;
3113 			goto out;
3114 		}
3115 		if (i > 0 && entry->priority >= entries[i - 1].priority) {
3116 			memorystatus_log_error("memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3117 			error = EINVAL;
3118 			goto out;
3119 		}
3120 	}
3121 #endif /* MACH_ASSERT */
3122 
3123 	lck_mtx_lock(&freezer_mutex);
3124 
3125 	tmp_entries = list->mfcl_list;
3126 	tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3127 	list->mfcl_list = entries;
3128 	list->mfcl_length = entry_count;
3129 
3130 	lck_mtx_unlock(&freezer_mutex);
3131 
3132 	entries = tmp_entries;
3133 	entries_size = tmp_size;
3134 
3135 out:
3136 	kfree_data(entries, entries_size);
3137 	return error;
3138 }
3139 
3140 errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer,size_t buffer_size)3141 memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
3142 {
3143 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
3144 }
3145 
3146 errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer,size_t buffer_size)3147 memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
3148 {
3149 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
3150 }
3151 
3152 void
memorystatus_freezer_mark_ui_transition(proc_t p)3153 memorystatus_freezer_mark_ui_transition(proc_t p)
3154 {
3155 	bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3156 	proc_list_lock();
3157 
3158 	if (isSysProc(p)) {
3159 		goto out;
3160 	}
3161 
3162 	frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3163 	previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3164 	xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3165 	suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3166 	if (!suspended) {
3167 		if (frozen) {
3168 			if (!previous_focal_thaw) {
3169 				p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3170 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3171 				if (xpc_service) {
3172 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3173 				}
3174 			}
3175 		}
3176 		os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3177 	}
3178 
3179 out:
3180 	proc_list_unlock();
3181 }
3182 
3183 #endif /* CONFIG_FREEZE */
3184