xref: /xnu-8792.61.2/bsd/kern/kern_memorystatus_freeze.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41 
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/proc.h>
53 #include <sys/proc_info.h>
54 #include <sys/reason.h>
55 #include <sys/signal.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysproto.h>
59 #include <sys/wait.h>
60 #include <sys/tree.h>
61 #include <sys/priv.h>
62 #include <vm/vm_pageout.h>
63 #include <vm/vm_protos.h>
64 #include <mach/machine/sdt.h>
65 #include <libkern/coreanalytics/coreanalytics.h>
66 #include <libkern/section_keywords.h>
67 #include <stdatomic.h>
68 
69 #include <IOKit/IOBSD.h>
70 
71 #if CONFIG_FREEZE
72 #include <vm/vm_map.h>
73 #endif /* CONFIG_FREEZE */
74 
75 #include "kern_memorystatus_internal.h"
76 #include <sys/kern_memorystatus.h>
77 #include <sys/kern_memorystatus_freeze.h>
78 #include <sys/kern_memorystatus_notify.h>
79 
80 #if CONFIG_JETSAM
81 
82 extern unsigned int memorystatus_available_pages;
83 extern unsigned int memorystatus_available_pages_pressure;
84 extern unsigned int memorystatus_available_pages_critical;
85 extern unsigned int memorystatus_available_pages_critical_base;
86 extern unsigned int memorystatus_available_pages_critical_idle_offset;
87 
88 #else /* CONFIG_JETSAM */
89 
90 extern uint64_t memorystatus_available_pages;
91 extern uint64_t memorystatus_available_pages_pressure;
92 extern uint64_t memorystatus_available_pages_critical;
93 
94 #endif /* CONFIG_JETSAM */
95 
96 unsigned int memorystatus_frozen_count = 0;
97 unsigned int memorystatus_frozen_count_webcontent = 0;
98 unsigned int memorystatus_frozen_count_xpc_service = 0;
99 unsigned int memorystatus_suspended_count = 0;
100 unsigned long freeze_threshold_percentage = 50;
101 
102 #if CONFIG_FREEZE
103 
104 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
105 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
106 
107 /* Thresholds */
108 unsigned int memorystatus_freeze_threshold = 0;
109 unsigned int memorystatus_freeze_pages_min = 0;
110 unsigned int memorystatus_freeze_pages_max = 0;
111 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
112 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
113 uint64_t     memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
114 uint64_t     memorystatus_freeze_budget_multiplier = 100; /* Multiplies the daily budget by 100/multiplier */
115 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
116 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
117 
118 unsigned int memorystatus_max_frozen_demotions_daily = 0;
119 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
120 
121 boolean_t memorystatus_freeze_enabled = FALSE;
122 int memorystatus_freeze_wakeup = 0;
123 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
124 
125 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
126 
127 #ifdef XNU_KERNEL_PRIVATE
128 
129 unsigned int memorystatus_frozen_processes_max = 0;
130 unsigned int memorystatus_frozen_shared_mb = 0;
131 unsigned int memorystatus_frozen_shared_mb_max = 0;
132 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
133 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
134 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
135 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
136 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
137 
138 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
139 
140 #endif /* XNU_KERNEL_PRIVATE */
141 
142 static inline boolean_t memorystatus_can_freeze_processes(void);
143 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
144 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
145 static uint32_t memorystatus_freeze_calculate_new_budget(
146 	unsigned int time_since_last_interval_expired_sec,
147 	unsigned int burst_multiple,
148 	unsigned int interval_duration_min,
149 	uint32_t rollover);
150 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
151 
152 static void memorystatus_set_freeze_is_enabled(bool enabled);
153 static void memorystatus_disable_freeze(void);
154 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
155 
156 /* Stats */
157 static uint64_t memorystatus_freeze_pageouts = 0;
158 
159 /* Throttling */
160 #define DEGRADED_WINDOW_MINS    (30)
161 #define NORMAL_WINDOW_MINS      (24 * 60)
162 
163 /* Protected by the freezer_mutex */
164 static throttle_interval_t throttle_intervals[] = {
165 	{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
166 	{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
167 };
168 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
169 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
170 uint32_t memorystatus_freeze_current_interval = 0;
171 static thread_call_t freeze_interval_reset_thread_call;
172 static uint32_t memorystatus_freeze_calculate_new_budget(
173 	unsigned int time_since_last_interval_expired_sec,
174 	unsigned int burst_multiple,
175 	unsigned int interval_duration_min,
176 	uint32_t rollover);
177 
178 struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
179 struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
180 /*
181  * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
182  * in order (as opposed to using the older LRU approach).
183  */
184 int memorystatus_freezer_use_ordered_list = 0;
185 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
186 /*
187  * When enabled, demotion candidates are chosen from memorystatus_global_demotion_list
188  */
189 int memorystatus_freezer_use_demotion_list = 0;
190 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
191 
192 extern uint64_t vm_swap_get_free_space(void);
193 extern boolean_t vm_swap_max_budget(uint64_t *);
194 
195 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
196 static void memorystatus_demote_frozen_processes(bool urgent_mode);
197 
198 static void memorystatus_freeze_handle_error(proc_t p, const int freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
199 static void memorystatus_freeze_out_of_slots(void);
200 uint64_t memorystatus_freezer_thread_next_run_ts = 0;
201 
202 /* Sysctls needed for aggd stats */
203 
204 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
205 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
206 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
207 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
208 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
209 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
210 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
211 
212 /*
213  * Force a new interval with the given budget (no rollover).
214  */
215 static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)216 memorystatus_freeze_force_new_interval(uint64_t new_budget)
217 {
218 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
219 	mach_timespec_t now_ts;
220 	clock_sec_t sec;
221 	clock_nsec_t nsec;
222 
223 	clock_get_system_nanotime(&sec, &nsec);
224 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
225 	now_ts.tv_nsec = nsec;
226 	memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
227 	/* Don't carry over any excess pageouts since we're forcing a new budget */
228 	normal_throttle_window->pageouts = 0;
229 	memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
230 }
231 #if DEVELOPMENT || DEBUG
232 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
233 {
234 	#pragma unused(arg1, arg2, oidp)
235 	int error, changed;
236 	uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
237 
238 	lck_mtx_lock(&freezer_mutex);
239 
240 	error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
241 	if (changed) {
242 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
243 			lck_mtx_unlock(&freezer_mutex);
244 			return ENOTSUP;
245 		}
246 		memorystatus_freeze_force_new_interval(new_budget);
247 	}
248 
249 	lck_mtx_unlock(&freezer_mutex);
250 	return error;
251 }
252 
253 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
254 #else /* DEVELOPMENT || DEBUG */
255 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
256 #endif /* DEVELOPMENT || DEBUG */
257 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
258 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
259 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
260 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
261 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
262 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
263 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
264 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
265 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
266 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
267 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
268 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
269 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
270 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
271 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
272 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
273 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
274 
275 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
276 
277 /*
278  * Calculates the hit rate for the freezer.
279  * The hit rate is defined as the percentage of procs that are currently in the
280  * freezer which we have thawed.
281  * A low hit rate means we're freezing bad candidates since they're not re-used.
282  */
283 static int
calculate_thaw_percentage(uint64_t frozen_count,uint64_t thaw_count)284 calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
285 {
286 	int thaw_percentage = 100;
287 
288 	if (frozen_count > 0) {
289 		if (thaw_count > frozen_count) {
290 			/*
291 			 * Both counts are using relaxed atomics & could be out of sync
292 			 * causing us to see thaw_percentage > 100.
293 			 */
294 			thaw_percentage = 100;
295 		} else {
296 			thaw_percentage = (int)(100 * thaw_count / frozen_count);
297 		}
298 	}
299 	return thaw_percentage;
300 }
301 
302 static int
get_thaw_percentage()303 get_thaw_percentage()
304 {
305 	uint64_t processes_frozen, processes_thawed;
306 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
307 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
308 	return calculate_thaw_percentage(processes_frozen, processes_thawed);
309 }
310 
311 static int
312 sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
313 {
314 #pragma unused(arg1, arg2)
315 	int thaw_percentage = get_thaw_percentage();
316 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
317 }
318 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
319 
320 static int
get_thaw_percentage_fg()321 get_thaw_percentage_fg()
322 {
323 	uint64_t processes_frozen, processes_thawed_fg;
324 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
325 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
326 	return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
327 }
328 
329 static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
330 {
331 #pragma unused(arg1, arg2)
332 	int thaw_percentage = get_thaw_percentage_fg();
333 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
334 }
335 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
336 
337 static int
get_thaw_percentage_webcontent()338 get_thaw_percentage_webcontent()
339 {
340 	uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
341 	processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
342 	processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
343 	return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
344 }
345 
346 static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
347 {
348 #pragma unused(arg1, arg2)
349 	int thaw_percentage = get_thaw_percentage_webcontent();
350 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
351 }
352 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
353 
354 
355 static int
get_thaw_percentage_bg()356 get_thaw_percentage_bg()
357 {
358 	uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
359 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
360 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
361 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
362 	return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
363 }
364 
365 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
366 {
367 #pragma unused(arg1, arg2)
368 	int thaw_percentage = get_thaw_percentage_bg();
369 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
370 }
371 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
372 
373 static int
get_thaw_percentage_fg_non_xpc_service()374 get_thaw_percentage_fg_non_xpc_service()
375 {
376 	uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
377 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
378 	processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
379 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
380 	processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
381 	/*
382 	 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
383 	 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
384 	 * Clamp just in case.
385 	 */
386 	processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
387 	processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
388 	return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
389 }
390 
391 static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
392 {
393 #pragma unused(arg1, arg2)
394 	int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
395 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
396 }
397 
398 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
399 
400 #define FREEZER_ERROR_STRING_LENGTH 128
401 
402 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
403 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
404 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
405 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_BACKGROUND, JETSAM_PRIORITY_FOREGROUND, "");
406 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
407 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
408 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_FOREGROUND, "");
409 static int
410 sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
411 {
412 #pragma unused(arg1, arg2, oidp, req)
413 	int error = 0, changed = 0;
414 	uint64_t val = memorystatus_freeze_budget_multiplier;
415 	unsigned int new_budget;
416 	clock_sec_t sec;
417 	clock_nsec_t nsec;
418 	mach_timespec_t now_ts;
419 
420 	error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
421 	if (error) {
422 		return error;
423 	}
424 	if (changed) {
425 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
426 			return ENOTSUP;
427 		}
428 #if !(DEVELOPMENT || DEBUG)
429 		if (val > 100) {
430 			/* Can not increase budget on release. */
431 			return EINVAL;
432 		}
433 #endif
434 		lck_mtx_lock(&freezer_mutex);
435 
436 		memorystatus_freeze_budget_multiplier = val;
437 		/* Start a new throttle interval with this budget multiplier */
438 		new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
439 		clock_get_system_nanotime(&sec, &nsec);
440 		now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
441 		now_ts.tv_nsec = nsec;
442 		memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
443 		memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
444 
445 		lck_mtx_unlock(&freezer_mutex);
446 	}
447 	return 0;
448 }
449 EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
450 /*
451  * max. # of frozen process demotions we will allow in our daily cycle.
452  */
453 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
454 
455 /*
456  * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
457  */
458 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
459 
460 #if DEVELOPMENT || DEBUG
461 
462 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
463 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
464 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
465 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
466 
467 /*
468  * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
469  * "0" means no limit.
470  * Default is 10% of system-wide task limit.
471  */
472 
473 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
474 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
475 
476 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
477 
478 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
479 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
480 
481 /*
482  * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
483  * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
484  */
485 boolean_t memorystatus_freeze_to_memory = FALSE;
486 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
487 
488 #define VM_PAGES_FOR_ALL_PROCS    (2)
489 
490 /*
491  * Manual trigger of freeze and thaw for dev / debug kernels only.
492  */
/*
 * Handler for the dev/debug-only kern.memorystatus_freeze sysctl:
 * force-freeze the process whose pid is written, bypassing the normal
 * budget/eligibility checks. Writing VM_PAGES_FOR_ALL_PROCS (2) instead
 * asks the pageout subsystem to push anonymous pages to the compressor
 * system-wide. If the frozen process leads a jetsam coalition, up to
 * MAX_XPC_SERVICE_PIDS of its XPC services are frozen as well (via the
 * `goto again` loop below).
 * Returns 0 on success; EINVAL / EPERM / ENOTSUP / ENOSPC / EIO on failure.
 */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error, pid = 0;
	proc_t p;
	int freezer_error_code = 0;
	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
	int ntasks = 0;
	coalition_t coal = COALITION_NULL;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr) {
		/* Read with no new value supplied: nothing to do. */
		return error;
	}

	if (pid == VM_PAGES_FOR_ALL_PROCS) {
		vm_pageout_anonymous_pages();

		return 0;
	}

	lck_mtx_lock(&freezer_mutex);
	if (memorystatus_freeze_enabled == FALSE) {
		lck_mtx_unlock(&freezer_mutex);
		printf("sysctl_freeze: Freeze is DISABLED\n");
		return ENOTSUP;
	}

	/* Re-entered once per XPC-service pid collected further below. */
again:
	p = proc_find(pid);
	if (p != NULL) {
		memorystatus_freezer_stats.mfs_process_considered_count++;
		uint32_t purgeable, wired, clean, dirty, shared;
		uint32_t max_pages = 0, state = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * Freezer backed by the compressor and swap file(s)
			 * will hold compressed data.
			 *
			 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
			 * being swapped out to disk. Note that this disables freezer swap support globally,
			 * not just for the process being frozen.
			 *
			 *
			 * We don't care about the global freezer budget or the process's (min/max) budget here.
			 * The freeze sysctl is meant to force-freeze a process.
			 *
			 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
			 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
			 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
			 */
			max_pages = memorystatus_freeze_pages_max;
		} else {
			/*
			 * We only have the compressor without any swap.
			 */
			max_pages = UINT32_MAX - 1;
		}

		/* Snapshot the memstat state under the proc-list lock. */
		proc_list_lock();
		state = p->p_memstat_state;
		proc_list_unlock();

		/*
		 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
		 * We simply ensure that jetsam is not already working on the process and that the process has not
		 * explicitly disabled freezing.
		 */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
			printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
			    (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
			    (state & P_MEMSTAT_LOCKED) ? " locked" : "",
			    (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");

			proc_rele(p);
			lck_mtx_unlock(&freezer_mutex);
			return EPERM;
		}

		error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
		if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
			memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
		}

		if (error) {
			memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
			if (error == KERN_NO_SPACE) {
				/* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
				error = ENOSPC;
			} else {
				error = EIO;
			}
		} else {
			/* Freeze succeeded: update bookkeeping under the proc-list lock. */
			proc_list_lock();
			if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
				p->p_memstat_state |= P_MEMSTAT_FROZEN;
				p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
				memorystatus_frozen_count++;
				os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
				if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
					memorystatus_frozen_count_webcontent++;
					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
				}
				if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
					memorystatus_freeze_out_of_slots();
				}
			} else {
				// This was a re-freeze
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
					memorystatus_freezer_stats.mfs_refreeze_count++;
				}
			}
			p->p_memstat_frozen_count++;

			/* coal is non-NULL only on the `goto again` passes for XPC services. */
			if (coal != NULL) {
				/* We just froze an xpc service. Mark it as such for telemetry */
				p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
				memorystatus_frozen_count_xpc_service++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
			}


			proc_list_unlock();

			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				/*
				 * We elevate only if we are going to swap out the data.
				 */
				error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
				    memorystatus_freeze_jetsam_band, TRUE);

				if (error) {
					printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
				}
			}
		}

		if ((error == 0) && (coal == NULL)) {
			/*
			 * We froze a process and so we check to see if it was
			 * a coalition leader and if it has XPC services that
			 * might need freezing.
			 * Only one leader can be frozen at a time and so we shouldn't
			 * enter this block more than once per call. Hence the
			 * check that 'coal' has to be NULL. We should make this an
			 * assert() or panic() once we have a much more concrete way
			 * to detect an app vs a daemon.
			 */

			task_t          curr_task = NULL;

			curr_task = proc_task(p);
			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, coal)) {
				ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);

				if (ntasks > MAX_XPC_SERVICE_PIDS) {
					/* pid_list can only hold MAX_XPC_SERVICE_PIDS entries. */
					ntasks = MAX_XPC_SERVICE_PIDS;
				}
			}
		}

		proc_rele(p);

		/* Freeze each collected XPC service; 'again' re-runs with the new pid. */
		while (ntasks) {
			pid = pid_list[--ntasks];
			goto again;
		}

		lck_mtx_unlock(&freezer_mutex);
		return error;
	} else {
		printf("sysctl_freeze: Invalid process\n");
	}


	lck_mtx_unlock(&freezer_mutex);
	return EINVAL;
}
676 
/*
 * kern.memorystatus_freeze: write-only (masked) dev/debug trigger,
 * dispatched to sysctl_memorystatus_freeze().
 */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");
679 
/*
 * Manual trigger of aggressive frozen demotion for dev / debug kernels only.
 */
683 static int
684 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
685 {
686 #pragma unused(arg1, arg2)
687 	int error, val;
688 	/*
689 	 * Only demote on write to prevent demoting during `sysctl -a`.
690 	 * The actual value written doesn't matter.
691 	 */
692 	error = sysctl_handle_int(oidp, &val, 0, req);
693 	if (error || !req->newptr) {
694 		return error;
695 	}
696 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
697 		return ENOTSUP;
698 	}
699 	lck_mtx_lock(&freezer_mutex);
700 	memorystatus_demote_frozen_processes(false);
701 	lck_mtx_unlock(&freezer_mutex);
702 	return 0;
703 }
704 
/* kern.memorystatus_demote_frozen_processes: write-only trigger handled by sysctl_memorystatus_demote_frozen_process(). */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
706 
707 static int
708 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
709 {
710 #pragma unused(arg1, arg2)
711 
712 	int error, pid = 0;
713 	proc_t p;
714 
715 	if (memorystatus_freeze_enabled == FALSE) {
716 		return ENOTSUP;
717 	}
718 
719 	error = sysctl_handle_int(oidp, &pid, 0, req);
720 	if (error || !req->newptr) {
721 		return error;
722 	}
723 
724 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
725 		do_fastwake_warmup_all();
726 		return 0;
727 	} else {
728 		p = proc_find(pid);
729 		if (p != NULL) {
730 			error = task_thaw(proc_task(p));
731 
732 			if (error) {
733 				error = EIO;
734 			} else {
735 				/*
736 				 * task_thaw() succeeded.
737 				 *
738 				 * We increment memorystatus_frozen_count on the sysctl freeze path.
739 				 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
740 				 * when this process exits.
741 				 *
742 				 * proc_list_lock();
743 				 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
744 				 * proc_list_unlock();
745 				 */
746 			}
747 			proc_rele(p);
748 			return error;
749 		}
750 	}
751 
752 	return EINVAL;
753 }
754 
/* kern.memorystatus_thaw: write a pid (or VM_PAGES_FOR_ALL_PROCS) to thaw; handled by sysctl_memorystatus_available_pages_thaw(). */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
757 
758 
/* Global (system-wide) freezer state reported by memorystatus_freezer_get_status(). */
typedef struct _global_freezable_status {
	boolean_t       freeze_pages_threshold_crossed;  /* memorystatus_available_pages dropped below memorystatus_freeze_threshold */
	boolean_t       freeze_eligible_procs_available; /* suspended-but-not-frozen count exceeds memorystatus_freeze_suspended_threshold */
	boolean_t       freeze_scheduled_in_future;      /* freezer thread's next scheduled run is still in the future */
}global_freezable_status_t;
764 
/* Per-process freezability record reported by memorystatus_freezer_get_status(). */
typedef struct _proc_freezable_status {
	boolean_t    freeze_has_memstat_state;  /* TRUE iff memstat state allows freezing (suspended; not terminated/locked/disabled/ignored) */
	boolean_t    freeze_has_pages_min;      /* TRUE iff footprint >= memorystatus_freeze_pages_min (forced TRUE for XPC services) */
	int        freeze_has_probability;      /* probability-of-use from the global table; -1 when no table applies */
	int        freeze_leader_eligible;      /* FREEZE_PROC_LEADER_FREEZABLE_* verdict for the coalition leader */
	boolean_t    freeze_attempted;          /* TRUE if an eval-only task_freeze() was attempted */
	uint32_t    p_memstat_state;            /* snapshot of p->p_memstat_state at evaluation time */
	uint32_t    p_pages;                    /* footprint page count; (uint32_t)-1 for XPC services (size check skipped) */
	int        p_freeze_error_code;         /* freezer error code when the attempt failed */
	int        p_pid;
	int        p_leader_pid;                /* coalition leader's pid; 0 when not a coalition-driven freeze */
	char        p_name[MAXCOMLEN + 1];
}proc_freezable_status_t;
778 
779 #define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */
780 
781 /*
782  * For coalition based freezing evaluations, we proceed as follows:
783  *  - detect that the process is a coalition member and a XPC service
784  *  - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
785  *  - continue its freezability evaluation assuming its leader will be freezable too
786  *
 * Once we are done evaluating all processes, we do a quick run through all
788  * processes and for a coalition member XPC service we look up the 'freezable'
789  * status of its leader and iff:
790  *  - the xpc service is freezable i.e. its individual freeze evaluation worked
791  *  - and, its leader is also marked freezable
792  * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
793  */
794 
795 #define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN   (-1)
796 #define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1)
797 #define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2)
798 
799 static int
memorystatus_freezer_get_status(user_addr_t buffer,size_t buffer_size,int32_t * retval)800 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
801 {
802 	uint32_t            proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
803 	global_freezable_status_t    *list_head;
804 	proc_freezable_status_t     *list_entry, *list_entry_start;
805 	size_t                list_size = 0, entry_count = 0;
806 	proc_t                p, leader_proc;
807 	memstat_bucket_t        *bucket;
808 	uint32_t            state = 0, pages = 0;
809 	boolean_t            try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
810 	int                error = 0, probability_of_use = 0;
811 	pid_t              leader_pid = 0;
812 
813 
814 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
815 		return ENOTSUP;
816 	}
817 
818 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
819 
820 	if (buffer_size < list_size) {
821 		return EINVAL;
822 	}
823 
824 	list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
825 	if (list_head == NULL) {
826 		return ENOMEM;
827 	}
828 
829 	list_size = sizeof(global_freezable_status_t);
830 
831 	proc_list_lock();
832 
833 	uint64_t curr_time = mach_absolute_time();
834 
835 	list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
836 	list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
837 	list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
838 
839 	list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
840 	list_entry = list_entry_start;
841 
842 	bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
843 
844 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
845 
846 	p = memorystatus_get_first_proc_locked(&band, FALSE);
847 	proc_count++;
848 
849 	while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
850 	    (p) &&
851 	    (list_size < buffer_size)) {
852 		if (isSysProc(p)) {
853 			/*
854 			 * Daemon:- We will consider freezing it iff:
855 			 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
856 			 * - its role in the coalition is XPC service.
857 			 *
858 			 * We skip memory size requirements in this case.
859 			 */
860 
861 			coalition_t     coal = COALITION_NULL;
862 			task_t          leader_task = NULL, curr_task = NULL;
863 			int             task_role_in_coalition = 0;
864 
865 			curr_task = proc_task(p);
866 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
867 
868 			if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
869 				/*
870 				 * By default, XPC services without an app
871 				 * will be the leader of their own single-member
872 				 * coalition.
873 				 */
874 				goto skip_ineligible_xpc;
875 			}
876 
877 			leader_task = coalition_get_leader(coal);
878 			if (leader_task == TASK_NULL) {
879 				/*
880 				 * This jetsam coalition is currently leader-less.
881 				 * This could happen if the app died, but XPC services
882 				 * have not yet exited.
883 				 */
884 				goto skip_ineligible_xpc;
885 			}
886 
887 			leader_proc = (proc_t)get_bsdtask_info(leader_task);
888 			task_deallocate(leader_task);
889 
890 			if (leader_proc == PROC_NULL) {
891 				/* leader task is exiting */
892 				goto skip_ineligible_xpc;
893 			}
894 
895 			task_role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
896 
897 			if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
898 				xpc_skip_size_probability_check = TRUE;
899 				leader_pid = proc_getpid(leader_proc);
900 				goto continue_eval;
901 			}
902 
903 skip_ineligible_xpc:
904 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
905 			proc_count++;
906 			continue;
907 		}
908 
909 continue_eval:
910 		strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
911 
912 		list_entry->p_pid = proc_getpid(p);
913 
914 		state = p->p_memstat_state;
915 
916 		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
917 		    !(state & P_MEMSTAT_SUSPENDED)) {
918 			try_freeze = list_entry->freeze_has_memstat_state = FALSE;
919 		} else {
920 			try_freeze = list_entry->freeze_has_memstat_state = TRUE;
921 		}
922 
923 		list_entry->p_memstat_state = state;
924 
925 		if (xpc_skip_size_probability_check == TRUE) {
926 			/*
927 			 * Assuming the coalition leader is freezable
928 			 * we don't care re. minimum pages and probability
929 			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
930 			 * XPC services have to be explicity opted-out of the disabled
931 			 * state. And we checked that state above.
932 			 */
933 			list_entry->freeze_has_pages_min = TRUE;
934 			list_entry->p_pages = -1;
935 			list_entry->freeze_has_probability = -1;
936 
937 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
938 			list_entry->p_leader_pid = leader_pid;
939 
940 			xpc_skip_size_probability_check = FALSE;
941 		} else {
942 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
943 			list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
944 
945 			memorystatus_get_task_page_counts(proc_task(p), &pages, NULL, NULL);
946 			if (pages < memorystatus_freeze_pages_min) {
947 				try_freeze = list_entry->freeze_has_pages_min = FALSE;
948 			} else {
949 				list_entry->freeze_has_pages_min = TRUE;
950 			}
951 
952 			list_entry->p_pages = pages;
953 
954 			if (entry_count) {
955 				uint32_t j = 0;
956 				for (j = 0; j < entry_count; j++) {
957 					if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
958 					    p->p_name,
959 					    MAXCOMLEN) == 0) {
960 						probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
961 						break;
962 					}
963 				}
964 
965 				list_entry->freeze_has_probability = probability_of_use;
966 
967 				try_freeze = ((probability_of_use > 0) && try_freeze);
968 			} else {
969 				list_entry->freeze_has_probability = -1;
970 			}
971 		}
972 
973 		if (try_freeze) {
974 			uint32_t purgeable, wired, clean, dirty, shared;
975 			uint32_t max_pages = 0;
976 			int freezer_error_code = 0;
977 
978 			error = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
979 
980 			if (error) {
981 				list_entry->p_freeze_error_code = freezer_error_code;
982 			}
983 
984 			list_entry->freeze_attempted = TRUE;
985 		}
986 
987 		list_entry++;
988 		freeze_eligible_proc_considered++;
989 
990 		list_size += sizeof(proc_freezable_status_t);
991 
992 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
993 		proc_count++;
994 	}
995 
996 	proc_list_unlock();
997 
998 	list_entry = list_entry_start;
999 
1000 	for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1001 		if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1002 			leader_pid = list_entry[xpc_index].p_leader_pid;
1003 
1004 			leader_proc = proc_find(leader_pid);
1005 
1006 			if (leader_proc) {
1007 				if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
1008 					/*
1009 					 * Leader has already been frozen.
1010 					 */
1011 					list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1012 					proc_rele(leader_proc);
1013 					continue;
1014 				}
1015 				proc_rele(leader_proc);
1016 			}
1017 
1018 			for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1019 				if (list_entry[leader_index].p_pid == leader_pid) {
1020 					if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1021 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1022 					} else {
1023 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1024 						list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1025 					}
1026 					break;
1027 				}
1028 			}
1029 
1030 			/*
1031 			 * Didn't find the leader entry. This might be likely because
1032 			 * the leader never made it down to band 0.
1033 			 */
1034 			if (leader_index == freeze_eligible_proc_considered) {
1035 				list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1036 				list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1037 			}
1038 		}
1039 	}
1040 
1041 	buffer_size = MIN(list_size, INT32_MAX);
1042 
1043 	error = copyout(list_head, buffer, buffer_size);
1044 	if (error == 0) {
1045 		*retval = (int32_t) buffer_size;
1046 	} else {
1047 		*retval = 0;
1048 	}
1049 
1050 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1051 	kfree_data(list_head, list_size);
1052 
1053 	MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)*list_size);
1054 
1055 	return error;
1056 }
1057 
1058 #endif /* DEVELOPMENT || DEBUG */
1059 
/*
 * Get a list of all processes in the freezer band which are currently frozen.
 * Used by powerlog to collect analytics on frozen processes.
 */
1064 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1065 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1066 {
1067 	global_frozen_procs_t *frozen_procs = NULL;
1068 	uint32_t band = memorystatus_freeze_jetsam_band;
1069 	proc_t p;
1070 	uint32_t state;
1071 	int error;
1072 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1073 		return ENOTSUP;
1074 	}
1075 	if (buffer_size < sizeof(global_frozen_procs_t)) {
1076 		return EINVAL;
1077 	}
1078 	frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1079 	if (frozen_procs == NULL) {
1080 		return ENOMEM;
1081 	}
1082 
1083 	proc_list_lock();
1084 	p = memorystatus_get_first_proc_locked(&band, FALSE);
1085 	while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1086 		state = p->p_memstat_state;
1087 		if (state & P_MEMSTAT_FROZEN) {
1088 			frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1089 			strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1090 			    p->p_name, sizeof(proc_name_t));
1091 			frozen_procs->gfp_num_frozen++;
1092 		}
1093 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1094 	}
1095 	proc_list_unlock();
1096 
1097 	buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1098 	error = copyout(frozen_procs, buffer, buffer_size);
1099 	if (error == 0) {
1100 		*retval = (int32_t) buffer_size;
1101 	} else {
1102 		*retval = 0;
1103 	}
1104 	kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1105 
1106 	return error;
1107 }
1108 
/*
 * If dasd is running an experiment that impacts its freezer candidate selection,
 * we record that in our telemetry.
 */
1113 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1114 
1115 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1116 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1117 {
1118 	memorystatus_freezer_trial_identifiers_v1 identifiers;
1119 	int error = 0;
1120 
1121 	if (buffer_size != sizeof(identifiers)) {
1122 		return EINVAL;
1123 	}
1124 	error = copyin(buffer, &identifiers, sizeof(identifiers));
1125 	if (error != 0) {
1126 		return error;
1127 	}
1128 	if (identifiers.version != 1) {
1129 		return EINVAL;
1130 	}
1131 	dasd_trial_identifiers = identifiers;
1132 	*retval = 0;
1133 	return error;
1134 }
1135 
1136 /*
1137  * Reset the freezer state by wiping out all suspended frozen apps, clearing
1138  * per-process freezer state, and starting a fresh interval.
1139  */
static int
memorystatus_freezer_reset_state(int32_t *retval)
{
	uint32_t band = JETSAM_PRIORITY_IDLE;
	/* Don't kill above the frozen band */
	uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
	proc_t next_p = PROC_NULL;
	uint64_t new_budget;

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}

	/*
	 * NOTE(review): if allocation fails we log but still pass
	 * OS_REASON_NULL to kill_all_frozen_processes — this mirrors
	 * memorystatus_disable_freeze() below; confirm that's intended.
	 */
	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
	if (jetsam_reason == OS_REASON_NULL) {
		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
	}
	/* Hold the freezer mutex so nothing gets frozen while we reset. */
	lck_mtx_lock(&freezer_mutex);
	/* Step 1: jetsam every suspended frozen process up to the freezer band. */
	kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
	proc_list_lock();

	/*
	 * Clear the considered and skip reason flags on all processes
	 * so we're starting fresh with the new policy.
	 */
	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		proc_t p = next_p;
		uint32_t state = p->p_memstat_state;
		/* Advance before touching p so the walk survives state changes. */
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);

		if (p->p_memstat_effectivepriority > kMaxBand) {
			break;
		}
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
			continue;
		}

		p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
	}

	proc_list_unlock();

	/* Step 3: start a brand-new freezer interval with a freshly computed budget. */
	new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
	memorystatus_freeze_force_new_interval(new_budget);

	lck_mtx_unlock(&freezer_mutex);
	*retval = 0;
	return 0;
}
1191 
1192 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1193 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1194 {
1195 	int err = ENOTSUP;
1196 
1197 #if DEVELOPMENT || DEBUG
1198 	if (flags == FREEZER_CONTROL_GET_STATUS) {
1199 		err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1200 	}
1201 #endif /* DEVELOPMENT || DEBUG */
1202 	if (flags == FREEZER_CONTROL_GET_PROCS) {
1203 		err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1204 	} else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1205 		err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1206 	} else if (flags == FREEZER_CONTROL_RESET_STATE) {
1207 		err = memorystatus_freezer_reset_state(retval);
1208 	}
1209 
1210 	return err;
1211 }
1212 
1213 extern void        vm_swap_consider_defragmenting(int);
1214 extern void vm_page_reactivate_all_throttled(void);
1215 
/*
 * Jetsam every frozen process at or below 'max_band'.
 *
 * max_band:             highest jetsam band to consider; the walk stops at the
 *                       first process above it.
 * suspended_only:       when true, only frozen processes that are also
 *                       suspended (P_MEMSTAT_SUSPENDED) are killed.
 * jetsam_reason:        reason attached to each kill; we take a reference per
 *                       kill because the sync kill path drops one.
 * memory_reclaimed_out: if non-NULL, receives the summed phys footprint of
 *                       the processes that were successfully killed.
 *
 * Returns true iff at least one process was killed.
 *
 * Caller must hold the freezer mutex and must NOT hold the proc list lock.
 */
static bool
kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	unsigned int band = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	pid_t pid = 0;
	bool retval = false, killed = false;
	uint32_t state;
	uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
	proc_list_lock();

	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
		state = p->p_memstat_state;

		/* Bands are walked in ascending priority, so we're done past max_band. */
		if (p->p_memstat_effectivepriority > max_band) {
			break;
		}

		if (!(state & P_MEMSTAT_FROZEN)) {
			continue;
		}

		if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
			continue;
		}

		if (state & P_MEMSTAT_ERROR) {
			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
		}

		/* Already-exiting processes are skipped but counted (see 'skips' below). */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
			os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
			skips++;
			continue;
		}

		/* Capture footprint before the kill so reclaimed memory can be accounted. */
		footprint = get_task_phys_footprint(proc_task(p));
		pid = proc_getpid(p);
		proc_list_unlock();

		/* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
		os_reason_ref(jetsam_reason);
		retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
		if (retval) {
			killed = true;
			memory_reclaimed += footprint;
		}
		proc_list_lock();
		/*
		 * The bands might have changed when we dropped the proc list lock.
		 * So start from the beginning.
		 * Since we're preventing any further freezing by holding the freezer mutex,
		 * and we skip anything we've already tried to kill this is guaranteed to terminate.
		 */
		band = 0;
		skips = 0;
		next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	}

	assert(skips <= memorystatus_frozen_count);
#if DEVELOPMENT || DEBUG
	if (!suspended_only && max_band >= JETSAM_PRIORITY_FOREGROUND) {
		/*
		 * Check that we've killed all frozen processes.
		 * Note that they may still be exiting (represented by skips).
		 */
		if (memorystatus_frozen_count - skips > 0) {
			assert(memorystatus_freeze_enabled == FALSE);

			panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
			    memorystatus_frozen_count);
		}
	}
#endif /* DEVELOPMENT || DEBUG */
	if (memory_reclaimed_out) {
		*memory_reclaimed_out = memory_reclaimed;
	}
	proc_list_unlock();
	return killed;
}
1306 
1307 /*
1308  * Disables the freezer, jetsams all frozen processes,
1309  * and reclaims the swap space immediately.
1310  */
1311 
void
memorystatus_disable_freeze(void)
{
	uint64_t memory_reclaimed = 0;
	bool killed = false;
	/* Caller must hold the freezer mutex; the proc list lock must not be held. */
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);


	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
	    memorystatus_available_pages, 0, 0, 0, 0);
	os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Disabling freezer. Will kill all frozen processes\n");

	/*
	 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
	 * and all frozen processes will be killed
	 * by the time we release it. Setting memorystatus_freeze_enabled to false,
	 * ensures that no new processes will be frozen once we release the mutex.
	 *
	 */
	memorystatus_freeze_enabled = FALSE;

	/*
	 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
	 */
	vm_page_reactivate_all_throttled();
	/* If allocation fails we log but proceed with OS_REASON_NULL. */
	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
	if (jetsam_reason == OS_REASON_NULL) {
		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
	}

	/* Kill everything frozen up to (and including) the foreground band. */
	killed = kill_all_frozen_processes(JETSAM_PRIORITY_FOREGROUND, false, jetsam_reason, &memory_reclaimed);

	if (killed) {
		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Killed all frozen processes.\n");
		/* Reclaim the freed swap space right away. */
		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);

		proc_list_lock();
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
		    sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
		uint64_t timestamp_now = mach_absolute_time();
		memorystatus_jetsam_snapshot->notification_time = timestamp_now;
		memorystatus_jetsam_snapshot->js_gencount++;
		/*
		 * Only notify snapshot consumers if the last notification has aged
		 * past the timeout (or none has been sent yet).
		 */
		if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
		    timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
			proc_list_unlock();
			int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
			if (!ret) {
				proc_list_lock();
				memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
			}
		}
		proc_list_unlock();
	} else {
		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: No frozen processes to kill.\n");
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
	    memorystatus_available_pages, memory_reclaimed, 0, 0, 0);

	return;
}
1374 
1375 static void
memorystatus_set_freeze_is_enabled(bool enabled)1376 memorystatus_set_freeze_is_enabled(bool enabled)
1377 {
1378 	lck_mtx_lock(&freezer_mutex);
1379 	if (enabled != memorystatus_freeze_enabled) {
1380 		if (enabled) {
1381 			memorystatus_freeze_enabled = true;
1382 		} else {
1383 			memorystatus_disable_freeze();
1384 		}
1385 	}
1386 	lck_mtx_unlock(&freezer_mutex);
1387 }
1388 
1389 
1390 static int
1391 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1392 {
1393 #pragma unused(arg1, arg2)
1394 	int error, val = memorystatus_freeze_enabled ? 1 : 0;
1395 
1396 	error = sysctl_handle_int(oidp, &val, 0, req);
1397 	if (error || !req->newptr) {
1398 		return error;
1399 	}
1400 
1401 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1402 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1403 		return EINVAL;
1404 	}
1405 
1406 	memorystatus_set_freeze_is_enabled(val);
1407 
1408 	return 0;
1409 }
1410 
/* vm.freeze_enabled: read/write toggle; writes funnel through memorystatus_set_freeze_is_enabled(). */
SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1412 
1413 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1414 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1415 {
1416 	uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1417 	uint64_t interval_expiration_absolutetime;
1418 	nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1419 	os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1420 
1421 	thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1422 }
1423 
1424 extern uuid_string_t trial_treatment_id;
1425 extern uuid_string_t trial_experiment_id;
1426 extern int trial_deployment_id;
1427 
/*
 * Core Analytics event schema for per-interval freezer telemetry.
 * Fields are populated in memorystatus_freeze_record_interval_analytics()
 * when a throttle interval expires; most error_* fields are percentages of
 * the processes considered during the interval.
 */
CA_EVENT(freezer_interval,
    CA_INT, budget_remaining,
    CA_INT, error_below_min_pages,
    CA_INT, error_excess_shared_memory,
    CA_INT, error_low_private_shared_ratio,
    CA_INT, error_no_compressor_space,
    CA_INT, error_no_swap_space,
    CA_INT, error_low_probability_of_use,
    CA_INT, error_elevated,
    CA_INT, error_other,
    CA_INT, frozen_count,
    CA_INT, pageouts,
    CA_INT, refreeze_average,
    CA_INT, skipped_full,
    CA_INT, skipped_shared_mb_high,
    CA_INT, swapusage,
    CA_INT, thaw_count,
    CA_INT, thaw_percentage,
    CA_INT, thaws_per_gb,
    CA_INT, trial_deployment_id,
    CA_INT, dasd_trial_deployment_id,
    CA_INT, budget_exhaustion_duration_remaining,
    CA_INT, thaw_percentage_webcontent,
    CA_INT, thaw_percentage_fg,
    CA_INT, thaw_percentage_bg,
    CA_INT, thaw_percentage_fg_non_xpc_service,
    CA_INT, fg_resume_count,
    CA_INT, unique_freeze_count,
    CA_INT, unique_thaw_count,
    CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1461 
1462 extern uint64_t vm_swap_get_total_space(void);
1463 extern uint64_t vm_swap_get_free_space(void);
1464 
/*
 * Record statistics from the expiring interval
 * via core analytics.
 *
 * Sends one "freezer_interval" CoreAnalytics event summarizing the
 * interval: budget use, freeze/thaw counts, failure-reason percentages,
 * and any active xnu/dasd experiment identifiers.
 */
static void
memorystatus_freeze_record_interval_analytics(void)
{
	ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
	CA_EVENT_TYPE(freezer_interval) * e = event->data;
	/* Budget is tracked in pages; report it in MiB. */
	e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
	uint64_t process_considered_count, refrozen_count, below_threshold_count;
	memory_object_size_t swap_size;
	process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
	if (process_considered_count != 0) {
		/*
		 * Each error_* field is the percentage of considered processes
		 * that failed to freeze for that reason. Guarded so we never
		 * divide by zero when nothing was considered this interval.
		 */
		e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
		e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
		e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
		e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
		e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
		e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
		e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
		e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
	}
	e->frozen_count = memorystatus_frozen_count;
	/* Pageouts converted from pages to MiB. */
	e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
	refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
	if (refrozen_count != 0) {
		/* Average MiB written back per re-freeze. */
		e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
	}
	below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
	if (below_threshold_count != 0) {
		/* Percentages of skip reasons while below the memory threshold. */
		e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
		e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
	}
	if (VM_CONFIG_SWAP_IS_PRESENT) {
		swap_size = vm_swap_get_total_space();
		if (swap_size) {
			/*
			 * NOTE(review): despite the name, this records the
			 * percentage of swap space that is FREE (free/total*100),
			 * not the used fraction — confirm the analytics consumer
			 * expects that.
			 */
			e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
		}
	}
	e->thaw_count = memorystatus_thaw_count;
	e->thaw_percentage = get_thaw_percentage();
	e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
	e->thaw_percentage_fg = get_thaw_percentage_fg();
	e->thaw_percentage_bg = get_thaw_percentage_bg();
	e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();

	/* e->pageouts is in MiB, so dividing by 1024 gives whole GiB;
	 * thaws_per_gb is only computed once at least 1 GiB was paged out. */
	if (e->pageouts / (1UL << 10) != 0) {
		e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
	}
	e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
	e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
	e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
	e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);

	/*
	 * Record any xnu or dasd experiment information
	 */
	strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
	strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
	e->trial_deployment_id = trial_deployment_id;
	strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
	strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
	e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;

	CA_EVENT_SEND(event);
}
1532 
/*
 * Thread-call handler that closes out the expiring freezer throttle
 * interval and starts a new one: records analytics for the old interval,
 * computes the next budget (including any unused-budget rollover), and
 * optionally demotes frozen processes.
 *
 * Runs without the freezer_mutex; takes it only around the interval reset.
 */
static void
memorystatus_freeze_reset_interval(void *arg0, void *arg1)
{
#pragma unused(arg0, arg1)
	struct throttle_interval_t *interval = NULL;
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t now_ts;
	uint32_t budget_rollover = 0;

	clock_get_system_nanotime(&sec, &nsec);
	/* mach_timespec_t.tv_sec is 32-bit; clamp to avoid truncation surprises. */
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;
	interval = normal_throttle_window;

	/* Record analytics from the old interval before resetting. */
	memorystatus_freeze_record_interval_analytics();

	lck_mtx_lock(&freezer_mutex);
	/* How long has it been since the previous interval expired? */
	mach_timespec_t expiration_period_ts = now_ts;
	SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
	/* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
	budget_rollover = interval->pageouts > interval->max_pageouts ?
	    0 : interval->max_pageouts - interval->pageouts;

	memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
		    expiration_period_ts.tv_sec, interval->burst_multiple,
		    interval->mins, budget_rollover),
	    now_ts);
	/* The new interval's max_pageouts becomes the page budget for this interval. */
	memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;

	if (!memorystatus_freezer_use_demotion_list) {
		memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
	}
	lck_mtx_unlock(&freezer_mutex);
}
1570 
1571 
1572 proc_t
memorystatus_get_coalition_leader_and_role(proc_t p,int * role_in_coalition)1573 memorystatus_get_coalition_leader_and_role(proc_t p, int *role_in_coalition)
1574 {
1575 	coalition_t     coal = COALITION_NULL;
1576 	task_t          leader_task = NULL, curr_task = NULL;
1577 	proc_t          leader_proc = PROC_NULL;
1578 
1579 	curr_task = proc_task(p);
1580 	coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1581 
1582 	if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1583 		return p;
1584 	}
1585 
1586 	leader_task = coalition_get_leader(coal);
1587 	if (leader_task == TASK_NULL) {
1588 		/*
1589 		 * This jetsam coalition is currently leader-less.
1590 		 * This could happen if the app died, but XPC services
1591 		 * have not yet exited.
1592 		 */
1593 		return PROC_NULL;
1594 	}
1595 
1596 	leader_proc = (proc_t)get_bsdtask_info(leader_task);
1597 	task_deallocate(leader_task);
1598 
1599 	if (leader_proc == PROC_NULL) {
1600 		/* leader task is exiting */
1601 		return PROC_NULL;
1602 	}
1603 
1604 	*role_in_coalition = task_coalition_role_for_type(curr_task, COALITION_TYPE_JETSAM);
1605 
1606 	return leader_proc;
1607 }
1608 
1609 bool
memorystatus_freeze_process_is_recommended(const proc_t p)1610 memorystatus_freeze_process_is_recommended(const proc_t p)
1611 {
1612 	assert(!memorystatus_freezer_use_ordered_list);
1613 	int probability_of_use = 0;
1614 
1615 	size_t entry_count = 0, i = 0;
1616 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1617 	if (entry_count == 0) {
1618 		/*
1619 		 * If dasd hasn't supplied a table yet, we default to every app being eligible
1620 		 * for the freezer.
1621 		 */
1622 		return true;
1623 	}
1624 	for (i = 0; i < entry_count; i++) {
1625 		/*
1626 		 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1627 		 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1628 		 * MAXCOMLEN bytes here since the name in the probabilities table could
1629 		 * be truncated from the proc_t's p_name.
1630 		 */
1631 		if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1632 		    p->p_name,
1633 		    MAXCOMLEN) == 0) {
1634 			probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1635 			break;
1636 		}
1637 	}
1638 	return probability_of_use > 0;
1639 }
1640 
/*
 * One-time freezer initialization: seeds the default daily budget, starts
 * the freezer worker thread, allocates the interval-reset thread call,
 * and forces the first throttle interval. When the freezer/swap config is
 * inactive, the budget is simply zeroed.
 */
__private_extern__ void
memorystatus_freeze_init(void)
{
	kern_return_t result;
	thread_t thread;

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * This is just the default value if the underlying
		 * storage device doesn't have any specific budget.
		 * We check with the storage layer in memorystatus_freeze_update_throttle()
		 * before we start our freezing the first time.
		 */
		memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;

		result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
		if (result == KERN_SUCCESS) {
			/* Run the freezer's I/O throttled and passive so it yields to user I/O. */
			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
			thread_set_thread_name(thread, "VM_freezer");

			/* kernel_thread_start() returned a ref we don't need to keep. */
			thread_deallocate(thread);
		} else {
			panic("Could not create memorystatus_freeze_thread");
		}

		/* One-shot call, re-armed each time an interval is started. */
		freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
		/* Start a new interval */

		lck_mtx_lock(&freezer_mutex);
		uint32_t budget;
		budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
		memorystatus_freeze_force_new_interval(budget);
		lck_mtx_unlock(&freezer_mutex);
	} else {
		memorystatus_freeze_budget_pages_remaining = 0;
	}
}
1679 
1680 void
memorystatus_freeze_configure_for_swap()1681 memorystatus_freeze_configure_for_swap()
1682 {
1683 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1684 		return;
1685 	}
1686 
1687 	assert(memorystatus_swap_all_apps);
1688 
1689 	/*
1690 	 * We expect both a larger working set and larger individual apps
1691 	 * in this mode, so tune up the freezer accordingly.
1692 	 */
1693 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX_SWAP_ENABLED;
1694 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS_SWAP_ENABLED;
1695 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX_SWAP_ENABLED;
1696 
1697 	/*
1698 	 * We don't have a budget when running with full app swap.
1699 	 * Force a new interval. memorystatus_freeze_calculate_new_budget should give us an
1700 	 * unlimited budget.
1701 	 */
1702 	lck_mtx_lock(&freezer_mutex);
1703 	uint32_t budget;
1704 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1705 	memorystatus_freeze_force_new_interval(budget);
1706 	lck_mtx_unlock(&freezer_mutex);
1707 }
1708 
1709 void
memorystatus_freeze_disable_swap()1710 memorystatus_freeze_disable_swap()
1711 {
1712 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1713 		return;
1714 	}
1715 
1716 	assert(!memorystatus_swap_all_apps);
1717 
1718 	memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX;
1719 	memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS;
1720 	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
1721 
1722 	/*
1723 	 * Calculate a new budget now that we're constrained by our daily write budget again.
1724 	 */
1725 	lck_mtx_lock(&freezer_mutex);
1726 	uint32_t budget;
1727 	budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1728 	memorystatus_freeze_force_new_interval(budget);
1729 	lck_mtx_unlock(&freezer_mutex);
1730 }
1731 
/*
 * Attempt to freeze (or re-freeze) process 'p' by compressing/swapping its
 * dirty pages via task_freeze().
 *
 * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
 * (The proc_list_lock is dropped internally around the task_freeze() call
 * and the post-freeze notification/elevation work, then re-acquired.)
 *
 * coal (IN/OUT): if non-NULL and *coal is NULL and 'p' turns out to lead a
 * jetsam coalition, *coal is set to that coalition and coalition_list /
 * coalition_list_length receive the pids of its XPC services (clamped to
 * MAX_XPC_SERVICE_PIDS). A non-NULL *coal on entry means 'p' is treated as
 * one of those XPC services for telemetry.
 *
 * Returns 0 on success; EINVAL when 'p' is ineligible or task_freeze()
 * fails; EBUSY when 'p' is transiently busy (locked elsewhere or exiting).
 */
static int
memorystatus_freeze_process(
	proc_t p,
	coalition_t *coal, /* IN / OUT */
	pid_t *coalition_list, /* OUT */
	unsigned int *coalition_list_length /* OUT */)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	kern_return_t kr;
	uint32_t purgeable, wired, clean, dirty, shared;
	uint64_t max_pages = 0;
	int    freezer_error_code = 0;
	bool is_refreeze = false;
	task_t curr_task = TASK_NULL;

	pid_t aPid = proc_getpid(p);

	/* A proc already marked FROZEN is being considered for a re-freeze. */
	is_refreeze = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;

	/* Ensure the process is eligible for (re-)freezing */
	if (is_refreeze && !memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
		return EINVAL;
	}
	if (is_refreeze) {
		/*
		 * Not currently being looked at for something.
		 */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			return EBUSY;
		}

		/*
		 * We are going to try and refreeze and so re-evaluate
		 * the process. We don't want to double count the shared
		 * memory. So deduct the old snapshot here.
		 */
		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
		p->p_memstat_freeze_sharedanon_pages = 0;

		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	} else {
		if (!memorystatus_is_process_eligible_for_freeze(p)) {
			return EINVAL;
		}
	}

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * Freezer backed by the compressor and swap file(s)
		 * will hold compressed data.
		 */

		/* Bound the work by both the per-process cap and the remaining interval budget. */
		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
	} else {
		/*
		 * We only have the compressor pool.
		 */
		max_pages = UINT32_MAX - 1;
	}

	/* Mark as locked temporarily to avoid kill */
	p->p_memstat_state |= P_MEMSTAT_LOCKED;

	p = proc_ref(p, true);
	if (!p) {
		/* Could not take a reference — the proc is presumably exiting. */
		memorystatus_freezer_stats.mfs_error_other_count++;
		return EBUSY;
	}

	/* Drop the proc list lock across the (potentially long) task_freeze() call. */
	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
	    memorystatus_available_pages, 0, 0, 0, 0);

	/* task_freeze() takes a 32-bit page count. */
	max_pages = MIN(max_pages, UINT32_MAX);
	kr = task_freeze(proc_task(p), &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
	    memorystatus_available_pages, aPid, 0, 0, 0);

	MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);

	proc_list_lock();

	/* Success? */
	if (KERN_SUCCESS == kr) {
		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

		p->p_memstat_freeze_sharedanon_pages += shared;

		memorystatus_frozen_shared_mb += shared;

		if (!is_refreeze) {
			/* First-time freeze: update global and per-proc bookkeeping. */
			p->p_memstat_state |= P_MEMSTAT_FROZEN;
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
			memorystatus_frozen_count++;
			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
				memorystatus_frozen_count_webcontent++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
			}
			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
				/* We just consumed the last freezer slot. */
				memorystatus_freeze_out_of_slots();
			}
		} else {
			// This was a re-freeze
			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
				memorystatus_freezer_stats.mfs_refreeze_count++;
			}
		}

		p->p_memstat_frozen_count++;

		/*
		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
		 * to its higher jetsam band.
		 */
		proc_list_unlock();

		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			int ret;
			unsigned int i;
			/* Elevate the frozen process to the dedicated freezer jetsam band. */
			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);

			if (ret) {
				printf("Elevating the frozen process failed with %d\n", ret);
				/* not fatal */
			}

			/* Update stats */
			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}
		}
		/* Charge the pages written against the interval budget. */
		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
		    is_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, is_refreeze ? "Re" : "", dirty);

		proc_list_lock();

		memorystatus_freeze_pageouts += dirty;

		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
			/*
			 * Add some eviction logic here? At some point should we
			 * jetsam a process to get back its swap space so that we
			 * can freeze a more eligible process at this moment in time?
			 */
		}

		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
		if (coal != NULL && *coal == NULL) {
			curr_task = proc_task(p);
			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, *coal)) {
				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);

				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
					/* The coalition had more XPC services than fit the buffer; clamp. */
					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
				}
			}
		} else {
			/* We just froze an xpc service. Mark it as such for telemetry */
			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
			memorystatus_frozen_count_xpc_service++;
			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
		}

		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);
		return 0;
	} else {
		if (is_refreeze) {
			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
				/*
				 * Keeping this prior-frozen process in this high band when
				 * we failed to re-freeze it due to bad shared memory usage
				 * could cause excessive pressure on the lower bands.
				 * We need to demote it for now. It'll get re-evaluated next
				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
				 * bit.
				 */

				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
				memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
			}
		} else {
			/* Mark the proc so future freeze scans skip it. */
			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
		}
		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, (coal != NULL) ? *coal : NULL, "memorystatus_freeze_top_process");

		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
		wakeup(&p->p_memstat_state);
		proc_rele(p);

		return EINVAL;
	}
}
1950 
1951 /*
1952  * Synchronously freeze the passed proc. Called with a reference to the proc held.
1953  *
1954  * Doesn't deal with:
1955  * - re-freezing because this is called on a specific process and
1956  *   not by the freezer thread. If that changes, we'll have to teach it about
1957  *   refreezing a frozen process.
1958  *
1959  * - grouped/coalition freezing because we are hoping to deprecate this
1960  *   interface as it was used by user-space to freeze particular processes. But
1961  *   we have moved away from that approach to having the kernel choose the optimal
1962  *   candidates to be frozen.
1963  *
1964  * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
1965  * returns EINVAL or the value returned by task_freeze().
1966  */
1967 int
memorystatus_freeze_process_sync(proc_t p)1968 memorystatus_freeze_process_sync(proc_t p)
1969 {
1970 	int ret = EINVAL;
1971 	boolean_t memorystatus_freeze_swap_low = FALSE;
1972 
1973 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1974 		return ENOTSUP;
1975 	}
1976 
1977 	lck_mtx_lock(&freezer_mutex);
1978 
1979 	if (p == NULL) {
1980 		printf("memorystatus_freeze_process_sync: Invalid process\n");
1981 		goto exit;
1982 	}
1983 
1984 	if (memorystatus_freeze_enabled == FALSE) {
1985 		printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
1986 		goto exit;
1987 	}
1988 
1989 	if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
1990 		printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
1991 		goto exit;
1992 	}
1993 
1994 	memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1995 	if (!memorystatus_freeze_budget_pages_remaining) {
1996 		printf("memorystatus_freeze_process_sync: exit with NO available budget\n");
1997 		goto exit;
1998 	}
1999 
2000 	proc_list_lock();
2001 
2002 	ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2003 
2004 exit:
2005 	lck_mtx_unlock(&freezer_mutex);
2006 
2007 	return ret;
2008 }
2009 
/*
 * Look up the proc for entry 'index' of a userspace-supplied freezer
 * candidate list.
 *
 * Returns NULL when the list is absent, the index is out of range, the
 * entry was removed (pid == NO_PID), or no matching process exists.
 * The returned proc carries NO extra reference; the held proc_list_lock
 * keeps it stable (callers take their own ref before freezing).
 * When the entry's pid is stale and the proc is re-found by name,
 * *pid_mismatch_counter is incremented and the entry's pid is refreshed.
 */
proc_t
memorystatus_freezer_candidate_list_get_proc(
	struct memorystatus_freezer_candidate_list *list,
	size_t index,
	uint64_t *pid_mismatch_counter)
{
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	if (list->mfcl_list == NULL || list->mfcl_length <= index) {
		return NULL;
	}
	memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
	if (entry->pid == NO_PID) {
		/* Entry has been removed. */
		return NULL;
	}

	proc_t p = proc_find_locked(entry->pid);
	if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
		/*
		 * We grab a reference when we are about to freeze the process. So drop
		 * the reference that proc_find_locked() grabbed for us.
		 * We also have the proc_list_lock so this process is stable.
		 */
		proc_rele(p);
		return p;
	} else {
		if (p) {
			/* pid rollover. */
			proc_rele(p);
		}
		/*
		 * The proc has exited since we received this list.
		 * It may have re-launched with a new pid, so we go looking for it.
		 */
		unsigned int band = JETSAM_PRIORITY_IDLE;
		p = memorystatus_get_first_proc_locked(&band, TRUE);
		while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
			/* Walk the bands from IDLE upward, matching by process name. */
			if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
				(*pid_mismatch_counter)++;
				/* Stash the pid for faster lookup next time. */
				entry->pid = proc_getpid(p);
				return p;
			}
			p = memorystatus_get_next_proc_locked(&band, p, TRUE);
		}
		/* No match. */
		return NULL;
	}
}
2059 
2060 static size_t
memorystatus_freeze_pid_list(pid_t * pid_list,unsigned int num_pids)2061 memorystatus_freeze_pid_list(pid_t *pid_list, unsigned int num_pids)
2062 {
2063 	int ret = 0;
2064 	size_t num_frozen = 0;
2065 	while (num_pids > 0) {
2066 		pid_t pid = pid_list[--num_pids];
2067 		proc_t p = proc_find_locked(pid);
2068 		if (p) {
2069 			proc_rele(p);
2070 			ret = memorystatus_freeze_process(p, NULL, NULL, NULL);
2071 			if (ret != 0) {
2072 				break;
2073 			}
2074 			num_frozen++;
2075 		}
2076 	}
2077 	return num_frozen;
2078 }
2079 
/*
 * Attempt to freeze the best candidate process.
 * Keep trying until we freeze something or run out of candidates.
 * Returns the number of processes frozen (including coalition members).
 *
 * Called with the freezer_mutex held; takes/releases the proc_list_lock
 * around the candidate scan.
 */
static size_t
memorystatus_freeze_top_process(void)
{
	int freeze_ret;
	size_t num_frozen = 0;
	coalition_t coal = COALITION_NULL;
	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
	unsigned int ntasks = 0;
	struct memorystatus_freeze_list_iterator iterator;
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);

	/* The iterator remembers candidates already offered, so failures aren't retried. */
	bzero(&iterator, sizeof(struct memorystatus_freeze_list_iterator));
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();
	while (true) {
		proc_t p = memorystatus_freeze_pick_process(&iterator);
		if (p == PROC_NULL) {
			/* Nothing left to freeze */
			break;
		}
		freeze_ret = memorystatus_freeze_process(p, &coal, pid_list, &ntasks);
		if (freeze_ret == 0) {
			num_frozen = 1;
			/*
			 * We froze a process successfully.
			 * If it's a coalition head, freeze the coalition.
			 * Then we're done for now.
			 */
			if (coal != NULL) {
				/* 'pid_list'/'ntasks' were filled with the coalition's XPC services. */
				num_frozen += memorystatus_freeze_pid_list(pid_list, ntasks);
			}
			break;
		} else {
			if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
				/* Out of backing store — no point trying further candidates. */
				break;
			}
			/*
			 * Freeze failed but we're not out of space.
			 * Keep trying to find a good candidate,
			 * memorystatus_freeze_pick_process will not return this proc again until
			 * we reset the iterator.
			 */
		}
	}
	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, 0, 0, 0, 0);

	return num_frozen;
}
2136 
2137 #if DEVELOPMENT || DEBUG
2138 /* For testing memorystatus_freeze_top_process */
2139 static int
2140 sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
2141 {
2142 #pragma unused(arg1, arg2)
2143 	int error, val, ret = 0;
2144 	size_t num_frozen;
2145 	/*
2146 	 * Only freeze on write to prevent freezing during `sysctl -a`.
2147 	 * The actual value written doesn't matter.
2148 	 */
2149 	error = sysctl_handle_int(oidp, &val, 0, req);
2150 	if (error || !req->newptr) {
2151 		return error;
2152 	}
2153 
2154 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2155 		return ENOTSUP;
2156 	}
2157 
2158 	lck_mtx_lock(&freezer_mutex);
2159 	num_frozen = memorystatus_freeze_top_process();
2160 	lck_mtx_unlock(&freezer_mutex);
2161 
2162 	if (num_frozen == 0) {
2163 		ret = ESRCH;
2164 	}
2165 	return ret;
2166 }
2167 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2168     0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
2169 #endif /* DEVELOPMENT || DEBUG */
2170 
2171 static inline boolean_t
memorystatus_can_freeze_processes(void)2172 memorystatus_can_freeze_processes(void)
2173 {
2174 	boolean_t ret;
2175 
2176 	proc_list_lock();
2177 
2178 	if (memorystatus_suspended_count) {
2179 		memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2180 
2181 		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2182 			ret = TRUE;
2183 		} else {
2184 			ret = FALSE;
2185 		}
2186 	} else {
2187 		ret = FALSE;
2188 	}
2189 
2190 	proc_list_unlock();
2191 
2192 	return ret;
2193 }
2194 
2195 static boolean_t
memorystatus_can_freeze(boolean_t * memorystatus_freeze_swap_low)2196 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2197 {
2198 	boolean_t can_freeze = TRUE;
2199 
2200 	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
2201 	*  after boot,  and is generally is a no-op once we've reached steady state. */
2202 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2203 		return FALSE;
2204 	}
2205 
2206 	/* Check minimum suspended process threshold. */
2207 	if (!memorystatus_can_freeze_processes()) {
2208 		return FALSE;
2209 	}
2210 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2211 
2212 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2213 		/*
2214 		 * In-core compressor used for freezing WITHOUT on-disk swap support.
2215 		 */
2216 		if (vm_compressor_low_on_space()) {
2217 			if (*memorystatus_freeze_swap_low) {
2218 				*memorystatus_freeze_swap_low = TRUE;
2219 			}
2220 
2221 			can_freeze = FALSE;
2222 		} else {
2223 			if (*memorystatus_freeze_swap_low) {
2224 				*memorystatus_freeze_swap_low = FALSE;
2225 			}
2226 
2227 			can_freeze = TRUE;
2228 		}
2229 	} else {
2230 		/*
2231 		 * Freezing WITH on-disk swap support.
2232 		 *
2233 		 * In-core compressor fronts the swap.
2234 		 */
2235 		if (vm_swap_low_on_space()) {
2236 			if (*memorystatus_freeze_swap_low) {
2237 				*memorystatus_freeze_swap_low = TRUE;
2238 			}
2239 
2240 			can_freeze = FALSE;
2241 		}
2242 	}
2243 
2244 	return can_freeze;
2245 }
2246 
/*
 * Demote the given frozen process.
 * Caller must hold the proc_list_lock & it will be held on return.
 *
 * Drops the process out of the elevated freezer band, down to IDLE (or to
 * its asserted priority, if higher). The proc keeps its FROZEN marking —
 * its state is still on disk — but loses re-freeze eligibility.
 * 'urgent_mode' only affects the debug log message.
 */
static void
memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
{
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
	int maxpriority = JETSAM_PRIORITY_IDLE;
	p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
	memorystatus_invalidate_idle_demotion_locked(p, TRUE);

	maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
	memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
#if DEVELOPMENT || DEBUG
	os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
	    (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * The freezer thread will consider this a normal app to be frozen
	 * because it is in the IDLE band. So we don't need the
	 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
	 * we'll correctly count it as eligible for re-freeze again.
	 *
	 * We don't drop the frozen count because this process still has
	 * state on disk. So there's a chance it gets resumed and then it
	 * should land in the higher jetsam band. For that it needs to
	 * remain marked frozen.
	 */
	if (memorystatus_freeze_proc_is_refreeze_eligible(p)) {
		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	}
}
2284 
/*
 * Walk the elevated freezer jetsam band and demote frozen processes.
 *
 * urgent_mode == true: demote the first re-freeze-eligible process found
 * (its state is mostly resident, so demoting/jetsamming it can relieve
 * pressure) and stop after one.
 * urgent_mode == false: demote processes whose thaw count is below
 * memorystatus_thaw_count_demotion_threshold, up to the daily cap.
 *
 * Takes and releases the proc_list_lock. Returns the number demoted.
 */
static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
{
	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
	unsigned int demoted_proc_count = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	proc_list_lock();

	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
	while (next_p) {
		p = next_p;
		/* Fetch the successor first; demotion moves 'p' out of this band. */
		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);

		/* Only frozen processes are demotion candidates. */
		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
			continue;
		}

		/* Skip a proc that another operation is currently working on. */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			continue;
		}

		if (urgent_mode) {
			if (!memorystatus_freeze_proc_is_refreeze_eligible(p)) {
				/*
				 * This process hasn't been thawed recently and so most of
				 * its state sits on NAND and so we skip it -- jetsamming it
				 * won't help with memory pressure.
				 */
				continue;
			}
		} else {
			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
				/*
				 * This process has met / exceeded our thaw count demotion threshold
				 * and so we let it live in the higher bands.
				 */
				continue;
			}
		}

		memorystatus_demote_frozen_process(p, urgent_mode);
		demoted_proc_count++;
		/* Urgent mode demotes exactly one; normal mode honors the daily cap. */
		if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
			break;
		}
	}

	proc_list_unlock();
	return demoted_proc_count;
}
2335 
2336 static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)2337 memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2338 {
2339 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2340 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2341 	assert(memorystatus_freezer_use_demotion_list);
2342 	unsigned int demoted_proc_count = 0;
2343 
2344 	proc_list_lock();
2345 	for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2346 		proc_t p = memorystatus_freezer_candidate_list_get_proc(
2347 			&memorystatus_global_demote_list,
2348 			i,
2349 			&memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2350 		if (p != NULL && memorystatus_freeze_proc_is_refreeze_eligible(p)) {
2351 			memorystatus_demote_frozen_process(p, urgent_mode);
2352 			/* Remove this entry now that it's been demoted. */
2353 			memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2354 			demoted_proc_count++;
2355 			/*
2356 			 * We only demote one proc at a time in this mode.
2357 			 * This gives jetsam a chance to kill the recently demoted processes.
2358 			 */
2359 			break;
2360 		}
2361 	}
2362 
2363 	proc_list_unlock();
2364 	return demoted_proc_count;
2365 }
2366 
/*
 * This function evaluates if the currently frozen processes deserve
 * to stay in the higher jetsam band. There are 2 modes:
 * - 'urgent_mode == true': (urgent mode)
 *	We are out of budget and can't refreeze a process. The process's
 * state, if it was resumed, will stay in compressed memory. If we let it
 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
 * the lower bands. So we force-demote the least-recently-used-and-thawed
 * process.
 *
 * - 'urgent_mode == false': (normal mode)
 *      If the # of thaws of a process is below our threshold, then we
 * will demote that process into the IDLE band.
 * We don't immediately kill the process here because it already has
 * state on disk and so it might be worth giving it another shot at
 * getting thawed/resumed and used.
 */
2384 static void
memorystatus_demote_frozen_processes(bool urgent_mode)2385 memorystatus_demote_frozen_processes(bool urgent_mode)
2386 {
2387 	unsigned int demoted_proc_count = 0;
2388 
2389 	if (memorystatus_freeze_enabled == FALSE) {
2390 		/*
2391 		 * Freeze has been disabled likely to
2392 		 * reclaim swap space. So don't change
2393 		 * any state on the frozen processes.
2394 		 */
2395 		return;
2396 	}
2397 
2398 	/*
2399 	 * We have two demotion policies which can be toggled by userspace.
2400 	 * In non-urgent mode, the ordered list policy will
2401 	 * choose a demotion candidate using the list provided by dasd.
2402 	 * The thaw count policy will demote the oldest process that hasn't been
2403 	 * thawed more than memorystatus_thaw_count_demotion_threshold times.
2404 	 *
2405 	 * If urgent_mode is set, both policies will only consider demoting
2406 	 * processes that are re-freeze eligible. But the ordering is different.
2407 	 * The ordered list policy will scan in the order given by dasd.
2408 	 * The thaw count policy will scan through the frozen band.
2409 	 */
2410 	if (memorystatus_freezer_use_demotion_list) {
2411 		demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);
2412 
2413 		if (demoted_proc_count == 0 && urgent_mode) {
2414 			/*
2415 			 * We're out of budget and the demotion list doesn't contain any valid
2416 			 * candidates. We still need to demote something. Fall back to scanning
2417 			 * the frozen band.
2418 			 */
2419 			memorystatus_demote_frozen_processes_using_thaw_count(true);
2420 		}
2421 	} else {
2422 		demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
2423 	}
2424 }
2425 
2426 /*
2427  * Calculate a new freezer budget.
2428  * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2429  * @param burst_multiple The burst_multiple for the new period
2430  * @param interval_duration_min How many minutes will the new interval be?
2431  * @param rollover The amount to rollover from the previous budget.
2432  *
2433  * @return A budget for the new interval.
2434  */
2435 static uint32_t
memorystatus_freeze_calculate_new_budget(unsigned int time_since_last_interval_expired_sec,unsigned int burst_multiple,unsigned int interval_duration_min,uint32_t rollover)2436 memorystatus_freeze_calculate_new_budget(
2437 	unsigned int time_since_last_interval_expired_sec,
2438 	unsigned int burst_multiple,
2439 	unsigned int interval_duration_min,
2440 	uint32_t rollover)
2441 {
2442 	uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2443 	const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2444 	/* Precision factor for days_missed. 2 decimal points. */
2445 	const static unsigned int kFixedPointFactor = 100;
2446 	unsigned int days_missed;
2447 
2448 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2449 		return 0;
2450 	}
2451 	if (memorystatus_swap_all_apps) {
2452 		/*
2453 		 * We effectively have an unlimited budget when app swap is enabled.
2454 		 */
2455 		memorystatus_freeze_daily_mb_max = UINT32_MAX;
2456 		return UINT32_MAX;
2457 	}
2458 
2459 	/* Get the daily budget from the storage layer */
2460 	if (vm_swap_max_budget(&freeze_daily_budget)) {
2461 		freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2462 		assert(freeze_daily_budget_mb <= UINT32_MAX);
2463 		memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2464 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2465 	}
2466 	/* Calculate the daily pageout budget */
2467 	freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2468 	/* Multiply by memorystatus_freeze_budget_multiplier */
2469 	freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;
2470 
2471 	daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2472 
2473 	/*
2474 	 * Add additional budget for time since the interval expired.
2475 	 * For example, if the interval expired n days ago, we should get an additional n days
2476 	 * of budget since we didn't use any budget during those n days.
2477 	 */
2478 	days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2479 	budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2480 	new_budget = rollover + daily_budget_pageouts + budget_missed;
2481 	return (uint32_t) MIN(new_budget, UINT32_MAX);
2482 }
2483 
2484 /*
2485  * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2486  * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2487  * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2488  * it gets set for new processes.
2489  * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2490  */
2491 static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason,bool locked)2492 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2493 {
2494 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2495 	LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2496 	unsigned int band = JETSAM_PRIORITY_IDLE;
2497 	proc_t p;
2498 
2499 	if (!locked) {
2500 		proc_list_lock();
2501 	}
2502 	p = memorystatus_get_first_proc_locked(&band, FALSE);
2503 	while (p) {
2504 		assert(p->p_memstat_effectivepriority == (int32_t) band);
2505 		if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
2506 			assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2507 			p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2508 		}
2509 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2510 	}
2511 	if (!locked) {
2512 		proc_list_unlock();
2513 	}
2514 }
2515 
2516 /*
2517  * Called after we fail to freeze a process.
2518  * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2519  */
2520 static void
memorystatus_freeze_handle_error(proc_t p,const int freezer_error_code,bool was_refreeze,pid_t pid,const coalition_t coalition,const char * log_prefix)2521 memorystatus_freeze_handle_error(
2522 	proc_t p,
2523 	const int freezer_error_code,
2524 	bool was_refreeze,
2525 	pid_t pid,
2526 	const coalition_t coalition,
2527 	const char* log_prefix)
2528 {
2529 	const char *reason;
2530 	memorystatus_freeze_skip_reason_t skip_reason;
2531 
2532 	switch (freezer_error_code) {
2533 	case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2534 		memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2535 		reason = "too much shared memory";
2536 		skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2537 		break;
2538 	case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2539 		memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2540 		reason = "private-shared pages ratio";
2541 		skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2542 		break;
2543 	case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2544 		memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2545 		reason = "no compressor space";
2546 		skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2547 		break;
2548 	case FREEZER_ERROR_NO_SWAP_SPACE:
2549 		memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2550 		reason = "no swap space";
2551 		skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2552 		break;
2553 	default:
2554 		reason = "unknown error";
2555 		skip_reason = kMemorystatusFreezeSkipReasonOther;
2556 	}
2557 
2558 	p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2559 
2560 	os_log_with_startup_serial(OS_LOG_DEFAULT, "%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2561 	    log_prefix, was_refreeze ? "re" : "",
2562 	    (coalition == NULL ? "general" : "coalition-driven"), pid,
2563 	    ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2564 }
2565 
2566 /*
2567  * Start a new normal throttle interval with the given budget.
2568  * Caller must hold the freezer mutex
2569  */
2570 static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget,mach_timespec_t start_ts)2571 memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2572 {
2573 	unsigned int band;
2574 	proc_t p, next_p;
2575 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2576 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2577 
2578 	normal_throttle_window->max_pageouts = new_budget;
2579 	normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2580 	normal_throttle_window->ts.tv_nsec = 0;
2581 	ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2582 	/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2583 	if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2584 		normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2585 	} else {
2586 		normal_throttle_window->pageouts = 0;
2587 	}
2588 	/* Ensure the normal window is now active. */
2589 	memorystatus_freeze_degradation = FALSE;
2590 
2591 	/*
2592 	 * Reset interval statistics.
2593 	 */
2594 	memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2595 	memorystatus_freezer_stats.mfs_process_considered_count = 0;
2596 	memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
2597 	memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
2598 	memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
2599 	memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
2600 	memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
2601 	memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
2602 	memorystatus_freezer_stats.mfs_error_elevated_count = 0;
2603 	memorystatus_freezer_stats.mfs_error_other_count = 0;
2604 	memorystatus_freezer_stats.mfs_refreeze_count = 0;
2605 	memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
2606 	memorystatus_freezer_stats.mfs_below_threshold_count = 0;
2607 	memorystatus_freezer_stats.mfs_skipped_full_count = 0;
2608 	memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
2609 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
2610 	memorystatus_thaw_count = 0;
2611 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
2612 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
2613 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
2614 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
2615 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
2616 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
2617 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
2618 	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
2619 	os_atomic_inc(&memorystatus_freeze_current_interval, release);
2620 
2621 	/* Clear the focal thaw bit */
2622 	proc_list_lock();
2623 	band = JETSAM_PRIORITY_IDLE;
2624 	p = PROC_NULL;
2625 	next_p = PROC_NULL;
2626 
2627 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
2628 	while (next_p) {
2629 		p = next_p;
2630 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
2631 
2632 		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
2633 			break;
2634 		}
2635 		p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
2636 	}
2637 	proc_list_unlock();
2638 
2639 	schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
2640 }
2641 
2642 #if DEVELOPMENT || DEBUG
2643 
2644 static int
2645 sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
2646 {
2647 #pragma unused(arg1, arg2)
2648 	int error = 0;
2649 	unsigned int time_since_last_interval_expired_sec = 0;
2650 	unsigned int new_budget;
2651 
2652 	error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
2653 	if (error || !req->newptr) {
2654 		return error;
2655 	}
2656 
2657 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2658 		return ENOTSUP;
2659 	}
2660 	new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
2661 	return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
2662 }
2663 
/*
 * vm.memorystatus_freeze_calculate_new_budget (DEVELOPMENT || DEBUG only):
 * write seconds-since-interval-expiry, read back the computed budget.
 */
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
2666 
2667 #endif /* DEVELOPMENT || DEBUG */
2668 
2669 /*
2670  * Called when we first run out of budget in an interval.
2671  * Marks idle processes as not frozen due to lack of budget.
2672  * NB: It might be worth having a CA event here.
2673  */
2674 static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t * interval)2675 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2676 {
2677 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2678 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2679 
2680 	mach_timespec_t time_left = {0, 0};
2681 	mach_timespec_t now_ts;
2682 	clock_sec_t sec;
2683 	clock_nsec_t nsec;
2684 
2685 	time_left.tv_sec = interval->ts.tv_sec;
2686 	time_left.tv_nsec = 0;
2687 	clock_get_system_nanotime(&sec, &nsec);
2688 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2689 	now_ts.tv_nsec = nsec;
2690 
2691 	SUB_MACH_TIMESPEC(&time_left, &now_ts);
2692 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2693 	os_log(OS_LOG_DEFAULT,
2694 	    "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2695 	    time_left.tv_sec / 60, memorystatus_frozen_count);
2696 
2697 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2698 }
2699 
/*
 * Called when we cross over the threshold of maximum frozen processes allowed.
 * Marks remaining idle processes as not frozen due to lack of slots.
 *
 * Caller must hold both the freezer mutex and the proc_list lock (enforced
 * by the lock asserts below; the skip-reason walk runs with locked == true).
 */
static void
memorystatus_freeze_out_of_slots(void)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	/* Only called at the moment the frozen-process limit is reached. */
	assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);

	os_log(OS_LOG_DEFAULT,
	    "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
	    memorystatus_frozen_count);

	/* 'true' => proc_list lock is already held by the caller. */
	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
}
2717 
2718 /*
2719  * This function will do 4 things:
2720  *
2721  * 1) check to see if we are currently in a degraded freezer mode, and if so:
2722  *    - check to see if our window has expired and we should exit this mode, OR,
2723  *    - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2724  *
2725  * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2726  *
2727  * 3) check what the current normal window allows for a budget.
2728  *
2729  * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2730  *    what we would normally expect, then we are running low on our daily budget and need to enter
2731  *    degraded perf. mode.
2732  *
2733  *    Caller must hold the freezer mutex
2734  *    Caller must not hold the proc_list lock
2735  */
2736 
2737 static void
memorystatus_freeze_update_throttle(uint64_t * budget_pages_allowed)2738 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2739 {
2740 	clock_sec_t sec;
2741 	clock_nsec_t nsec;
2742 	mach_timespec_t now_ts;
2743 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2744 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2745 
2746 	unsigned int freeze_daily_pageouts_max = 0;
2747 	bool started_with_budget = (*budget_pages_allowed > 0);
2748 
2749 #if DEVELOPMENT || DEBUG
2750 	if (!memorystatus_freeze_throttle_enabled) {
2751 		/*
2752 		 * No throttling...we can use the full budget everytime.
2753 		 */
2754 		*budget_pages_allowed = UINT64_MAX;
2755 		return;
2756 	}
2757 #endif
2758 
2759 	clock_get_system_nanotime(&sec, &nsec);
2760 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2761 	now_ts.tv_nsec = nsec;
2762 
2763 	struct throttle_interval_t *interval = NULL;
2764 
2765 	if (memorystatus_freeze_degradation == TRUE) {
2766 		interval = degraded_throttle_window;
2767 
2768 		if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2769 			interval->pageouts = 0;
2770 			interval->max_pageouts = 0;
2771 		} else {
2772 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2773 		}
2774 	}
2775 
2776 	interval = normal_throttle_window;
2777 
2778 	/*
2779 	 * Current throttle window.
2780 	 * Deny freezing if we have no budget left.
2781 	 * Try graceful degradation if we are within 25% of:
2782 	 * - the daily budget, and
2783 	 * - the current budget left is below our normal budget expectations.
2784 	 */
2785 
2786 	if (memorystatus_freeze_degradation == FALSE) {
2787 		if (interval->pageouts >= interval->max_pageouts) {
2788 			*budget_pages_allowed = 0;
2789 			if (started_with_budget) {
2790 				memorystatus_freeze_out_of_budget(interval);
2791 			}
2792 		} else {
2793 			int budget_left = interval->max_pageouts - interval->pageouts;
2794 			int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2795 
2796 			mach_timespec_t time_left = {0, 0};
2797 
2798 			time_left.tv_sec = interval->ts.tv_sec;
2799 			time_left.tv_nsec = 0;
2800 
2801 			SUB_MACH_TIMESPEC(&time_left, &now_ts);
2802 
2803 			if (budget_left <= budget_threshold) {
2804 				/*
2805 				 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2806 				 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2807 				 * daily pageout budget.
2808 				 */
2809 
2810 				unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2811 				unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2812 
2813 				/*
2814 				 * The current rate of pageouts is below what we would expect for
2815 				 * the normal rate i.e. we have below normal budget left and so...
2816 				 */
2817 
2818 				if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2819 					memorystatus_freeze_degradation = TRUE;
2820 					degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2821 					degraded_throttle_window->pageouts = 0;
2822 
2823 					/*
2824 					 * Switch over to the degraded throttle window so the budget
2825 					 * doled out is based on that window.
2826 					 */
2827 					interval = degraded_throttle_window;
2828 				}
2829 			}
2830 
2831 			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2832 		}
2833 	}
2834 
2835 	MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
2836 	    interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts->tv_sec) / 60,
2837 	    interval->throttle ? "on" : "off");
2838 }
2839 
/* Set once by the freezer thread the first time it runs. */
bool memorystatus_freeze_thread_init = false;

/*
 * Body of the dedicated freezer thread, re-entered as a continuation.
 * Each wakeup it: joins the VM thread group on first run, asks
 * memorystatus_pick_freeze_count_for_wakeup() how many processes to try to
 * freeze, then (under the freezer mutex) optionally demotes thawed processes
 * and freezes top candidates while budget/slots allow. When the pageout
 * budget is exhausted it force-demotes one process (urgent mode) and stops.
 */
static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
	static boolean_t memorystatus_freeze_swap_low = FALSE;
	size_t max_to_freeze = 0, num_frozen = 0, num_frozen_this_iteration = 0;

	if (!memorystatus_freeze_thread_init) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif
		memorystatus_freeze_thread_init = true;
	}

	max_to_freeze = memorystatus_pick_freeze_count_for_wakeup();

	lck_mtx_lock(&freezer_mutex);
	if (memorystatus_freeze_enabled) {
		if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
			memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
		}
		/*
		 * Keep freezing while we have candidates and swap space, and either
		 * a free freezer slot exists or enough re-freeze-eligible processes
		 * make refreezing worthwhile.
		 */
		while (num_frozen < max_to_freeze &&
		    memorystatus_can_freeze(&memorystatus_freeze_swap_low) &&
		    ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
		    (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD))) {
			/* Only freeze if we've not exceeded our pageout budgets.*/
			memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);

			if (memorystatus_freeze_budget_pages_remaining) {
				num_frozen_this_iteration = memorystatus_freeze_top_process();
				if (num_frozen_this_iteration == 0) {
					/* Nothing left to freeze. */
					break;
				}
				num_frozen += num_frozen_this_iteration;
			} else {
				memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
				break;
			}
		}
	}

	/*
	 * Give applications currently in the aging band a chance to age out into the idle band before
	 * running the freezer again.
	 */
	memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;

	/* Queue the wait before dropping the mutex so a wakeup cannot be lost. */
	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
	lck_mtx_unlock(&freezer_mutex);

	thread_block((thread_continue_t) memorystatus_freeze_thread);
}
2893 
2894 int
memorystatus_get_process_is_freezable(pid_t pid,int * is_freezable)2895 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2896 {
2897 	proc_t p = PROC_NULL;
2898 
2899 	if (pid == 0) {
2900 		return EINVAL;
2901 	}
2902 
2903 	p = proc_find(pid);
2904 	if (!p) {
2905 		return ESRCH;
2906 	}
2907 
2908 	/*
2909 	 * Only allow this on the current proc for now.
2910 	 * We can check for privileges and allow targeting another process in the future.
2911 	 */
2912 	if (p != current_proc()) {
2913 		proc_rele(p);
2914 		return EPERM;
2915 	}
2916 
2917 	proc_list_lock();
2918 	*is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
2919 	proc_rele(p);
2920 	proc_list_unlock();
2921 
2922 	return 0;
2923 }
2924 
2925 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)2926 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
2927 {
2928 	proc_t p = PROC_NULL;
2929 
2930 	if (pid == 0) {
2931 		return EINVAL;
2932 	}
2933 
2934 	/*
2935 	 * Only allow this on the current proc for now.
2936 	 * We can check for privileges and allow targeting another process in the future.
2937 	 */
2938 	p = current_proc();
2939 	if (proc_getpid(p) != pid) {
2940 		return EPERM;
2941 	}
2942 
2943 	proc_list_lock();
2944 	*is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
2945 	proc_list_unlock();
2946 
2947 	return 0;
2948 }
2949 
2950 int
memorystatus_set_process_is_freezable(pid_t pid,boolean_t is_freezable)2951 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
2952 {
2953 	proc_t p = PROC_NULL;
2954 
2955 	if (pid == 0) {
2956 		return EINVAL;
2957 	}
2958 
2959 	/*
2960 	 * To enable freezable status, you need to be root or an entitlement.
2961 	 */
2962 	if (is_freezable &&
2963 	    !kauth_cred_issuser(kauth_cred_get()) &&
2964 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
2965 		return EPERM;
2966 	}
2967 
2968 	p = proc_find(pid);
2969 	if (!p) {
2970 		return ESRCH;
2971 	}
2972 
2973 	/*
2974 	 * A process can change its own status. A coalition leader can
2975 	 * change the status of coalition members.
2976 	 * An entitled process (or root) can change anyone's status.
2977 	 */
2978 	if (p != current_proc() &&
2979 	    !kauth_cred_issuser(kauth_cred_get()) &&
2980 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
2981 		coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
2982 		if (!coalition_is_leader(proc_task(current_proc()), coal)) {
2983 			proc_rele(p);
2984 			return EPERM;
2985 		}
2986 	}
2987 
2988 	proc_list_lock();
2989 	if (is_freezable == FALSE) {
2990 		/* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
2991 		p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
2992 		printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
2993 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
2994 	} else {
2995 		p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
2996 		printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
2997 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
2998 	}
2999 	proc_rele(p);
3000 	proc_list_unlock();
3001 
3002 	return 0;
3003 }
3004 
3005 /*
3006  * Called when process is created before it is added to a memorystatus bucket.
3007  */
3008 void
memorystatus_freeze_init_proc(proc_t p)3009 memorystatus_freeze_init_proc(proc_t p)
3010 {
3011 	/* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3012 	if (memorystatus_freeze_budget_pages_remaining == 0) {
3013 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3014 	} else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3015 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3016 	} else {
3017 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3018 	}
3019 }
3020 
3021 
3022 static int
3023 sysctl_memorystatus_do_fastwake_warmup_all  SYSCTL_HANDLER_ARGS
3024 {
3025 #pragma unused(oidp, arg1, arg2)
3026 
3027 	if (!req->newptr) {
3028 		return EINVAL;
3029 	}
3030 
3031 	/* Need to be root or have entitlement */
3032 	if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
3033 		return EPERM;
3034 	}
3035 
3036 	if (memorystatus_freeze_enabled == FALSE) {
3037 		return ENOTSUP;
3038 	}
3039 
3040 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3041 		return ENOTSUP;
3042 	}
3043 
3044 	do_fastwake_warmup_all();
3045 
3046 	return 0;
3047 }
3048 
/* kern.memorystatus_do_fastwake_warmup_all: write-only trigger for do_fastwake_warmup_all() (root or entitlement required). */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3051 
3052 /*
3053  * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3054  * Takes ownership over the original value of list.
3055  * Assumes that list is protected by the freezer_mutex.
3056  * The caller should not hold any locks.
3057  */
3058 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3059 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3060 {
3061 	errno_t error = 0;
3062 	memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3063 	size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3064 
3065 	/* Validate the user provided list. */
3066 	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3067 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3068 		return EINVAL;
3069 	}
3070 
3071 	if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3072 		os_log_with_startup_serial(OS_LOG_DEFAULT,
3073 		    "memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3074 		return EINVAL;
3075 	}
3076 
3077 	entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3078 	entries_size = buffer_size;
3079 	entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3080 	if (entries == NULL) {
3081 		return ENOMEM;
3082 	}
3083 
3084 	error = copyin(buffer, entries, buffer_size);
3085 	if (error != 0) {
3086 		goto out;
3087 	}
3088 
3089 #if MACH_ASSERT
3090 	for (size_t i = 0; i < entry_count; i++) {
3091 		memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3092 		if (entry->version != 1) {
3093 			os_log(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3094 			error = EINVAL;
3095 			goto out;
3096 		}
3097 		if (i > 0 && entry->priority >= entries[i - 1].priority) {
3098 			os_log(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3099 			error = EINVAL;
3100 			goto out;
3101 		}
3102 	}
3103 #endif /* MACH_ASSERT */
3104 
3105 	lck_mtx_lock(&freezer_mutex);
3106 
3107 	tmp_entries = list->mfcl_list;
3108 	tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3109 	list->mfcl_list = entries;
3110 	list->mfcl_length = entry_count;
3111 
3112 	lck_mtx_unlock(&freezer_mutex);
3113 
3114 	entries = tmp_entries;
3115 	entries_size = tmp_size;
3116 
3117 out:
3118 	kfree_data(entries, entries_size);
3119 	return error;
3120 }
3121 
3122 errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer,size_t buffer_size)3123 memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
3124 {
3125 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
3126 }
3127 
3128 errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer,size_t buffer_size)3129 memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
3130 {
3131 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
3132 }
3133 
3134 void
memorystatus_freezer_mark_ui_transition(proc_t p)3135 memorystatus_freezer_mark_ui_transition(proc_t p)
3136 {
3137 	bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3138 	proc_list_lock();
3139 
3140 	if (isSysProc(p)) {
3141 		goto out;
3142 	}
3143 
3144 	frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3145 	previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3146 	xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3147 	suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3148 	if (!suspended) {
3149 		if (frozen) {
3150 			if (!previous_focal_thaw) {
3151 				p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3152 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3153 				if (xpc_service) {
3154 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3155 				}
3156 			}
3157 		}
3158 		os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3159 	}
3160 
3161 out:
3162 	proc_list_unlock();
3163 }
3164 
3165 #endif /* CONFIG_FREEZE */
3166