xref: /xnu-8020.101.4/bsd/kern/kern_memorystatus_freeze.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  *
28  */
29 
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_call.h>
40 #include <kern/thread_group.h>
41 
42 #include <libkern/libkern.h>
43 #include <mach/coalition.h>
44 #include <mach/mach_time.h>
45 #include <mach/task.h>
46 #include <mach/host_priv.h>
47 #include <mach/mach_host.h>
48 #include <os/log.h>
49 #include <pexpert/pexpert.h>
50 #include <sys/coalition.h>
51 #include <sys/kern_event.h>
52 #include <sys/proc.h>
53 #include <sys/proc_info.h>
54 #include <sys/reason.h>
55 #include <sys/signal.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysproto.h>
59 #include <sys/wait.h>
60 #include <sys/tree.h>
61 #include <sys/priv.h>
62 #include <vm/vm_pageout.h>
63 #include <vm/vm_protos.h>
64 #include <mach/machine/sdt.h>
65 #include <libkern/coreanalytics/coreanalytics.h>
66 #include <libkern/section_keywords.h>
67 #include <stdatomic.h>
68 
69 #include <IOKit/IOBSD.h>
70 
71 #if CONFIG_FREEZE
72 #include <vm/vm_map.h>
73 #endif /* CONFIG_FREEZE */
74 
75 #include <sys/kern_memorystatus.h>
76 #include <sys/kern_memorystatus_freeze.h>
77 #include <sys/kern_memorystatus_notify.h>
78 
79 #if CONFIG_JETSAM
80 
81 extern unsigned int memorystatus_available_pages;
82 extern unsigned int memorystatus_available_pages_pressure;
83 extern unsigned int memorystatus_available_pages_critical;
84 extern unsigned int memorystatus_available_pages_critical_base;
85 extern unsigned int memorystatus_available_pages_critical_idle_offset;
86 
87 #else /* CONFIG_JETSAM */
88 
89 extern uint64_t memorystatus_available_pages;
90 extern uint64_t memorystatus_available_pages_pressure;
91 extern uint64_t memorystatus_available_pages_critical;
92 
93 #endif /* CONFIG_JETSAM */
94 
95 unsigned int memorystatus_frozen_count = 0;
96 unsigned int memorystatus_frozen_count_webcontent = 0;
97 unsigned int memorystatus_frozen_count_xpc_service = 0;
98 unsigned int memorystatus_suspended_count = 0;
99 unsigned long freeze_threshold_percentage = 50;
100 
101 #if CONFIG_FREEZE
102 
103 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
104 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
105 
106 /* Thresholds */
107 unsigned int memorystatus_freeze_threshold = 0;
108 unsigned int memorystatus_freeze_pages_min = 0;
109 unsigned int memorystatus_freeze_pages_max = 0;
110 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
111 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
112 uint64_t     memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
113 uint64_t     memorystatus_freeze_budget_multiplier = 100; /* Multiplies the daily budget by 100/multiplier */
114 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
115 unsigned int memorystatus_freeze_max_candidate_band = FREEZE_MAX_CANDIDATE_BAND;
116 
117 unsigned int memorystatus_max_frozen_demotions_daily = 0;
118 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
119 
120 boolean_t memorystatus_freeze_enabled = FALSE;
121 int memorystatus_freeze_wakeup = 0;
122 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
123 
124 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
125 
126 #ifdef XNU_KERNEL_PRIVATE
127 
128 unsigned int memorystatus_frozen_processes_max = 0;
129 unsigned int memorystatus_frozen_shared_mb = 0;
130 unsigned int memorystatus_frozen_shared_mb_max = 0;
131 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
132 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
133 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
134 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
135 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
136 
137 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
138 
139 #endif /* XNU_KERNEL_PRIVATE */
140 
141 static inline boolean_t memorystatus_can_freeze_processes(void);
142 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
143 static boolean_t memorystatus_is_process_eligible_for_freeze(proc_t p);
144 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
145 static uint32_t memorystatus_freeze_calculate_new_budget(
146 	unsigned int time_since_last_interval_expired_sec,
147 	unsigned int burst_multiple,
148 	unsigned int interval_duration_min,
149 	uint32_t rollover);
150 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
151 
152 static void memorystatus_set_freeze_is_enabled(bool enabled);
153 static void memorystatus_disable_freeze(void);
154 static bool kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out);
155 
156 /* Stats */
157 static uint64_t memorystatus_freeze_pageouts = 0;
158 
159 /* Throttling */
160 #define DEGRADED_WINDOW_MINS    (30)
161 #define NORMAL_WINDOW_MINS      (24 * 60)
162 
163 /* Protected by the freezer_mutex */
164 static throttle_interval_t throttle_intervals[] = {
165 	{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
166 	{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
167 };
168 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
169 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
170 uint32_t memorystatus_freeze_current_interval = 0;
171 static thread_call_t freeze_interval_reset_thread_call;
172 static uint32_t memorystatus_freeze_calculate_new_budget(
173 	unsigned int time_since_last_interval_expired_sec,
174 	unsigned int burst_multiple,
175 	unsigned int interval_duration_min,
176 	uint32_t rollover);
177 
/* An ordered list of freeze or demotion candidates */
struct memorystatus_freezer_candidate_list {
	memorystatus_properties_freeze_entry_v1 *mfcl_list; /* owned array of candidate entries, or NULL */
	size_t mfcl_length; /* number of entries in mfcl_list */
};
struct memorystatus_freezer_candidate_list memorystatus_global_freeze_list = {NULL, 0};
struct memorystatus_freezer_candidate_list memorystatus_global_demote_list = {NULL, 0};
/*
 * When enabled, freeze candidates are chosen from the memorystatus_global_freeze_list
 * in order (as opposed to using the older LRU approach).
 * NOTE(review): declared as int but registered via EXPERIMENT_FACTOR_UINT — confirm
 * the experiment-factor machinery tolerates the signedness mismatch.
 */
int memorystatus_freezer_use_ordered_list = 0;
EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_ordered_list, &memorystatus_freezer_use_ordered_list, 0, 1, "");
/*
 * When enabled, demotion candidates are chosen from memorystatus_global_demote_list.
 */
int memorystatus_freezer_use_demotion_list = 0;
EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freezer_use_demotion_list, &memorystatus_freezer_use_demotion_list, 0, 1, "");
196 
197 extern uint64_t vm_swap_get_free_space(void);
198 extern boolean_t vm_swap_max_budget(uint64_t *);
199 extern int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);
200 
201 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
202 static void memorystatus_demote_frozen_processes(bool urgent_mode);
203 
204 static void memorystatus_freeze_handle_error(proc_t p, const int freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
205 static void memorystatus_freeze_out_of_slots(void);
206 static uint64_t memorystatus_freezer_thread_next_run_ts = 0;
207 
208 /* Sysctls needed for aggd stats */
209 
210 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
211 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_webcontent, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_webcontent, 0, "");
212 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count_xpc_service, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count_xpc_service, 0, "");
213 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
214 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
215 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
216 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
217 
218 /*
219  * Force a new interval with the given budget (no rollover).
220  */
221 static void
memorystatus_freeze_force_new_interval(uint64_t new_budget)222 memorystatus_freeze_force_new_interval(uint64_t new_budget)
223 {
224 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
225 	mach_timespec_t now_ts;
226 	clock_sec_t sec;
227 	clock_nsec_t nsec;
228 
229 	clock_get_system_nanotime(&sec, &nsec);
230 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
231 	now_ts.tv_nsec = nsec;
232 	memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
233 	/* Don't carry over any excess pageouts since we're forcing a new budget */
234 	normal_throttle_window->pageouts = 0;
235 	memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
236 }
237 #if DEVELOPMENT || DEBUG
238 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
239 {
240 	#pragma unused(arg1, arg2, oidp)
241 	int error, changed;
242 	uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
243 
244 	lck_mtx_lock(&freezer_mutex);
245 
246 	error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
247 	if (changed) {
248 		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
249 			lck_mtx_unlock(&freezer_mutex);
250 			return ENOTSUP;
251 		}
252 		memorystatus_freeze_force_new_interval(new_budget);
253 	}
254 
255 	lck_mtx_unlock(&freezer_mutex);
256 	return error;
257 }
258 
259 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
260 #else /* DEVELOPMENT || DEBUG */
261 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
262 #endif /* DEVELOPMENT || DEBUG */
263 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
264 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
265 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
266 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
267 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
268 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
269 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_elevated_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_elevated_count, "");
270 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
271 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
272 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
273 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
274 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
275 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
276 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
277 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
278 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_freeze_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_freeze_pid_mismatches, "");
279 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_demote_pid_mismatches, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_demote_pid_mismatches, "");
280 
281 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
282 
283 static inline bool
proc_is_refreeze_eligible(proc_t p)284 proc_is_refreeze_eligible(proc_t p)
285 {
286 	return (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) != 0;
287 }
288 
/*
 * Calculates the hit rate for the freezer: the percentage of procs currently
 * in the freezer that we have thawed (re-used). A low hit rate means we're
 * freezing bad candidates.
 */
static int
calculate_thaw_percentage(uint64_t frozen_count, uint64_t thaw_count)
{
	if (frozen_count == 0 || thaw_count >= frozen_count) {
		/*
		 * The counters are maintained with relaxed atomics and may be
		 * momentarily out of sync (thaws > frozen); clamp to 100%.
		 * An empty freezer also reports 100%.
		 */
		return 100;
	}
	return (int)((100 * thaw_count) / frozen_count);
}
313 
314 static int
get_thaw_percentage()315 get_thaw_percentage()
316 {
317 	uint64_t processes_frozen, processes_thawed;
318 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
319 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
320 	return calculate_thaw_percentage(processes_frozen, processes_thawed);
321 }
322 
/* sysctl kern.memorystatus_freezer_thaw_percentage (read-only). */
static int
sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int thaw_percentage = get_thaw_percentage();
	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
}
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
331 
/* Thaw percentage counting only foreground thaws (mfs_processes_thawed_fg). */
static int
get_thaw_percentage_fg()
{
	uint64_t processes_frozen, processes_thawed_fg;
	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
	return calculate_thaw_percentage(processes_frozen, processes_thawed_fg);
}

/* sysctl kern.memorystatus_freezer_thaw_percentage_fg (read-only). */
static int sysctl_memorystatus_freezer_thaw_percentage_fg SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int thaw_percentage = get_thaw_percentage_fg();
	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
}
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg, "I", "");
348 
/* Thaw percentage restricted to WebContent processes (mfs_*_webcontent counters). */
static int
get_thaw_percentage_webcontent()
{
	uint64_t processes_frozen_webcontent, processes_thawed_webcontent;
	processes_frozen_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, relaxed);
	processes_thawed_webcontent = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, relaxed);
	return calculate_thaw_percentage(processes_frozen_webcontent, processes_thawed_webcontent);
}

/* sysctl kern.memorystatus_freezer_thaw_percentage_webcontent (read-only). */
static int sysctl_memorystatus_freezer_thaw_percentage_webcontent SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int thaw_percentage = get_thaw_percentage_webcontent();
	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
}
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_webcontent, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_webcontent, "I", "");
365 
366 
367 static int
get_thaw_percentage_bg()368 get_thaw_percentage_bg()
369 {
370 	uint64_t processes_frozen, processes_thawed_fg, processes_thawed;
371 	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
372 	processes_thawed = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
373 	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
374 	return calculate_thaw_percentage(processes_frozen, processes_thawed - processes_thawed_fg);
375 }
376 
377 static int sysctl_memorystatus_freezer_thaw_percentage_bg SYSCTL_HANDLER_ARGS
378 {
379 #pragma unused(arg1, arg2)
380 	int thaw_percentage = get_thaw_percentage_bg();
381 	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
382 }
383 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_bg, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_bg, "I", "");
384 
/*
 * Thaw percentage for foreground thaws of non-XPC-service processes:
 * (fg thaws - fg XPC-service thaws) / (frozen - frozen XPC services).
 */
static int
get_thaw_percentage_fg_non_xpc_service()
{
	uint64_t processes_frozen, processes_frozen_xpc_service, processes_thawed_fg, processes_thawed_fg_xpc_service;
	processes_frozen = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
	processes_frozen_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, relaxed);
	processes_thawed_fg = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
	processes_thawed_fg_xpc_service = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, relaxed);
	/*
	 * Since these are all relaxed loads, it's possible (although unlikely) to read a value for
	 * frozen/thawed xpc services that's > the value for processes frozen / thawed.
	 * Clamp just in case, so neither subtraction below can underflow.
	 */
	processes_frozen_xpc_service = MIN(processes_frozen_xpc_service, processes_frozen);
	processes_thawed_fg_xpc_service = MIN(processes_thawed_fg_xpc_service, processes_thawed_fg);
	return calculate_thaw_percentage(processes_frozen - processes_frozen_xpc_service, processes_thawed_fg - processes_thawed_fg_xpc_service);
}

/* sysctl kern.memorystatus_freezer_thaw_percentage_fg_non_xpc_service (read-only). */
static int sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int thaw_percentage = get_thaw_percentage_fg_non_xpc_service();
	return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage_fg_non_xpc_service, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage_fg_non_xpc_service, "I", "");
411 
412 #define FREEZER_ERROR_STRING_LENGTH 128
413 
414 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
415 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
416 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
417 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_MAX - 1, "");
418 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
419 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
420 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_max_candidate_band, &memorystatus_freeze_max_candidate_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_MAX - 1, "");
/*
 * Read/write the freeze budget multiplier (daily budget is scaled by
 * 100/multiplier; see memorystatus_freeze_budget_multiplier). On a write,
 * a new normal throttle interval is started immediately with the rescaled
 * budget. On release kernels the multiplier can only be lowered (<= 100).
 * NOTE(review): the pragma below marks req unused, but req IS used via
 * sysctl_io_number — confirm this doesn't warn on current toolchains.
 */
static int
sysctl_memorystatus_freeze_budget_multiplier SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp, req)
	int error = 0, changed = 0;
	uint64_t val = memorystatus_freeze_budget_multiplier;
	unsigned int new_budget;
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t now_ts;

	error = sysctl_io_number(req, memorystatus_freeze_budget_multiplier, sizeof(val), &val, &changed);
	if (error) {
		return error;
	}
	if (changed) {
		if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/* No budget to scale unless the freezer swaps to disk. */
			return ENOTSUP;
		}
#if !(DEVELOPMENT || DEBUG)
		if (val > 100) {
			/* Can not increase budget on release. */
			return EINVAL;
		}
#endif
		lck_mtx_lock(&freezer_mutex);

		memorystatus_freeze_budget_multiplier = val;
		/* Start a new throttle interval with this budget multiplier */
		new_budget = memorystatus_freeze_calculate_new_budget(0, 1, NORMAL_WINDOW_MINS, 0);
		clock_get_system_nanotime(&sec, &nsec);
		now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
		now_ts.tv_nsec = nsec;
		memorystatus_freeze_start_normal_throttle_interval(new_budget, now_ts);
		memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;

		lck_mtx_unlock(&freezer_mutex);
	}
	return 0;
}
461 EXPERIMENT_FACTOR_PROC(_kern, memorystatus_freeze_budget_multiplier, CTLTYPE_QUAD | CTLFLAG_RW, 0, 0, &sysctl_memorystatus_freeze_budget_multiplier, "Q", "");
462 /*
463  * max. # of frozen process demotions we will allow in our daily cycle.
464  */
465 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
466 
467 /*
468  * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
469  */
470 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
471 
472 #if DEVELOPMENT || DEBUG
473 
474 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
475 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
476 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
477 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
478 
479 /*
480  * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
481  * "0" means no limit.
482  * Default is 10% of system-wide task limit.
483  */
484 
485 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
486 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
487 
488 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
489 
490 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
491 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
492 
493 /*
494  * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
495  * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
496  */
497 boolean_t memorystatus_freeze_to_memory = FALSE;
498 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
499 
500 #define VM_PAGES_FOR_ALL_PROCS    (2)
501 
/*
 * Manual trigger of freeze and thaw for dev / debug kernels only.
 *
 * Write a pid to force-freeze that process (bypassing budget and
 * eligibility checks), or VM_PAGES_FOR_ALL_PROCS (2) to page out anonymous
 * pages system-wide. If the frozen process leads a jetsam coalition, its
 * XPC services are force-frozen too, by looping back to the 'again' label
 * with each service pid in turn.
 */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error, pid = 0;
	proc_t p;
	int freezer_error_code = 0;
	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
	int ntasks = 0;
	/* Non-NULL only on the re-entry passes that freeze coalition XPC services. */
	coalition_t coal = COALITION_NULL;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (pid == VM_PAGES_FOR_ALL_PROCS) {
		vm_pageout_anonymous_pages();

		return 0;
	}

	lck_mtx_lock(&freezer_mutex);
	if (memorystatus_freeze_enabled == FALSE) {
		lck_mtx_unlock(&freezer_mutex);
		printf("sysctl_freeze: Freeze is DISABLED\n");
		return ENOTSUP;
	}

/* Re-entered (with the freezer mutex still held) once per XPC-service pid. */
again:
	p = proc_find(pid);
	if (p != NULL) {
		memorystatus_freezer_stats.mfs_process_considered_count++;
		uint32_t purgeable, wired, clean, dirty, shared;
		uint32_t max_pages = 0, state = 0;

		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
			/*
			 * Freezer backed by the compressor and swap file(s)
			 * will hold compressed data.
			 *
			 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
			 * being swapped out to disk. Note that this disables freezer swap support globally,
			 * not just for the process being frozen.
			 *
			 *
			 * We don't care about the global freezer budget or the process's (min/max) budget here.
			 * The freeze sysctl is meant to force-freeze a process.
			 *
			 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
			 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
			 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
			 */
			max_pages = memorystatus_freeze_pages_max;
		} else {
			/*
			 * We only have the compressor without any swap.
			 */
			max_pages = UINT32_MAX - 1;
		}

		proc_list_lock();
		state = p->p_memstat_state;
		proc_list_unlock();

		/*
		 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
		 * We simply ensure that jetsam is not already working on the process and that the process has not
		 * explicitly disabled freezing.
		 */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
			printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
			    (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
			    (state & P_MEMSTAT_LOCKED) ? " locked" : "",
			    (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");

			proc_rele(p);
			lck_mtx_unlock(&freezer_mutex);
			return EPERM;
		}

		error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
		if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
			memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
		}

		if (error) {
			memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
			if (error == KERN_NO_SPACE) {
				/* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
				error = ENOSPC;
			} else {
				error = EIO;
			}
		} else {
			proc_list_lock();
			if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
				/* First freeze of this process: mark it and update global counts. */
				p->p_memstat_state |= P_MEMSTAT_FROZEN;
				p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
				memorystatus_frozen_count++;
				os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
				if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
					memorystatus_frozen_count_webcontent++;
					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
				}
				if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
					memorystatus_freeze_out_of_slots();
				}
			} else {
				// This was a re-freeze
				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
					memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
					memorystatus_freezer_stats.mfs_refreeze_count++;
				}
			}
			p->p_memstat_frozen_count++;

			if (coal != NULL) {
				/* We just froze an xpc service. Mark it as such for telemetry */
				p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
				memorystatus_frozen_count_xpc_service++;
				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
			}


			proc_list_unlock();

			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
				/*
				 * We elevate only if we are going to swap out the data.
				 */
				error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
				    memorystatus_freeze_jetsam_band, TRUE);

				if (error) {
					printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
				}
			}
		}

		if ((error == 0) && (coal == NULL)) {
			/*
			 * We froze a process and so we check to see if it was
			 * a coalition leader and if it has XPC services that
			 * might need freezing.
			 * Only one leader can be frozen at a time and so we shouldn't
			 * enter this block more than once per call. Hence the
			 * check that 'coal' has to be NULL. We should make this an
			 * assert() or panic() once we have a much more concrete way
			 * to detect an app vs a daemon.
			 */

			task_t          curr_task = NULL;

			/* NOTE(review): uses proc_task(p) here but p->task above — presumably equivalent; confirm. */
			curr_task = proc_task(p);
			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
			if (coalition_is_leader(curr_task, coal)) {
				ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
				    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);

				if (ntasks > MAX_XPC_SERVICE_PIDS) {
					ntasks = MAX_XPC_SERVICE_PIDS;
				}
			}
		}

		proc_rele(p);

		/*
		 * Process the next XPC-service pid, if any. The 'while' is really an
		 * 'if': the goto re-enters the function body at 'again' and this loop
		 * is reached once per pid. Note only the last iteration's error is returned.
		 */
		while (ntasks) {
			pid = pid_list[--ntasks];
			goto again;
		}

		lck_mtx_unlock(&freezer_mutex);
		return error;
	} else {
		printf("sysctl_freeze: Invalid process\n");
	}


	lck_mtx_unlock(&freezer_mutex);
	return EINVAL;
}
688 
/* kern.memorystatus_freeze: write-only (CTLFLAG_WR), hidden from listings (CTLFLAG_MASKED); handled by sysctl_memorystatus_freeze above. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");
691 
692 /*
693  * Manual trigger of agressive frozen demotion for dev / debug kernels only.
694  */
695 static int
696 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
697 {
698 #pragma unused(arg1, arg2)
699 	int error, val;
700 	/*
701 	 * Only demote on write to prevent demoting during `sysctl -a`.
702 	 * The actual value written doesn't matter.
703 	 */
704 	error = sysctl_handle_int(oidp, &val, 0, req);
705 	if (error || !req->newptr) {
706 		return error;
707 	}
708 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
709 		return ENOTSUP;
710 	}
711 	lck_mtx_lock(&freezer_mutex);
712 	memorystatus_demote_frozen_processes(false);
713 	lck_mtx_unlock(&freezer_mutex);
714 	return 0;
715 }
716 
717 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
718 
719 static int
720 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
721 {
722 #pragma unused(arg1, arg2)
723 
724 	int error, pid = 0;
725 	proc_t p;
726 
727 	if (memorystatus_freeze_enabled == FALSE) {
728 		return ENOTSUP;
729 	}
730 
731 	error = sysctl_handle_int(oidp, &pid, 0, req);
732 	if (error || !req->newptr) {
733 		return error;
734 	}
735 
736 	if (pid == VM_PAGES_FOR_ALL_PROCS) {
737 		do_fastwake_warmup_all();
738 		return 0;
739 	} else {
740 		p = proc_find(pid);
741 		if (p != NULL) {
742 			error = task_thaw(p->task);
743 
744 			if (error) {
745 				error = EIO;
746 			} else {
747 				/*
748 				 * task_thaw() succeeded.
749 				 *
750 				 * We increment memorystatus_frozen_count on the sysctl freeze path.
751 				 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
752 				 * when this process exits.
753 				 *
754 				 * proc_list_lock();
755 				 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
756 				 * proc_list_unlock();
757 				 */
758 			}
759 			proc_rele(p);
760 			return error;
761 		}
762 	}
763 
764 	return EINVAL;
765 }
766 
/* kern.memorystatus_thaw: write a pid (or VM_PAGES_FOR_ALL_PROCS) to thaw; handled by sysctl_memorystatus_available_pages_thaw. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
769 
770 
/* Device-wide freezer state, reported as the header of the GET_STATUS buffer. */
typedef struct _global_freezable_status {
	boolean_t       freeze_pages_threshold_crossed;  /* available pages < memorystatus_freeze_threshold */
	boolean_t       freeze_eligible_procs_available; /* suspended-but-not-frozen count exceeds the suspended threshold */
	boolean_t       freeze_scheduled_in_future;      /* freezer thread's next scheduled run is still ahead of now */
}global_freezable_status_t;
776 
/* Per-process freezability verdict; one entry per process evaluated by GET_STATUS. */
typedef struct _proc_freezable_status {
	boolean_t    freeze_has_memstat_state;  /* memstat flags permit freezing (suspended, not disabled/terminated/locked) */
	boolean_t    freeze_has_pages_min;      /* footprint meets memorystatus_freeze_pages_min (always TRUE for XPC services) */
	int        freeze_has_probability;      /* probability-of-use from the global table; -1 when not applicable */
	int        freeze_leader_eligible;      /* FREEZE_PROC_LEADER_FREEZABLE_* verdict for coalition-driven freezes */
	boolean_t    freeze_attempted;          /* task_freeze() eval-only pass was attempted */
	uint32_t    p_memstat_state;            /* raw p_memstat_state snapshot */
	uint32_t    p_pages;                    /* footprint in pages; -1 for XPC services (size check skipped) */
	int        p_freeze_error_code;         /* freezer error from the eval-only task_freeze() */
	int        p_pid;
	int        p_leader_pid;                /* coalition leader pid; 0 when not a coalition-driven freeze */
	char        p_name[MAXCOMLEN + 1];
}proc_freezable_status_t;
790 
#define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */

/*
 * For coalition based freezing evaluations, we proceed as follows:
 *  - detect that the process is a coalition member and a XPC service
 *  - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be freezable too
 *
 * Once we are done evaluating all processes, we do a quick run thru all
 * processes and for a coalition member XPC service we look up the 'freezable'
 * status of its leader and iff:
 *  - the xpc service is freezable i.e. its individual freeze evaluation worked
 *  - and, its leader is also marked freezable
 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */

/* Verdicts stored in proc_freezable_status_t.freeze_leader_eligible. */
#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN   (-1)
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1)
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2)
810 
811 static int
memorystatus_freezer_get_status(user_addr_t buffer,size_t buffer_size,int32_t * retval)812 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
813 {
814 	uint32_t            proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
815 	global_freezable_status_t    *list_head;
816 	proc_freezable_status_t     *list_entry, *list_entry_start;
817 	size_t                list_size = 0, entry_count = 0;
818 	proc_t                p, leader_proc;
819 	memstat_bucket_t        *bucket;
820 	uint32_t            state = 0, pages = 0;
821 	boolean_t            try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
822 	int                error = 0, probability_of_use = 0;
823 	pid_t              leader_pid = 0;
824 
825 
826 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
827 		return ENOTSUP;
828 	}
829 
830 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
831 
832 	if (buffer_size < list_size) {
833 		return EINVAL;
834 	}
835 
836 	list_head = (global_freezable_status_t *)kalloc_data(list_size, Z_WAITOK | Z_ZERO);
837 	if (list_head == NULL) {
838 		return ENOMEM;
839 	}
840 
841 	list_size = sizeof(global_freezable_status_t);
842 
843 	proc_list_lock();
844 
845 	uint64_t curr_time = mach_absolute_time();
846 
847 	list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
848 	list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
849 	list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
850 
851 	list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
852 	list_entry = list_entry_start;
853 
854 	bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
855 
856 	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
857 
858 	p = memorystatus_get_first_proc_locked(&band, FALSE);
859 	proc_count++;
860 
861 	while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
862 	    (p) &&
863 	    (list_size < buffer_size)) {
864 		if (isSysProc(p)) {
865 			/*
866 			 * Daemon:- We will consider freezing it iff:
867 			 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
868 			 * - its role in the coalition is XPC service.
869 			 *
870 			 * We skip memory size requirements in this case.
871 			 */
872 
873 			coalition_t     coal = COALITION_NULL;
874 			task_t          leader_task = NULL, curr_task = NULL;
875 			int             task_role_in_coalition = 0;
876 
877 			curr_task = proc_task(p);
878 			coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
879 
880 			if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
881 				/*
882 				 * By default, XPC services without an app
883 				 * will be the leader of their own single-member
884 				 * coalition.
885 				 */
886 				goto skip_ineligible_xpc;
887 			}
888 
889 			leader_task = coalition_get_leader(coal);
890 			if (leader_task == TASK_NULL) {
891 				/*
892 				 * This jetsam coalition is currently leader-less.
893 				 * This could happen if the app died, but XPC services
894 				 * have not yet exited.
895 				 */
896 				goto skip_ineligible_xpc;
897 			}
898 
899 			leader_proc = (proc_t)get_bsdtask_info(leader_task);
900 			task_deallocate(leader_task);
901 
902 			if (leader_proc == PROC_NULL) {
903 				/* leader task is exiting */
904 				goto skip_ineligible_xpc;
905 			}
906 
907 			task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
908 
909 			if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
910 				xpc_skip_size_probability_check = TRUE;
911 				leader_pid = proc_getpid(leader_proc);
912 				goto continue_eval;
913 			}
914 
915 skip_ineligible_xpc:
916 			p = memorystatus_get_next_proc_locked(&band, p, FALSE);
917 			proc_count++;
918 			continue;
919 		}
920 
921 continue_eval:
922 		strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
923 
924 		list_entry->p_pid = proc_getpid(p);
925 
926 		state = p->p_memstat_state;
927 
928 		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
929 		    !(state & P_MEMSTAT_SUSPENDED)) {
930 			try_freeze = list_entry->freeze_has_memstat_state = FALSE;
931 		} else {
932 			try_freeze = list_entry->freeze_has_memstat_state = TRUE;
933 		}
934 
935 		list_entry->p_memstat_state = state;
936 
937 		if (xpc_skip_size_probability_check == TRUE) {
938 			/*
939 			 * Assuming the coalition leader is freezable
940 			 * we don't care re. minimum pages and probability
941 			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
942 			 * XPC services have to be explicity opted-out of the disabled
943 			 * state. And we checked that state above.
944 			 */
945 			list_entry->freeze_has_pages_min = TRUE;
946 			list_entry->p_pages = -1;
947 			list_entry->freeze_has_probability = -1;
948 
949 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
950 			list_entry->p_leader_pid = leader_pid;
951 
952 			xpc_skip_size_probability_check = FALSE;
953 		} else {
954 			list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
955 			list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
956 
957 			memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
958 			if (pages < memorystatus_freeze_pages_min) {
959 				try_freeze = list_entry->freeze_has_pages_min = FALSE;
960 			} else {
961 				list_entry->freeze_has_pages_min = TRUE;
962 			}
963 
964 			list_entry->p_pages = pages;
965 
966 			if (entry_count) {
967 				uint32_t j = 0;
968 				for (j = 0; j < entry_count; j++) {
969 					if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
970 					    p->p_name,
971 					    MAXCOMLEN) == 0) {
972 						probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
973 						break;
974 					}
975 				}
976 
977 				list_entry->freeze_has_probability = probability_of_use;
978 
979 				try_freeze = ((probability_of_use > 0) && try_freeze);
980 			} else {
981 				list_entry->freeze_has_probability = -1;
982 			}
983 		}
984 
985 		if (try_freeze) {
986 			uint32_t purgeable, wired, clean, dirty, shared;
987 			uint32_t max_pages = 0;
988 			int freezer_error_code = 0;
989 
990 			error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
991 
992 			if (error) {
993 				list_entry->p_freeze_error_code = freezer_error_code;
994 			}
995 
996 			list_entry->freeze_attempted = TRUE;
997 		}
998 
999 		list_entry++;
1000 		freeze_eligible_proc_considered++;
1001 
1002 		list_size += sizeof(proc_freezable_status_t);
1003 
1004 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1005 		proc_count++;
1006 	}
1007 
1008 	proc_list_unlock();
1009 
1010 	list_entry = list_entry_start;
1011 
1012 	for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
1013 		if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
1014 			leader_pid = list_entry[xpc_index].p_leader_pid;
1015 
1016 			leader_proc = proc_find(leader_pid);
1017 
1018 			if (leader_proc) {
1019 				if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
1020 					/*
1021 					 * Leader has already been frozen.
1022 					 */
1023 					list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1024 					proc_rele(leader_proc);
1025 					continue;
1026 				}
1027 				proc_rele(leader_proc);
1028 			}
1029 
1030 			for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
1031 				if (list_entry[leader_index].p_pid == leader_pid) {
1032 					if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
1033 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
1034 					} else {
1035 						list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1036 						list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1037 					}
1038 					break;
1039 				}
1040 			}
1041 
1042 			/*
1043 			 * Didn't find the leader entry. This might be likely because
1044 			 * the leader never made it down to band 0.
1045 			 */
1046 			if (leader_index == freeze_eligible_proc_considered) {
1047 				list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
1048 				list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
1049 			}
1050 		}
1051 	}
1052 
1053 	buffer_size = MIN(list_size, INT32_MAX);
1054 
1055 	error = copyout(list_head, buffer, buffer_size);
1056 	if (error == 0) {
1057 		*retval = (int32_t) buffer_size;
1058 	} else {
1059 		*retval = 0;
1060 	}
1061 
1062 	list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
1063 	kfree_data(list_head, list_size);
1064 
1065 	MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)*list_size);
1066 
1067 	return error;
1068 }
1069 
1070 #endif /* DEVELOPMENT || DEBUG */
1071 
1072 /*
1073  * Get a list of all processes in the freezer band which are currently frozen.
1074  * Used by powerlog to collect analytics on frozen process.
1075  */
1076 static int
memorystatus_freezer_get_procs(user_addr_t buffer,size_t buffer_size,int32_t * retval)1077 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1078 {
1079 	global_frozen_procs_t *frozen_procs = NULL;
1080 	uint32_t band = memorystatus_freeze_jetsam_band;
1081 	proc_t p;
1082 	uint32_t state;
1083 	int error;
1084 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
1085 		return ENOTSUP;
1086 	}
1087 	if (buffer_size < sizeof(global_frozen_procs_t)) {
1088 		return EINVAL;
1089 	}
1090 	frozen_procs = (global_frozen_procs_t *)kalloc_data(sizeof(global_frozen_procs_t), Z_WAITOK | Z_ZERO);
1091 	if (frozen_procs == NULL) {
1092 		return ENOMEM;
1093 	}
1094 
1095 	proc_list_lock();
1096 	p = memorystatus_get_first_proc_locked(&band, FALSE);
1097 	while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
1098 		state = p->p_memstat_state;
1099 		if (state & P_MEMSTAT_FROZEN) {
1100 			frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = proc_getpid(p);
1101 			strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
1102 			    p->p_name, sizeof(proc_name_t));
1103 			frozen_procs->gfp_num_frozen++;
1104 		}
1105 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1106 	}
1107 	proc_list_unlock();
1108 
1109 	buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
1110 	error = copyout(frozen_procs, buffer, buffer_size);
1111 	if (error == 0) {
1112 		*retval = (int32_t) buffer_size;
1113 	} else {
1114 		*retval = 0;
1115 	}
1116 	kfree_data(frozen_procs, sizeof(global_frozen_procs_t));
1117 
1118 	return error;
1119 }
1120 
1121 /*
1122  * If dasd is running an experiment that impacts their freezer candidate selection,
1123  * we record that in our telemetry.
1124  */
1125 static memorystatus_freezer_trial_identifiers_v1 dasd_trial_identifiers;
1126 
1127 static int
memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer,size_t buffer_size,int32_t * retval)1128 memorystatus_freezer_set_dasd_trial_identifiers(user_addr_t buffer, size_t buffer_size, int32_t *retval)
1129 {
1130 	memorystatus_freezer_trial_identifiers_v1 identifiers;
1131 	int error = 0;
1132 
1133 	if (buffer_size != sizeof(identifiers)) {
1134 		return EINVAL;
1135 	}
1136 	error = copyin(buffer, &identifiers, sizeof(identifiers));
1137 	if (error != 0) {
1138 		return error;
1139 	}
1140 	if (identifiers.version != 1) {
1141 		return EINVAL;
1142 	}
1143 	dasd_trial_identifiers = identifiers;
1144 	*retval = 0;
1145 	return error;
1146 }
1147 
1148 /*
1149  * Reset the freezer state by wiping out all suspended frozen apps, clearing
1150  * per-process freezer state, and starting a fresh interval.
1151  */
1152 static int
memorystatus_freezer_reset_state(int32_t * retval)1153 memorystatus_freezer_reset_state(int32_t *retval)
1154 {
1155 	uint32_t band = JETSAM_PRIORITY_IDLE;
1156 	/* Don't kill above the frozen band */
1157 	uint32_t kMaxBand = memorystatus_freeze_jetsam_band;
1158 	proc_t next_p = PROC_NULL;
1159 	uint64_t new_budget;
1160 
1161 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1162 		return ENOTSUP;
1163 	}
1164 
1165 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
1166 	if (jetsam_reason == OS_REASON_NULL) {
1167 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_freezer_reset_state -- sync: failed to allocate jetsam reason\n");
1168 	}
1169 	lck_mtx_lock(&freezer_mutex);
1170 	kill_all_frozen_processes(kMaxBand, true, jetsam_reason, NULL);
1171 	proc_list_lock();
1172 
1173 	/*
1174 	 * Clear the considered and skip reason flags on all processes
1175 	 * so we're starting fresh with the new policy.
1176 	 */
1177 	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1178 	while (next_p) {
1179 		proc_t p = next_p;
1180 		uint32_t state = p->p_memstat_state;
1181 		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1182 
1183 		if (p->p_memstat_effectivepriority > kMaxBand) {
1184 			break;
1185 		}
1186 		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
1187 			continue;
1188 		}
1189 
1190 		p->p_memstat_state &= ~(P_MEMSTAT_FREEZE_CONSIDERED);
1191 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1192 	}
1193 
1194 	proc_list_unlock();
1195 
1196 	new_budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
1197 	memorystatus_freeze_force_new_interval(new_budget);
1198 
1199 	lck_mtx_unlock(&freezer_mutex);
1200 	*retval = 0;
1201 	return 0;
1202 }
1203 
1204 int
memorystatus_freezer_control(int32_t flags,user_addr_t buffer,size_t buffer_size,int32_t * retval)1205 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
1206 {
1207 	int err = ENOTSUP;
1208 
1209 #if DEVELOPMENT || DEBUG
1210 	if (flags == FREEZER_CONTROL_GET_STATUS) {
1211 		err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
1212 	}
1213 #endif /* DEVELOPMENT || DEBUG */
1214 	if (flags == FREEZER_CONTROL_GET_PROCS) {
1215 		err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
1216 	} else if (flags == FREEZER_CONTROL_SET_DASD_TRIAL_IDENTIFIERS) {
1217 		err = memorystatus_freezer_set_dasd_trial_identifiers(buffer, buffer_size, retval);
1218 	} else if (flags == FREEZER_CONTROL_RESET_STATE) {
1219 		err = memorystatus_freezer_reset_state(retval);
1220 	}
1221 
1222 	return err;
1223 }
1224 
1225 extern void        vm_swap_consider_defragmenting(int);
1226 extern void vm_page_reactivate_all_throttled(void);
1227 
/*
 * Jetsam every frozen process from the idle band up to and including 'max_band'.
 *
 * max_band:             highest effective priority band to kill in.
 * suspended_only:       when true, skip frozen processes that are not suspended.
 * jetsam_reason:        reason attached to each kill (a reference is taken per
 *                       kill; memorystatus_kill_with_jetsam_reason_sync drops it).
 * memory_reclaimed_out: optional; receives the summed phys footprint of the
 *                       processes killed.
 *
 * Returns true if at least one process was killed.
 *
 * Caller must hold the freezer mutex (prevents concurrent freezing, which also
 * guarantees this loop terminates) and must NOT hold the proc list lock.
 */
static bool
kill_all_frozen_processes(uint64_t max_band, bool suspended_only, os_reason_t jetsam_reason, uint64_t *memory_reclaimed_out)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	unsigned int band = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	pid_t pid = 0;
	bool retval = false, killed = false;
	uint32_t state;
	uint64_t memory_reclaimed = 0, footprint = 0, skips = 0;
	proc_list_lock();

	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
		state = p->p_memstat_state;

		/* Bands are walked in priority order; nothing above max_band is touched. */
		if (p->p_memstat_effectivepriority > max_band) {
			break;
		}

		if (!(state & P_MEMSTAT_FROZEN)) {
			continue;
		}

		if (suspended_only && !(state & P_MEMSTAT_SUSPENDED)) {
			continue;
		}

		/* Clear a stale error flag before attempting the kill. */
		if (state & P_MEMSTAT_ERROR) {
			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
		}

		/* Already exiting (or locked): count it as a skip rather than killing twice. */
		if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED)) {
			os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Skipping kill of frozen process %s (%d) because it's already exiting.\n", p->p_name, proc_getpid(p));
			skips++;
			continue;
		}

		/* Capture the footprint before the kill so we can report reclaimed memory. */
		footprint = get_task_phys_footprint(p->task);
		p->p_memstat_state |= P_MEMSTAT_TERMINATED;
		pid = proc_getpid(p);
		proc_list_unlock();

		/* memorystatus_kill_with_jetsam_reason_sync drops a reference. */
		os_reason_ref(jetsam_reason);
		retval = memorystatus_kill_with_jetsam_reason_sync(pid, jetsam_reason);
		if (retval) {
			killed = true;
			memory_reclaimed += footprint;
		}
		proc_list_lock();
		/*
		 * The bands might have changed when we dropped the proc list lock.
		 * So start from the beginning.
		 * Since we're preventing any further freezing by holding the freezer mutex,
		 * and we skip anything we've already tried to kill this is guaranteed to terminate.
		 */
		band = 0;
		skips = 0;
		next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	}

	assert(skips <= memorystatus_frozen_count);
#if DEVELOPMENT || DEBUG
	if (!suspended_only && max_band >= JETSAM_PRIORITY_FOREGROUND) {
		/*
		 * Check that we've killed all frozen processes.
		 * Note that they may still be exiting (represented by skips).
		 */
		if (memorystatus_frozen_count - skips > 0) {
			assert(memorystatus_freeze_enabled == FALSE);

			panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d",
			    memorystatus_frozen_count);
		}
	}
#endif /* DEVELOPMENT || DEBUG */
	if (memory_reclaimed_out) {
		*memory_reclaimed_out = memory_reclaimed;
	}
	proc_list_unlock();
	return killed;
}
1319 
1320 /*
1321  * Disables the freezer, jetsams all frozen processes,
1322  * and reclaims the swap space immediately.
1323  */
1324 
1325 void
memorystatus_disable_freeze(void)1326 memorystatus_disable_freeze(void)
1327 {
1328 	uint64_t memory_reclaimed = 0;
1329 	bool killed = false;
1330 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1331 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1332 
1333 
1334 	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1335 	    memorystatus_available_pages, 0, 0, 0, 0);
1336 	os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Disabling freezer. Will kill all frozen processes\n");
1337 
1338 	/*
1339 	 * We hold the freezer_mutex (preventing anything from being frozen in parallel)
1340 	 * and all frozen processes will be killed
1341 	 * by the time we release it. Setting memorystatus_freeze_enabled to false,
1342 	 * ensures that no new processes will be frozen once we release the mutex.
1343 	 *
1344 	 */
1345 	memorystatus_freeze_enabled = FALSE;
1346 
1347 	/*
1348 	 * Move dirty pages out from the throttle to the active queue since we're not freezing anymore.
1349 	 */
1350 	vm_page_reactivate_all_throttled();
1351 	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1352 	if (jetsam_reason == OS_REASON_NULL) {
1353 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_disable_freeze -- sync: failed to allocate jetsam reason\n");
1354 	}
1355 
1356 	killed = kill_all_frozen_processes(JETSAM_PRIORITY_FOREGROUND, false, jetsam_reason, &memory_reclaimed);
1357 
1358 	if (killed) {
1359 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Killed all frozen processes.\n");
1360 		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1361 
1362 		proc_list_lock();
1363 		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1364 		    sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1365 		uint64_t timestamp_now = mach_absolute_time();
1366 		memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1367 		memorystatus_jetsam_snapshot->js_gencount++;
1368 		if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1369 		    timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1370 			proc_list_unlock();
1371 			int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1372 			if (!ret) {
1373 				proc_list_lock();
1374 				memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1375 			}
1376 		}
1377 		proc_list_unlock();
1378 	} else {
1379 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: No frozen processes to kill.\n");
1380 	}
1381 
1382 	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1383 	    memorystatus_available_pages, memory_reclaimed, 0, 0, 0);
1384 
1385 	return;
1386 }
1387 
1388 static void
memorystatus_set_freeze_is_enabled(bool enabled)1389 memorystatus_set_freeze_is_enabled(bool enabled)
1390 {
1391 	lck_mtx_lock(&freezer_mutex);
1392 	if (enabled != memorystatus_freeze_enabled) {
1393 		if (enabled) {
1394 			memorystatus_freeze_enabled = true;
1395 		} else {
1396 			memorystatus_disable_freeze();
1397 		}
1398 	}
1399 	lck_mtx_unlock(&freezer_mutex);
1400 }
1401 
1402 
1403 static int
1404 sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
1405 {
1406 #pragma unused(arg1, arg2)
1407 	int error, val = memorystatus_freeze_enabled ? 1 : 0;
1408 
1409 	error = sysctl_handle_int(oidp, &val, 0, req);
1410 	if (error || !req->newptr) {
1411 		return error;
1412 	}
1413 
1414 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1415 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: Failed attempt to set vm.freeze_enabled sysctl\n");
1416 		return EINVAL;
1417 	}
1418 
1419 	memorystatus_set_freeze_is_enabled(val);
1420 
1421 	return 0;
1422 }
1423 
1424 SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, NULL, 0, sysctl_freeze_enabled, "I", "");
1425 
1426 static void
schedule_interval_reset(thread_call_t reset_thread_call,throttle_interval_t * interval)1427 schedule_interval_reset(thread_call_t reset_thread_call, throttle_interval_t *interval)
1428 {
1429 	uint64_t interval_expiration_ns = interval->ts.tv_sec * NSEC_PER_SEC + interval->ts.tv_nsec;
1430 	uint64_t interval_expiration_absolutetime;
1431 	nanoseconds_to_absolutetime(interval_expiration_ns, &interval_expiration_absolutetime);
1432 	os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: scheduling new freezer interval at %llu absolute time\n", interval_expiration_absolutetime);
1433 
1434 	thread_call_enter_delayed(reset_thread_call, interval_expiration_absolutetime);
1435 }
1436 
1437 extern uuid_string_t trial_treatment_id;
1438 extern uuid_string_t trial_experiment_id;
1439 extern int trial_deployment_id;
1440 
/*
 * Core Analytics event schema emitted once per freezer throttle interval
 * (populated by memorystatus_freeze_record_interval_analytics below).
 * Most error_* fields are percentages of processes considered; sizes are MB.
 */
CA_EVENT(freezer_interval,
    CA_INT, budget_remaining,
    CA_INT, error_below_min_pages,
    CA_INT, error_excess_shared_memory,
    CA_INT, error_low_private_shared_ratio,
    CA_INT, error_no_compressor_space,
    CA_INT, error_no_swap_space,
    CA_INT, error_low_probability_of_use,
    CA_INT, error_elevated,
    CA_INT, error_other,
    CA_INT, frozen_count,
    CA_INT, pageouts,
    CA_INT, refreeze_average,
    CA_INT, skipped_full,
    CA_INT, skipped_shared_mb_high,
    CA_INT, swapusage,
    CA_INT, thaw_count,
    CA_INT, thaw_percentage,
    CA_INT, thaws_per_gb,
    CA_INT, trial_deployment_id,
    CA_INT, dasd_trial_deployment_id,
    CA_INT, budget_exhaustion_duration_remaining,
    CA_INT, thaw_percentage_webcontent,
    CA_INT, thaw_percentage_fg,
    CA_INT, thaw_percentage_bg,
    CA_INT, thaw_percentage_fg_non_xpc_service,
    CA_INT, fg_resume_count,
    CA_INT, unique_freeze_count,
    CA_INT, unique_thaw_count,
    CA_STATIC_STRING(CA_UUID_LEN), trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), trial_experiment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_treatment_id,
    CA_STATIC_STRING(CA_UUID_LEN), dasd_trial_experiment_id);
1474 
1475 extern uint64_t vm_swap_get_total_space(void);
1476 extern uint64_t vm_swap_get_free_space(void);
1477 
1478 /*
1479  * Record statistics from the expiring interval
1480  * via core analytics.
1481  */
1482 static void
memorystatus_freeze_record_interval_analytics(void)1483 memorystatus_freeze_record_interval_analytics(void)
1484 {
1485 	ca_event_t event = CA_EVENT_ALLOCATE(freezer_interval);
1486 	CA_EVENT_TYPE(freezer_interval) * e = event->data;
1487 	e->budget_remaining = memorystatus_freeze_budget_pages_remaining * PAGE_SIZE / (1UL << 20);
1488 	uint64_t process_considered_count, refrozen_count, below_threshold_count;
1489 	memory_object_size_t swap_size;
1490 	process_considered_count = memorystatus_freezer_stats.mfs_process_considered_count;
1491 	if (process_considered_count != 0) {
1492 		e->error_below_min_pages = memorystatus_freezer_stats.mfs_error_below_min_pages_count * 100 / process_considered_count;
1493 		e->error_excess_shared_memory = memorystatus_freezer_stats.mfs_error_excess_shared_memory_count * 100 / process_considered_count;
1494 		e->error_low_private_shared_ratio = memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count * 100 / process_considered_count;
1495 		e->error_no_compressor_space = memorystatus_freezer_stats.mfs_error_no_compressor_space_count * 100 / process_considered_count;
1496 		e->error_no_swap_space = memorystatus_freezer_stats.mfs_error_no_swap_space_count * 100 / process_considered_count;
1497 		e->error_low_probability_of_use = memorystatus_freezer_stats.mfs_error_low_probability_of_use_count * 100 / process_considered_count;
1498 		e->error_elevated = memorystatus_freezer_stats.mfs_error_elevated_count * 100 / process_considered_count;
1499 		e->error_other = memorystatus_freezer_stats.mfs_error_other_count * 100 / process_considered_count;
1500 	}
1501 	e->frozen_count = memorystatus_frozen_count;
1502 	e->pageouts = normal_throttle_window->pageouts * PAGE_SIZE / (1UL << 20);
1503 	refrozen_count = memorystatus_freezer_stats.mfs_refreeze_count;
1504 	if (refrozen_count != 0) {
1505 		e->refreeze_average = (memorystatus_freezer_stats.mfs_bytes_refrozen / (1UL << 20)) / refrozen_count;
1506 	}
1507 	below_threshold_count = memorystatus_freezer_stats.mfs_below_threshold_count;
1508 	if (below_threshold_count != 0) {
1509 		e->skipped_full = memorystatus_freezer_stats.mfs_skipped_full_count * 100 / below_threshold_count;
1510 		e->skipped_shared_mb_high = memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count * 100 / below_threshold_count;
1511 	}
1512 	if (VM_CONFIG_SWAP_IS_PRESENT) {
1513 		swap_size = vm_swap_get_total_space();
1514 		if (swap_size) {
1515 			e->swapusage = vm_swap_get_free_space() * 100 / swap_size;
1516 		}
1517 	}
1518 	e->thaw_count = memorystatus_thaw_count;
1519 	e->thaw_percentage = get_thaw_percentage();
1520 	e->thaw_percentage_webcontent = get_thaw_percentage_webcontent();
1521 	e->thaw_percentage_fg = get_thaw_percentage_fg();
1522 	e->thaw_percentage_bg = get_thaw_percentage_bg();
1523 	e->thaw_percentage_fg_non_xpc_service = get_thaw_percentage_fg_non_xpc_service();
1524 
1525 	if (e->pageouts / (1UL << 10) != 0) {
1526 		e->thaws_per_gb = memorystatus_thaw_count / (e->pageouts / (1UL << 10));
1527 	}
1528 	e->budget_exhaustion_duration_remaining = memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining;
1529 	e->fg_resume_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed_fg, relaxed);
1530 	e->unique_freeze_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1531 	e->unique_thaw_count = os_atomic_load(&memorystatus_freezer_stats.mfs_processes_thawed, relaxed);
1532 
1533 	/*
1534 	 * Record any xnu or dasd experiment information
1535 	 */
1536 	strlcpy(e->trial_treatment_id, trial_treatment_id, CA_UUID_LEN);
1537 	strlcpy(e->trial_experiment_id, trial_experiment_id, CA_UUID_LEN);
1538 	e->trial_deployment_id = trial_deployment_id;
1539 	strlcpy(e->dasd_trial_treatment_id, dasd_trial_identifiers.treatment_id, CA_UUID_LEN);
1540 	strlcpy(e->dasd_trial_experiment_id, dasd_trial_identifiers.experiment_id, CA_UUID_LEN);
1541 	e->dasd_trial_deployment_id = dasd_trial_identifiers.deployment_id;
1542 
1543 	CA_EVENT_SEND(event);
1544 }
1545 
/*
 * Thread-call callback fired when the current freezer throttle interval
 * expires: records analytics for the expiring interval, then starts a new
 * interval whose budget includes any unused (rolled-over) budget from the
 * old one. Takes the freezer_mutex for the reset itself; the analytics
 * snapshot is deliberately taken before the lock.
 */
static void
memorystatus_freeze_reset_interval(void *arg0, void *arg1)
{
#pragma unused(arg0, arg1)
	struct throttle_interval_t *interval = NULL;
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t now_ts;
	uint32_t budget_rollover = 0;

	clock_get_system_nanotime(&sec, &nsec);
	/* mach_timespec_t.tv_sec is 32-bit; clamp to avoid truncation surprises. */
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;
	interval = normal_throttle_window;

	/* Record analytics from the old interval before resetting. */
	memorystatus_freeze_record_interval_analytics();

	lck_mtx_lock(&freezer_mutex);
	/* How long has it been since the previous interval expired? */
	mach_timespec_t expiration_period_ts = now_ts;
	SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
	/* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
	budget_rollover = interval->pageouts > interval->max_pageouts ?
	    0 : interval->max_pageouts - interval->pageouts;

	memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
		    expiration_period_ts.tv_sec, interval->burst_multiple,
		    interval->mins, budget_rollover),
	    now_ts);
	memorystatus_freeze_budget_pages_remaining = interval->max_pageouts;

	if (!memorystatus_freezer_use_demotion_list) {
		memorystatus_demote_frozen_processes(false); /* normal mode...don't force a demotion */
	}
	lck_mtx_unlock(&freezer_mutex);
}
1583 
/*
 * One-time freezer initialization: seeds the default page budget, starts
 * the dedicated freezer thread, allocates the interval-reset thread call,
 * and kicks off the first throttle interval. When freezer swap is not
 * active the budget is simply zeroed and nothing else is set up.
 */
__private_extern__ void
memorystatus_freeze_init(void)
{
	kern_return_t result;
	thread_t thread;

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * This is just the default value if the underlying
		 * storage device doesn't have any specific budget.
		 * We check with the storage layer in memorystatus_freeze_update_throttle()
		 * before we start our freezing the first time.
		 */
		memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;

		result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
		if (result == KERN_SUCCESS) {
			/* Freezer I/O runs throttled (compressor tier 2) and passive. */
			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
			thread_set_thread_name(thread, "VM_freezer");

			/* Drop the reference kernel_thread_start() handed back. */
			thread_deallocate(thread);
		} else {
			panic("Could not create memorystatus_freeze_thread");
		}

		/* One-shot call used to roll the throttle interval over when it expires. */
		freeze_interval_reset_thread_call = thread_call_allocate_with_options(memorystatus_freeze_reset_interval, NULL, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
		/* Start a new interval */

		lck_mtx_lock(&freezer_mutex);
		uint32_t budget;
		budget = memorystatus_freeze_calculate_new_budget(0, normal_throttle_window->burst_multiple, normal_throttle_window->mins, 0);
		memorystatus_freeze_force_new_interval(budget);
		lck_mtx_unlock(&freezer_mutex);
	} else {
		/* No freezer swap configured: nothing to budget. */
		memorystatus_freeze_budget_pages_remaining = 0;
	}
}
1622 
/*
 * Decide whether p may be frozen right now.
 *
 * Not a pure predicate: as a side effect it updates
 * p->p_memstat_freeze_skip_reason and the freezer telemetry counters
 * (failures are only counted once per process via the
 * P_MEMSTAT_FREEZE_CONSIDERED bit, to avoid skewing the stats toward
 * frequently-reconsidered processes).
 */
static boolean_t
memorystatus_is_process_eligible_for_freeze(proc_t p)
{
	/*
	 * Called with proc_list_lock held.
	 */

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	boolean_t should_freeze = FALSE;
	uint32_t state = 0, pages = 0;
	int probability_of_use = 0;
	size_t entry_count = 0, i = 0;
	bool first_consideration = true;

	state = p->p_memstat_state;

	/* Hard disqualifiers: exiting, busy, freeze-disabled, or marked ignore. */
	if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
		if (state & P_MEMSTAT_FREEZE_DISABLED) {
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled;
		}
		goto out;
	}

	if (isSysProc(p)) {
		/*
		 * Daemon:- We consider freezing it if:
		 * - it belongs to a coalition and the leader is frozen, and,
		 * - its role in the coalition is XPC service.
		 *
		 * We skip memory size requirements in this case.
		 */

		coalition_t     coal = COALITION_NULL;
		task_t          leader_task = NULL, curr_task = NULL;
		proc_t          leader_proc = NULL;
		int             task_role_in_coalition = 0;

		curr_task = proc_task(p);
		coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);

		if (coal == NULL || coalition_is_leader(curr_task, coal)) {
			/*
			 * By default, XPC services without an app
			 * will be the leader of their own single-member
			 * coalition.
			 */
			goto out;
		}

		leader_task = coalition_get_leader(coal);
		if (leader_task == TASK_NULL) {
			/*
			 * This jetsam coalition is currently leader-less.
			 * This could happen if the app died, but XPC services
			 * have not yet exited.
			 */
			goto out;
		}

		leader_proc = (proc_t)get_bsdtask_info(leader_task);
		/* Drop the reference coalition_get_leader() took on the task. */
		task_deallocate(leader_task);

		if (leader_proc == PROC_NULL) {
			/* leader task is exiting */
			goto out;
		}

		/* Only freeze the XPC service once its coalition leader is frozen. */
		if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
			goto out;
		}

		task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);

		if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
			should_freeze = TRUE;
		}

		goto out;
	} else {
		/*
		 * Application. In addition to the above states we need to make
		 * sure we only consider suspended applications for freezing.
		 */
		if (!(state & P_MEMSTAT_SUSPENDED)) {
			goto out;
		}
	}

	/*
	 * This proc is a suspended application.
	 * We're interested in tracking what percentage of these
	 * actually get frozen.
	 * To avoid skewing the metrics towards processes which
	 * are considered more frequently, we only track failures once
	 * per process.
	 */
	first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED);

	if (first_consideration) {
		memorystatus_freezer_stats.mfs_process_considered_count++;
		p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED;
	}

	/* Only freeze applications meeting our minimum resident page criteria */
	memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
	if (pages < memorystatus_freeze_pages_min) {
		if (first_consideration) {
			memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
		}
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages;
		goto out;
	}

	/* Don't freeze processes that are already exiting on core. It may have started exiting
	 * after we chose it for freeze, but before we obtained the proc_list_lock.
	 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
	 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
	 */
	if (proc_list_exited(p)) {
		if (first_consideration) {
			memorystatus_freezer_stats.mfs_error_other_count++;
		}
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther;
		goto out;
	}

	/* Consult the use-probability table unless an ordered candidate list is in use. */
	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
	if (entry_count && !memorystatus_freezer_use_ordered_list) {
		for (i = 0; i < entry_count; i++) {
			/*
			 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
			 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
			 * MAXCOMLEN bytes here since the name in the probabilities table could
			 * be truncated from the proc_t's p_name.
			 */
			if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
			    p->p_name,
			    MAXCOMLEN) == 0) {
				probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
				break;
			}
		}

		if (probability_of_use == 0) {
			if (first_consideration) {
				memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
			}
			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse;
			goto out;
		}
	}

	if (!(state & P_MEMSTAT_FROZEN) && p->p_memstat_effectivepriority > memorystatus_freeze_max_candidate_band) {
		/*
		 * Proc has been elevated by something else.
		 * Don't freeze it.
		 */
		if (first_consideration) {
			memorystatus_freezer_stats.mfs_error_elevated_count++;
		}
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonElevated;
		goto out;
	}

	should_freeze = TRUE;
out:
	if (should_freeze && !(state & P_MEMSTAT_FROZEN)) {
		/*
		 * Reset the skip reason. If it's killed before we manage to actually freeze it
		 * we failed to consider it early enough.
		 */
		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
		if (!first_consideration) {
			/*
			 * We're freezing this for the first time and we previously considered it ineligible.
			 * Bump the considered count so that we track this as 1 failure
			 * and 1 success.
			 */
			memorystatus_freezer_stats.mfs_process_considered_count++;
		}
	}
	return should_freeze;
}
1807 
1808 /*
1809  * Called with both the freezer_mutex and proc_list_lock held & both will be held on return.
1810  */
1811 static int
memorystatus_freeze_process(proc_t p,bool refreeze_processes,coalition_t * coal,pid_t * coalition_list,unsigned int * coalition_list_length)1812 memorystatus_freeze_process(
1813 	proc_t p,
1814 	bool refreeze_processes,
1815 	coalition_t *coal, /* IN / OUT */
1816 	pid_t *coalition_list, /* OUT */
1817 	unsigned int *coalition_list_length /* OUT */)
1818 {
1819 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1820 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1821 
1822 	kern_return_t kr;
1823 	uint32_t purgeable, wired, clean, dirty, shared;
1824 	uint64_t max_pages = 0;
1825 	int    freezer_error_code = 0;
1826 	bool was_refreeze = false;
1827 	task_t curr_task = TASK_NULL;
1828 
1829 	pid_t aPid = proc_getpid(p);
1830 
1831 	/* Ensure the process is eligible for (re-)freezing */
1832 	if ((p->p_memstat_state & P_MEMSTAT_FROZEN) && !proc_is_refreeze_eligible(p)) {
1833 		/* Process is already frozen & hasn't been thawed. Nothing to do here. */
1834 		return EINVAL;
1835 	}
1836 	if (refreeze_processes) {
1837 		/*
1838 		 * Has to have been frozen once before.
1839 		 */
1840 		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1841 			return EINVAL;
1842 		}
1843 
1844 		/*
1845 		 * Not currently being looked at for something.
1846 		 */
1847 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1848 			return EBUSY;
1849 		}
1850 
1851 		/*
1852 		 * We are going to try and refreeze and so re-evaluate
1853 		 * the process. We don't want to double count the shared
1854 		 * memory. So deduct the old snapshot here.
1855 		 */
1856 		memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
1857 		p->p_memstat_freeze_sharedanon_pages = 0;
1858 
1859 		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1860 		memorystatus_refreeze_eligible_count--;
1861 	} else {
1862 		if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1863 			return EINVAL;
1864 		}
1865 	}
1866 
1867 	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1868 		/*
1869 		 * Freezer backed by the compressor and swap file(s)
1870 		 * will hold compressed data.
1871 		 */
1872 
1873 		max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1874 	} else {
1875 		/*
1876 		 * We only have the compressor pool.
1877 		 */
1878 		max_pages = UINT32_MAX - 1;
1879 	}
1880 
1881 	/* Mark as locked temporarily to avoid kill */
1882 	p->p_memstat_state |= P_MEMSTAT_LOCKED;
1883 
1884 	p = proc_ref(p, true);
1885 	if (!p) {
1886 		memorystatus_freezer_stats.mfs_error_other_count++;
1887 		return EBUSY;
1888 	}
1889 
1890 	proc_list_unlock();
1891 
1892 	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1893 	    memorystatus_available_pages, 0, 0, 0, 0);
1894 
1895 	max_pages = MIN(max_pages, UINT32_MAX);
1896 	kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1897 	if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1898 		memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1899 	}
1900 
1901 	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1902 	    memorystatus_available_pages, aPid, 0, 0, 0);
1903 
1904 	MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
1905 	    "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1906 	    (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1907 	    memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1908 
1909 	proc_list_lock();
1910 
1911 	/* Success? */
1912 	if (KERN_SUCCESS == kr) {
1913 		memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1914 
1915 		p->p_memstat_freeze_sharedanon_pages += shared;
1916 
1917 		memorystatus_frozen_shared_mb += shared;
1918 
1919 		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1920 			p->p_memstat_state |= P_MEMSTAT_FROZEN;
1921 			p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1922 			memorystatus_frozen_count++;
1923 			os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1924 			if (strcmp(p->p_name, "com.apple.WebKit.WebContent") == 0) {
1925 				memorystatus_frozen_count_webcontent++;
1926 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_webcontent), relaxed);
1927 			}
1928 			if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
1929 				memorystatus_freeze_out_of_slots();
1930 			}
1931 		} else {
1932 			// This was a re-freeze
1933 			if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1934 				memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
1935 				memorystatus_freezer_stats.mfs_refreeze_count++;
1936 			}
1937 			was_refreeze = true;
1938 		}
1939 
1940 		p->p_memstat_frozen_count++;
1941 
1942 		/*
1943 		 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1944 		 * to its higher jetsam band.
1945 		 */
1946 		proc_list_unlock();
1947 
1948 		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1949 
1950 		if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1951 			int ret;
1952 			unsigned int i;
1953 			ret = memorystatus_update_inactive_jetsam_priority_band(proc_getpid(p), MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);
1954 
1955 			if (ret) {
1956 				printf("Elevating the frozen process failed with %d\n", ret);
1957 				/* not fatal */
1958 			}
1959 
1960 			/* Update stats */
1961 			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1962 				throttle_intervals[i].pageouts += dirty;
1963 			}
1964 		}
1965 		memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1966 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
1967 		    was_refreeze ? "re" : "", ((!coal || !*coal) ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, was_refreeze ? "Re" : "", dirty);
1968 
1969 		proc_list_lock();
1970 
1971 		memorystatus_freeze_pageouts += dirty;
1972 
1973 		if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1974 			/*
1975 			 * Add some eviction logic here? At some point should we
1976 			 * jetsam a process to get back its swap space so that we
1977 			 * can freeze a more eligible process at this moment in time?
1978 			 */
1979 		}
1980 
1981 		/* Check if we just froze a coalition leader. If so, return the list of XPC services to freeze next. */
1982 		if (coal != NULL && *coal == NULL) {
1983 			curr_task = proc_task(p);
1984 			*coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1985 			if (coalition_is_leader(curr_task, *coal)) {
1986 				*coalition_list_length = coalition_get_pid_list(*coal, COALITION_ROLEMASK_XPC,
1987 				    COALITION_SORT_DEFAULT, coalition_list, MAX_XPC_SERVICE_PIDS);
1988 
1989 				if (*coalition_list_length > MAX_XPC_SERVICE_PIDS) {
1990 					*coalition_list_length = MAX_XPC_SERVICE_PIDS;
1991 				}
1992 			}
1993 		} else {
1994 			/* We just froze an xpc service. Mark it as such for telemetry */
1995 			p->p_memstat_state |= P_MEMSTAT_FROZEN_XPC_SERVICE;
1996 			memorystatus_frozen_count_xpc_service++;
1997 			os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_frozen_xpc_service), relaxed);
1998 		}
1999 
2000 		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
2001 		wakeup(&p->p_memstat_state);
2002 		proc_rele(p);
2003 		return 0;
2004 	} else {
2005 		if (refreeze_processes) {
2006 			if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
2007 			    (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
2008 				/*
2009 				 * Keeping this prior-frozen process in this high band when
2010 				 * we failed to re-freeze it due to bad shared memory usage
2011 				 * could cause excessive pressure on the lower bands.
2012 				 * We need to demote it for now. It'll get re-evaluated next
2013 				 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
2014 				 * bit.
2015 				 */
2016 
2017 				p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2018 				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2019 				memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
2020 			}
2021 		} else {
2022 			p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
2023 		}
2024 		memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, *coal, "memorystatus_freeze_top_process");
2025 
2026 		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
2027 		wakeup(&p->p_memstat_state);
2028 		proc_rele(p);
2029 
2030 		return EINVAL;
2031 	}
2032 }
2033 
2034 /*
2035  * Synchronously freeze the passed proc. Called with a reference to the proc held.
2036  *
2037  * Doesn't deal with:
2038  * - re-freezing because this is called on a specific process and
2039  *   not by the freezer thread. If that changes, we'll have to teach it about
2040  *   refreezing a frozen process.
2041  *
2042  * - grouped/coalition freezing because we are hoping to deprecate this
2043  *   interface as it was used by user-space to freeze particular processes. But
2044  *   we have moved away from that approach to having the kernel choose the optimal
2045  *   candidates to be frozen.
2046  *
2047  * Returns ENOTSUP if the freezer isn't supported on this device. Otherwise
2048  * returns EINVAL or the value returned by task_freeze().
2049  */
2050 int
memorystatus_freeze_process_sync(proc_t p)2051 memorystatus_freeze_process_sync(proc_t p)
2052 {
2053 	int ret = EINVAL;
2054 	boolean_t memorystatus_freeze_swap_low = FALSE;
2055 
2056 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2057 		return ENOTSUP;
2058 	}
2059 
2060 	lck_mtx_lock(&freezer_mutex);
2061 
2062 	if (p == NULL) {
2063 		printf("memorystatus_freeze_process_sync: Invalid process\n");
2064 		goto exit;
2065 	}
2066 
2067 	if (memorystatus_freeze_enabled == FALSE) {
2068 		printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
2069 		goto exit;
2070 	}
2071 
2072 	if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2073 		printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
2074 		goto exit;
2075 	}
2076 
2077 	memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2078 	if (!memorystatus_freeze_budget_pages_remaining) {
2079 		printf("memorystatus_freeze_process_sync: exit with NO available budget\n");
2080 		goto exit;
2081 	}
2082 
2083 	proc_list_lock();
2084 
2085 	ret = memorystatus_freeze_process(p, false, NULL, NULL, NULL);
2086 
2087 exit:
2088 	lck_mtx_unlock(&freezer_mutex);
2089 
2090 	return ret;
2091 }
2092 
/*
 * Look up the proc for entry `index` of the user-supplied ordered freezer
 * candidate list. Returns NULL when the entry is empty/out of range or the
 * process can no longer be found.
 *
 * No reference is held on the returned proc; the proc_list_lock (held by
 * the caller) keeps it stable until the caller takes its own reference.
 *
 * pid_mismatch_counter is bumped when the candidate had to be found by name
 * because its pid changed (process relaunched since the list was supplied).
 */
static proc_t
memorystatus_freezer_candidate_list_get_proc(
	struct memorystatus_freezer_candidate_list *list,
	size_t index,
	uint64_t *pid_mismatch_counter)
{
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	if (list->mfcl_list == NULL || list->mfcl_length <= index) {
		return NULL;
	}
	memorystatus_properties_freeze_entry_v1 *entry = &list->mfcl_list[index];
	if (entry->pid == NO_PID) {
		/* Entry has been removed. */
		return NULL;
	}

	/* Fast path: pid still matches the recorded name. */
	proc_t p = proc_find_locked(entry->pid);
	if (p && strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
		/*
		 * We grab a reference when we are about to freeze the process. So drop
		 * the reference that proc_find_locked() grabbed for us.
		 * We also have the proc_list_lock so this process is stable.
		 */
		proc_rele(p);
		return p;
	} else {
		if (p) {
			/* pid rollover. */
			proc_rele(p);
		}
		/*
		 * The proc has exited since we received this list.
		 * It may have re-launched with a new pid, so we go looking for it.
		 */
		unsigned int band = JETSAM_PRIORITY_IDLE;
		p = memorystatus_get_first_proc_locked(&band, TRUE);
		while (p != NULL && band <= memorystatus_freeze_max_candidate_band) {
			if (strncmp(entry->proc_name, p->p_name, sizeof(proc_name_t)) == 0) {
				(*pid_mismatch_counter)++;
				/* Stash the pid for faster lookup next time. */
				entry->pid = proc_getpid(p);
				return p;
			}
			p = memorystatus_get_next_proc_locked(&band, p, TRUE);
		}
		/* No match. */
		return NULL;
	}
}
2142 
2143 /*
2144  * Caller must hold the freezer_mutex and it will be locked on return.
2145  */
2146 static int
memorystatus_freeze_top_process(void)2147 memorystatus_freeze_top_process(void)
2148 {
2149 	pid_t coal_xpc_pid = 0;
2150 	int ret = -1;
2151 	int freeze_ret;
2152 	proc_t p = PROC_NULL, next_p = PROC_NULL;
2153 	unsigned int band = JETSAM_PRIORITY_IDLE;
2154 	bool refreeze_processes = false;
2155 	coalition_t coal = COALITION_NULL;
2156 	pid_t pid_list[MAX_XPC_SERVICE_PIDS];
2157 	unsigned int    ntasks = 0;
2158 	size_t global_freeze_list_index = 0;
2159 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2160 
2161 	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
2162 
2163 	proc_list_lock();
2164 
2165 	if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
2166 		/*
2167 		 * Freezer is already full but we are here and so let's
2168 		 * try to refreeze any processes we might have thawed
2169 		 * in the past and push their compressed state out.
2170 		 */
2171 		refreeze_processes = true;
2172 		band = (unsigned int) memorystatus_freeze_jetsam_band;
2173 	}
2174 
2175 freeze_process:
2176 
2177 	next_p = NULL;
2178 	if (memorystatus_freezer_use_ordered_list && !refreeze_processes) {
2179 		global_freeze_list_index = 0;
2180 		next_p = memorystatus_freezer_candidate_list_get_proc(
2181 			&memorystatus_global_freeze_list,
2182 			global_freeze_list_index++,
2183 			&memorystatus_freezer_stats.mfs_freeze_pid_mismatches);
2184 		if (!next_p) {
2185 			/*
2186 			 * No candidate to freeze.
2187 			 * But we're here. So try to re-freeze.
2188 			 */
2189 			refreeze_processes = true;
2190 			band = (unsigned int) memorystatus_freeze_jetsam_band;
2191 		}
2192 	}
2193 	if (next_p == NULL) {
2194 		next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2195 	}
2196 	while (next_p) {
2197 		p = next_p;
2198 		if (!memorystatus_freezer_use_ordered_list && p->p_memstat_effectivepriority != (int32_t) band) {
2199 			/*
2200 			 * We shouldn't be freezing processes outside the
2201 			 * prescribed band (unless we've been given an ordered list).
2202 			 */
2203 			break;
2204 		}
2205 
2206 		freeze_ret = memorystatus_freeze_process(p, refreeze_processes, &coal, pid_list, &ntasks);
2207 		if (!freeze_ret) {
2208 			ret = 0;
2209 			/*
2210 			 * We froze a process successfully. We can stop now
2211 			 * and see if that helped if this process isn't part
2212 			 * of a coalition.
2213 			 */
2214 
2215 			if (coal != NULL) {
2216 				next_p = NULL;
2217 
2218 				if (ntasks > 0) {
2219 					coal_xpc_pid = pid_list[--ntasks];
2220 					next_p = proc_find_locked(coal_xpc_pid);
2221 
2222 					/*
2223 					 * We grab a reference when we are about to freeze the process. So drop
2224 					 * the reference that proc_find_locked() grabbed for us.
2225 					 * We also have the proc_list_lock and so this process is stable.
2226 					 */
2227 					if (next_p) {
2228 						proc_rele(next_p);
2229 					}
2230 				}
2231 			}
2232 
2233 			if (coal && next_p) {
2234 				continue;
2235 			}
2236 
2237 			/*
2238 			 * No coalition leader was frozen. So we don't
2239 			 * need to evaluate any XPC services.
2240 			 *
2241 			 * OR
2242 			 *
2243 			 * We have frozen all eligible XPC services for
2244 			 * the current coalition leader.
2245 			 *
2246 			 * Either way, we can break here and see if freezing
2247 			 * helped.
2248 			 */
2249 
2250 			break;
2251 		} else {
2252 			if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
2253 				break;
2254 			}
2255 			if (memorystatus_freezer_use_ordered_list && !refreeze_processes) {
2256 				next_p = memorystatus_freezer_candidate_list_get_proc(
2257 					&memorystatus_global_freeze_list,
2258 					global_freeze_list_index++,
2259 					&memorystatus_freezer_stats.mfs_freeze_pid_mismatches);
2260 			} else {
2261 				next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2262 			}
2263 		}
2264 	}
2265 
2266 	if ((ret == -1) &&
2267 	    (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD) &&
2268 	    (!refreeze_processes)) {
2269 		/*
2270 		 * We failed to freeze a process from the IDLE
2271 		 * band AND we have some thawed processes
2272 		 * AND haven't tried refreezing as yet.
2273 		 * Let's try and re-freeze processes in the
2274 		 * frozen band that have been resumed in the past
2275 		 * and so have brought in state from disk.
2276 		 */
2277 
2278 		band = (unsigned int) memorystatus_freeze_jetsam_band;
2279 
2280 		refreeze_processes = true;
2281 
2282 		goto freeze_process;
2283 	}
2284 
2285 	proc_list_unlock();
2286 
2287 	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, 0, 0, 0, 0);
2288 
2289 	return ret;
2290 }
2291 
2292 #if DEVELOPMENT || DEBUG
2293 /* For testing memorystatus_freeze_top_process */
2294 static int
2295 sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
2296 {
2297 #pragma unused(arg1, arg2)
2298 	int error, val;
2299 	/*
2300 	 * Only freeze on write to prevent freezing during `sysctl -a`.
2301 	 * The actual value written doesn't matter.
2302 	 */
2303 	error = sysctl_handle_int(oidp, &val, 0, req);
2304 	if (error || !req->newptr) {
2305 		return error;
2306 	}
2307 
2308 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2309 		return ENOTSUP;
2310 	}
2311 
2312 	lck_mtx_lock(&freezer_mutex);
2313 	int ret = memorystatus_freeze_top_process();
2314 	lck_mtx_unlock(&freezer_mutex);
2315 
2316 	if (ret == -1) {
2317 		ret = ESRCH;
2318 	}
2319 	return ret;
2320 }
2321 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2322     0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
2323 #endif /* DEVELOPMENT || DEBUG */
2324 
2325 static inline boolean_t
memorystatus_can_freeze_processes(void)2326 memorystatus_can_freeze_processes(void)
2327 {
2328 	boolean_t ret;
2329 
2330 	proc_list_lock();
2331 
2332 	if (memorystatus_suspended_count) {
2333 		memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
2334 
2335 		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2336 			ret = TRUE;
2337 		} else {
2338 			ret = FALSE;
2339 		}
2340 	} else {
2341 		ret = FALSE;
2342 	}
2343 
2344 	proc_list_unlock();
2345 
2346 	return ret;
2347 }
2348 
2349 static boolean_t
memorystatus_can_freeze(boolean_t * memorystatus_freeze_swap_low)2350 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2351 {
2352 	boolean_t can_freeze = TRUE;
2353 
2354 	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
2355 	*  after boot,  and is generally is a no-op once we've reached steady state. */
2356 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2357 		return FALSE;
2358 	}
2359 
2360 	/* Check minimum suspended process threshold. */
2361 	if (!memorystatus_can_freeze_processes()) {
2362 		return FALSE;
2363 	}
2364 	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
2365 
2366 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
2367 		/*
2368 		 * In-core compressor used for freezing WITHOUT on-disk swap support.
2369 		 */
2370 		if (vm_compressor_low_on_space()) {
2371 			if (*memorystatus_freeze_swap_low) {
2372 				*memorystatus_freeze_swap_low = TRUE;
2373 			}
2374 
2375 			can_freeze = FALSE;
2376 		} else {
2377 			if (*memorystatus_freeze_swap_low) {
2378 				*memorystatus_freeze_swap_low = FALSE;
2379 			}
2380 
2381 			can_freeze = TRUE;
2382 		}
2383 	} else {
2384 		/*
2385 		 * Freezing WITH on-disk swap support.
2386 		 *
2387 		 * In-core compressor fronts the swap.
2388 		 */
2389 		if (vm_swap_low_on_space()) {
2390 			if (*memorystatus_freeze_swap_low) {
2391 				*memorystatus_freeze_swap_low = TRUE;
2392 			}
2393 
2394 			can_freeze = FALSE;
2395 		}
2396 	}
2397 
2398 	return can_freeze;
2399 }
2400 
/*
 * Demote the given frozen process.
 * Caller must hold the proc_list_lock & it will be held on return.
 */
static void
memorystatus_demote_frozen_process(proc_t p, bool urgent_mode __unused)
{
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
	int maxpriority = JETSAM_PRIORITY_IDLE;
	/* Drop the elevated-band hint and cancel pending idle-demotion bookkeeping. */
	p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
	memorystatus_invalidate_idle_demotion_locked(p, TRUE);

	maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
	memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
#if DEVELOPMENT || DEBUG
	/* urgent_mode is only consumed by this log line on DEVELOPMENT/DEBUG kernels. */
	os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]\n",
	    (urgent_mode ? "urgent" : "normal"), (p ? proc_getpid(p) : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
#endif /* DEVELOPMENT || DEBUG */

	/*
	 * The freezer thread will consider this a normal app to be frozen
	 * because it is in the IDLE band. So we don't need the
	 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
	 * we'll correctly count it as eligible for re-freeze again.
	 *
	 * We don't drop the frozen count because this process still has
	 * state on disk. So there's a chance it gets resumed and then it
	 * should land in the higher jetsam band. For that it needs to
	 * remain marked frozen.
	 */
	if (proc_is_refreeze_eligible(p)) {
		p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
		memorystatus_refreeze_eligible_count--;
	}
}
2438 
2439 static unsigned int
memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)2440 memorystatus_demote_frozen_processes_using_thaw_count(bool urgent_mode)
2441 {
2442 	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
2443 	unsigned int demoted_proc_count = 0;
2444 	proc_t p = PROC_NULL, next_p = PROC_NULL;
2445 	proc_list_lock();
2446 
2447 	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2448 	while (next_p) {
2449 		p = next_p;
2450 		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2451 
2452 		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
2453 			continue;
2454 		}
2455 
2456 		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2457 			continue;
2458 		}
2459 
2460 		if (urgent_mode) {
2461 			if (!proc_is_refreeze_eligible(p)) {
2462 				/*
2463 				 * This process hasn't been thawed recently and so most of
2464 				 * its state sits on NAND and so we skip it -- jetsamming it
2465 				 * won't help with memory pressure.
2466 				 */
2467 				continue;
2468 			}
2469 		} else {
2470 			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2471 				/*
2472 				 * This process has met / exceeded our thaw count demotion threshold
2473 				 * and so we let it live in the higher bands.
2474 				 */
2475 				continue;
2476 			}
2477 		}
2478 
2479 		memorystatus_demote_frozen_process(p, urgent_mode);
2480 		demoted_proc_count++;
2481 		if ((urgent_mode) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2482 			break;
2483 		}
2484 	}
2485 
2486 	proc_list_unlock();
2487 	return demoted_proc_count;
2488 }
2489 
2490 static unsigned int
memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)2491 memorystatus_demote_frozen_processes_using_demote_list(bool urgent_mode)
2492 {
2493 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2494 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2495 	assert(memorystatus_freezer_use_demotion_list);
2496 	unsigned int demoted_proc_count = 0;
2497 
2498 	proc_list_lock();
2499 	for (size_t i = 0; i < memorystatus_global_demote_list.mfcl_length; i++) {
2500 		proc_t p = memorystatus_freezer_candidate_list_get_proc(
2501 			&memorystatus_global_demote_list,
2502 			i,
2503 			&memorystatus_freezer_stats.mfs_demote_pid_mismatches);
2504 		if (p != NULL && proc_is_refreeze_eligible(p)) {
2505 			memorystatus_demote_frozen_process(p, urgent_mode);
2506 			/* Remove this entry now that it's been demoted. */
2507 			memorystatus_global_demote_list.mfcl_list[i].pid = NO_PID;
2508 			demoted_proc_count++;
2509 			/*
2510 			 * We only demote one proc at a time in this mode.
2511 			 * This gives jetsam a chance to kill the recently demoted processes.
2512 			 */
2513 			break;
2514 		}
2515 	}
2516 
2517 	proc_list_unlock();
2518 	return demoted_proc_count;
2519 }
2520 
/*
 * This function evaluates if the currently frozen processes deserve
 * to stay in the higher jetsam band. There are 2 modes:
 * - 'urgent_mode == true':
 *	We are out of budget and can't refreeze a process. The process's
 * state, if it was resumed, will stay in compressed memory. If we let it
 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
 * the lower bands. So we force-demote the least-recently-used-and-thawed
 * process.
 *
 * - 'urgent_mode == false': (normal mode)
 *      If the # of thaws of a process is below our threshold, then we
 * will demote that process into the IDLE band.
 * We don't immediately kill the process here because it  already has
 * state on disk and so it might be worth giving it another shot at
 * getting thawed/resumed and used.
 */
static void
memorystatus_demote_frozen_processes(bool urgent_mode)
{
	unsigned int demoted_proc_count = 0;

	if (memorystatus_freeze_enabled == FALSE) {
		/*
		 * Freeze has been disabled likely to
		 * reclaim swap space. So don't change
		 * any state on the frozen processes.
		 */
		return;
	}

	/*
	 * We have two demotion policies which can be toggled by userspace.
	 * In non-urgent mode, the ordered list policy will
	 * choose a demotion candidate using the list provided by dasd.
	 * The thaw count policy will demote the oldest process that hasn't been
	 * thawed more than memorystatus_thaw_count_demotion_threshold times.
	 *
	 * If urgent_mode is set, both policies will only consider demoting
	 * processes that are re-freeze eligible. But the ordering is different.
	 * The ordered list policy will scan in the order given by dasd.
	 * The thaw count policy will scan through the frozen band.
	 */
	if (memorystatus_freezer_use_demotion_list) {
		demoted_proc_count += memorystatus_demote_frozen_processes_using_demote_list(urgent_mode);

		if (demoted_proc_count == 0 && urgent_mode) {
			/*
			 * We're out of budget and the demotion list doesn't contain any valid
			 * candidates. We still need to demote something. Fall back to scanning
			 * the frozen band.
			 */
			memorystatus_demote_frozen_processes_using_thaw_count(true);
		}
	} else {
		demoted_proc_count += memorystatus_demote_frozen_processes_using_thaw_count(urgent_mode);
	}
}
2579 
/*
 * Calculate a new freezer budget.
 * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
 * @param burst_multiple The burst_multiple for the new period
 * @param interval_duration_min How many minutes will the new interval be?
 * @param rollover The amount to rollover from the previous budget.
 *
 * @return A budget for the new interval, in pages. Returns 0 when the
 *         freezer has no on-disk swap backing (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE off).
 */
static uint32_t
memorystatus_freeze_calculate_new_budget(
	unsigned int time_since_last_interval_expired_sec,
	unsigned int burst_multiple,
	unsigned int interval_duration_min,
	uint32_t rollover)
{
	uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
	const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
	/* Precision factor for days_missed. 2 decimal points. */
	const static unsigned int kFixedPointFactor = 100;
	unsigned int days_missed;

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return 0;
	}

	/* Get the daily budget from the storage layer */
	if (vm_swap_max_budget(&freeze_daily_budget)) {
		/* The storage layer reports bytes; cache it as MB in the global max. */
		freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
		assert(freeze_daily_budget_mb <= UINT32_MAX);
		memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
	}
	/* Calculate the daily pageout budget */
	freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
	/* Multiply by memorystatus_freeze_budget_multiplier (net effect is max * multiplier / 100, i.e. presumably a percentage — TODO confirm units). */
	freeze_daily_pageouts_max = ((kFixedPointFactor * memorystatus_freeze_budget_multiplier / 100) * freeze_daily_pageouts_max) / kFixedPointFactor;

	/* Scale the daily budget to the interval length, times the burst multiple. */
	daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));

	/*
	 * Add additional budget for time since the interval expired.
	 * For example, if the interval expired n days ago, we should get an additional n days
	 * of budget since we didn't use any budget during those n days.
	 */
	days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
	budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
	new_budget = rollover + daily_budget_pageouts + budget_missed;
	/* new_budget is 64-bit; clamp before narrowing to the 32-bit return type. */
	return (uint32_t) MIN(new_budget, UINT32_MAX);
}
2630 
2631 /*
2632  * Mark all non frozen, freezer-eligible processes as skipped for the given reason.
2633  * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2634  * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2635  * it gets set for new processes.
2636  * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2637  */
2638 static void
memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason,bool locked)2639 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2640 {
2641 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2642 	LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2643 	unsigned int band = JETSAM_PRIORITY_IDLE;
2644 	proc_t p;
2645 
2646 	if (!locked) {
2647 		proc_list_lock();
2648 	}
2649 	p = memorystatus_get_first_proc_locked(&band, FALSE);
2650 	while (p) {
2651 		assert(p->p_memstat_effectivepriority == (int32_t) band);
2652 		if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
2653 			assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2654 			p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2655 		}
2656 		p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2657 	}
2658 	if (!locked) {
2659 		proc_list_unlock();
2660 	}
2661 }
2662 
/*
 * Called after we fail to freeze a process.
 * Logs the failure, marks the process with the failure reason, and updates freezer stats.
 */
static void
memorystatus_freeze_handle_error(
	proc_t p,
	const int freezer_error_code,
	bool was_refreeze,
	pid_t pid,
	const coalition_t coalition,
	const char* log_prefix)
{
	const char *reason;
	memorystatus_freeze_skip_reason_t skip_reason;

	/* Map the freezer error code to a log string + skip reason, bumping the matching per-error stat. */
	switch (freezer_error_code) {
	case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
		memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
		reason = "too much shared memory";
		skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
		break;
	case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
		memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
		reason = "private-shared pages ratio";
		skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
		break;
	case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
		memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
		reason = "no compressor space";
		skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
		break;
	case FREEZER_ERROR_NO_SWAP_SPACE:
		memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
		reason = "no swap space";
		skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
		break;
	default:
		/* NOTE(review): unknown codes bump no per-error counter here;
		 * verify mfs_error_other_count is maintained elsewhere. */
		reason = "unknown error";
		skip_reason = kMemorystatusFreezeSkipReasonOther;
	}

	p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;

	/* Logs "freezing" vs "refreezing"; coalition-driven attempts are tagged as such. */
	os_log_with_startup_serial(OS_LOG_DEFAULT, "%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
	    log_prefix, was_refreeze ? "re" : "",
	    (coalition == NULL ? "general" : "coalition-driven"), pid,
	    ((p && *p->p_name) ? p->p_name : "unknown"), reason);
}
2712 
/*
 * Start a new normal throttle interval with the given budget.
 * Caller must hold the freezer mutex
 */
static void
memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
{
	unsigned int band;
	proc_t p, next_p;
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	/* The interval ends `mins` minutes after start_ts. */
	normal_throttle_window->max_pageouts = new_budget;
	normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
	normal_throttle_window->ts.tv_nsec = 0;
	ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
	/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
	if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
		normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
	} else {
		normal_throttle_window->pageouts = 0;
	}
	/* Ensure the normal window is now active. */
	memorystatus_freeze_degradation = FALSE;

	/*
	 * Reset interval statistics.
	 */
	memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
	memorystatus_freezer_stats.mfs_process_considered_count = 0;
	memorystatus_freezer_stats.mfs_error_below_min_pages_count = 0;
	memorystatus_freezer_stats.mfs_error_excess_shared_memory_count = 0;
	memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count = 0;
	memorystatus_freezer_stats.mfs_error_no_compressor_space_count = 0;
	memorystatus_freezer_stats.mfs_error_no_swap_space_count = 0;
	memorystatus_freezer_stats.mfs_error_low_probability_of_use_count = 0;
	memorystatus_freezer_stats.mfs_error_elevated_count = 0;
	memorystatus_freezer_stats.mfs_error_other_count = 0;
	memorystatus_freezer_stats.mfs_refreeze_count = 0;
	memorystatus_freezer_stats.mfs_bytes_refrozen = 0;
	memorystatus_freezer_stats.mfs_below_threshold_count = 0;
	memorystatus_freezer_stats.mfs_skipped_full_count = 0;
	memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count = 0;
	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = 0;
	memorystatus_thaw_count = 0;
	/* Thaw counters restart at 0; frozen counters snapshot the current frozen population. */
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_webcontent, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service, 0, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_webcontent, memorystatus_frozen_count_webcontent, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen_xpc_service, memorystatus_frozen_count_xpc_service, release);
	os_atomic_store(&memorystatus_freezer_stats.mfs_processes_fg_resumed, 0, release);
	os_atomic_inc(&memorystatus_freeze_current_interval, release);

	/* Clear the focal thaw bit */
	proc_list_lock();
	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);

		/* Bands are walked in ascending priority; stop once past FOREGROUND. */
		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
			break;
		}
		p->p_memstat_state &= ~P_MEMSTAT_FROZEN_FOCAL_THAW;
	}
	proc_list_unlock();

	/* Arm the timer that resets the budget when this interval expires. */
	schedule_interval_reset(freeze_interval_reset_thread_call, normal_throttle_window);
}
2788 
#if DEVELOPMENT || DEBUG

/*
 * Test hook: compute the budget a fresh normal freezer interval would get,
 * given (via the written int) how many seconds ago the last interval expired.
 * Writes the resulting budget (in pages) back to the caller's old buffer.
 */
static int
sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error = 0;
	unsigned int time_since_last_interval_expired_sec = 0;
	unsigned int new_budget;

	error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		return ENOTSUP;
	}
	new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
	/*
	 * Bound the copyout by the caller's buffer length (req->oldlen), not by
	 * sizeof(req->oldlen): the previous code used the size of the length
	 * field itself, ignoring how big the user buffer actually is.
	 */
	return copyout(&new_budget, req->oldptr, MIN(req->oldlen, sizeof(new_budget)));
}

SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");

#endif /* DEVELOPMENT || DEBUG */
2815 
2816 /*
2817  * Called when we first run out of budget in an interval.
2818  * Marks idle processes as not frozen due to lack of budget.
2819  * NB: It might be worth having a CA event here.
2820  */
2821 static void
memorystatus_freeze_out_of_budget(const struct throttle_interval_t * interval)2822 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2823 {
2824 	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2825 	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2826 
2827 	mach_timespec_t time_left = {0, 0};
2828 	mach_timespec_t now_ts;
2829 	clock_sec_t sec;
2830 	clock_nsec_t nsec;
2831 
2832 	time_left.tv_sec = interval->ts.tv_sec;
2833 	time_left.tv_nsec = 0;
2834 	clock_get_system_nanotime(&sec, &nsec);
2835 	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2836 	now_ts.tv_nsec = nsec;
2837 
2838 	SUB_MACH_TIMESPEC(&time_left, &now_ts);
2839 	memorystatus_freezer_stats.mfs_budget_exhaustion_duration_remaining = time_left.tv_sec;
2840 	os_log(OS_LOG_DEFAULT,
2841 	    "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2842 	    time_left.tv_sec / 60, memorystatus_frozen_count);
2843 
2844 	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2845 }
2846 
/*
 * Called when we cross over the threshold of maximum frozen processes allowed.
 * Marks remaining idle processes as not frozen due to lack of slots.
 */
static void
memorystatus_freeze_out_of_slots(void)
{
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);

	os_log(OS_LOG_DEFAULT,
	    "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
	    memorystatus_frozen_count);

	/* proc_list lock is already held here, hence locked == true. */
	memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
}
2864 
/*
 * This function will do 4 things:
 *
 * 1) check to see if we are currently in a degraded freezer mode, and if so:
 *    - check to see if our window has expired and we should exit this mode, OR,
 *    - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
 *
 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
 *
 * 3) check what the current normal window allows for a budget.
 *
 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
 *    what we would normally expect, then we are running low on our daily budget and need to enter
 *    degraded perf. mode.
 *
 *    Caller must hold the freezer mutex
 *    Caller must not hold the proc_list lock
 */

static void
memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
{
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t now_ts;
	LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * NOTE(review): freeze_daily_pageouts_max is never assigned after this
	 * initialization, so budget_threshold and normal_budget_rate_allowed
	 * below are always 0 and the degradation path appears unreachable —
	 * confirm whether this is intentional.
	 */
	unsigned int freeze_daily_pageouts_max = 0;
	/* Remember whether budget was non-zero on entry so we log exhaustion only on the transition. */
	bool started_with_budget = (*budget_pages_allowed > 0);

#if DEVELOPMENT || DEBUG
	if (!memorystatus_freeze_throttle_enabled) {
		/*
		 * No throttling...we can use the full budget everytime.
		 */
		*budget_pages_allowed = UINT64_MAX;
		return;
	}
#endif

	clock_get_system_nanotime(&sec, &nsec);
	now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
	now_ts.tv_nsec = nsec;

	struct throttle_interval_t *interval = NULL;

	if (memorystatus_freeze_degradation == TRUE) {
		interval = degraded_throttle_window;

		/* Degraded window expired: zero it out; the normal-window path below takes over. */
		if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
			interval->pageouts = 0;
			interval->max_pageouts = 0;
		} else {
			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
		}
	}

	interval = normal_throttle_window;

	/*
	 * Current throttle window.
	 * Deny freezing if we have no budget left.
	 * Try graceful degradation if we are within 25% of:
	 * - the daily budget, and
	 * - the current budget left is below our normal budget expectations.
	 */

	if (memorystatus_freeze_degradation == FALSE) {
		if (interval->pageouts >= interval->max_pageouts) {
			*budget_pages_allowed = 0;
			/* Only report exhaustion once per interval (on the transition to zero). */
			if (started_with_budget) {
				memorystatus_freeze_out_of_budget(interval);
			}
		} else {
			int budget_left = interval->max_pageouts - interval->pageouts;
			int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;

			mach_timespec_t time_left = {0, 0};

			time_left.tv_sec = interval->ts.tv_sec;
			time_left.tv_nsec = 0;

			SUB_MACH_TIMESPEC(&time_left, &now_ts);

			if (budget_left <= budget_threshold) {
				/*
				 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
				 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
				 * daily pageout budget.
				 */

				unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
				unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;

				/*
				 * The current rate of pageouts is below what we would expect for
				 * the normal rate i.e. we have below normal budget left and so...
				 */

				if (current_budget_rate_allowed < normal_budget_rate_allowed) {
					memorystatus_freeze_degradation = TRUE;
					degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
					degraded_throttle_window->pageouts = 0;

					/*
					 * Switch over to the degraded throttle window so the budget
					 * doled out is based on that window.
					 */
					interval = degraded_throttle_window;
				}
			}

			*budget_pages_allowed = interval->max_pageouts - interval->pageouts;
		}
	}

	/*
	 * NOTE(review): now_ts is a mach_timespec_t value, not a pointer, so
	 * `now_ts->tv_sec` below would not compile if MEMORYSTATUS_DEBUG ever
	 * expanded its arguments — confirm and fix if the debug macro is enabled.
	 */
	MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
	    interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts->tv_sec) / 60,
	    interval->throttle ? "on" : "off");
}
2986 
/* Set once the freezer thread has registered itself with the VM thread group. */
bool memorystatus_freeze_thread_init = false;

/*
 * Body of the dedicated freezer thread. Runs one demote/freeze pass under
 * the freezer mutex, then blocks on memorystatus_freeze_wakeup with itself
 * as the continuation.
 */
static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
	/* Persists across continuations: records the low-space state reported by memorystatus_can_freeze(). */
	static boolean_t memorystatus_freeze_swap_low = FALSE;

	if (!memorystatus_freeze_thread_init) {
#if CONFIG_THREAD_GROUPS
		thread_group_vm_add();
#endif
		memorystatus_freeze_thread_init = true;
	}

	lck_mtx_lock(&freezer_mutex);

	if (memorystatus_freeze_enabled) {
		if (memorystatus_freezer_use_demotion_list && memorystatus_refreeze_eligible_count > 0) {
			memorystatus_demote_frozen_processes(false); /* Normal mode. Consider demoting thawed processes. */
		}
		/* Freeze only if there's a free slot or enough thawed procs to be worth a re-freeze pass. */
		if ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
		    (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD)) {
			if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
				/* Only freeze if we've not exceeded our pageout budgets.*/
				memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);

				if (memorystatus_freeze_budget_pages_remaining) {
					memorystatus_freeze_top_process();
				} else {
					memorystatus_demote_frozen_processes(true); /* urgent mode..force one demotion */
				}
			}
		}
	}

	/*
	 * Give applications currently in the aging band a chance to age out into the idle band before
	 * running the freezer again.
	 */
	memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;

	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
	lck_mtx_unlock(&freezer_mutex);

	thread_block((thread_continue_t) memorystatus_freeze_thread);
}
3032 
3033 boolean_t
memorystatus_freeze_thread_should_run(void)3034 memorystatus_freeze_thread_should_run(void)
3035 {
3036 	/*
3037 	 * No freezer_mutex held here...see why near call-site
3038 	 * within memorystatus_pages_update().
3039 	 */
3040 
3041 	boolean_t should_run = FALSE;
3042 
3043 	if (memorystatus_freeze_enabled == FALSE) {
3044 		goto out;
3045 	}
3046 
3047 	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
3048 		goto out;
3049 	}
3050 
3051 	memorystatus_freezer_stats.mfs_below_threshold_count++;
3052 
3053 	if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3054 		/*
3055 		 * Consider this as a skip even if we wake up to refreeze because
3056 		 * we won't freeze any new procs.
3057 		 */
3058 		memorystatus_freezer_stats.mfs_skipped_full_count++;
3059 		if (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD) {
3060 			goto out;
3061 		}
3062 	}
3063 
3064 	if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
3065 		memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
3066 		goto out;
3067 	}
3068 
3069 	uint64_t curr_time = mach_absolute_time();
3070 
3071 	if (curr_time < memorystatus_freezer_thread_next_run_ts) {
3072 		goto out;
3073 	}
3074 
3075 	should_run = TRUE;
3076 
3077 out:
3078 	return should_run;
3079 }
3080 
/*
 * Report (via *is_freezable) whether the P_MEMSTAT_FREEZE_DISABLED bit is
 * clear for the given pid. Only the calling process may query itself.
 *
 * @return 0 on success, EINVAL for pid 0, ESRCH if the proc can't be found,
 *         EPERM when targeting a process other than the caller.
 */
int
memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
{
	proc_t p = PROC_NULL;

	if (pid == 0) {
		return EINVAL;
	}

	p = proc_find(pid);
	if (!p) {
		return ESRCH;
	}

	/*
	 * Only allow this on the current proc for now.
	 * We can check for privileges and allow targeting another process in the future.
	 */
	if (p != current_proc()) {
		proc_rele(p);
		return EPERM;
	}

	proc_list_lock();
	*is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
	/* NOTE(review): proc_rele() while still holding the proc_list lock mirrors
	 * memorystatus_set_process_is_freezable(); confirm release is safe under this lock. */
	proc_rele(p);
	proc_list_unlock();

	return 0;
}
3111 
3112 errno_t
memorystatus_get_process_is_frozen(pid_t pid,int * is_frozen)3113 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
3114 {
3115 	proc_t p = PROC_NULL;
3116 
3117 	if (pid == 0) {
3118 		return EINVAL;
3119 	}
3120 
3121 	/*
3122 	 * Only allow this on the current proc for now.
3123 	 * We can check for privileges and allow targeting another process in the future.
3124 	 */
3125 	p = current_proc();
3126 	if (proc_getpid(p) != pid) {
3127 		return EPERM;
3128 	}
3129 
3130 	proc_list_lock();
3131 	*is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3132 	proc_list_unlock();
3133 
3134 	return 0;
3135 }
3136 
3137 int
memorystatus_set_process_is_freezable(pid_t pid,boolean_t is_freezable)3138 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
3139 {
3140 	proc_t p = PROC_NULL;
3141 
3142 	if (pid == 0) {
3143 		return EINVAL;
3144 	}
3145 
3146 	/*
3147 	 * To enable freezable status, you need to be root or an entitlement.
3148 	 */
3149 	if (is_freezable &&
3150 	    !kauth_cred_issuser(kauth_cred_get()) &&
3151 	    !IOCurrentTaskHasEntitlement(MEMORYSTATUS_ENTITLEMENT)) {
3152 		return EPERM;
3153 	}
3154 
3155 	p = proc_find(pid);
3156 	if (!p) {
3157 		return ESRCH;
3158 	}
3159 
3160 	/*
3161 	 * A process can change its own status. A coalition leader can
3162 	 * change the status of coalition members.
3163 	 */
3164 	if (p != current_proc()) {
3165 		coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
3166 		if (!coalition_is_leader(proc_task(current_proc()), coal)) {
3167 			proc_rele(p);
3168 			return EPERM;
3169 		}
3170 	}
3171 
3172 	proc_list_lock();
3173 	if (is_freezable == FALSE) {
3174 		/* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
3175 		p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
3176 		printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
3177 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3178 	} else {
3179 		p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
3180 		printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
3181 		    proc_getpid(p), (*p->p_name ? p->p_name : "unknown"));
3182 	}
3183 	proc_rele(p);
3184 	proc_list_unlock();
3185 
3186 	return 0;
3187 }
3188 
3189 /*
3190  * Called when process is created before it is added to a memorystatus bucket.
3191  */
3192 void
memorystatus_freeze_init_proc(proc_t p)3193 memorystatus_freeze_init_proc(proc_t p)
3194 {
3195 	/* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
3196 	if (memorystatus_freeze_budget_pages_remaining == 0) {
3197 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
3198 	} else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
3199 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
3200 	} else {
3201 		p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
3202 	}
3203 }
3204 
3205 
3206 static int
3207 sysctl_memorystatus_do_fastwake_warmup_all  SYSCTL_HANDLER_ARGS
3208 {
3209 #pragma unused(oidp, arg1, arg2)
3210 
3211 	if (!req->newptr) {
3212 		return EINVAL;
3213 	}
3214 
3215 	/* Need to be root or have entitlement */
3216 	if (!kauth_cred_issuser(kauth_cred_get()) && !IOCurrentTaskHasEntitlement( MEMORYSTATUS_ENTITLEMENT)) {
3217 		return EPERM;
3218 	}
3219 
3220 	if (memorystatus_freeze_enabled == FALSE) {
3221 		return ENOTSUP;
3222 	}
3223 
3224 	if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3225 		return ENOTSUP;
3226 	}
3227 
3228 	do_fastwake_warmup_all();
3229 
3230 	return 0;
3231 }
3232 
/*
 * kern.memorystatus_do_fastwake_warmup_all: write-only (CTLFLAG_WR), hidden
 * (CTLFLAG_MASKED) sysctl. Writing any value invokes the handler above, which
 * gates on root/entitlement and an active freezer-swap config before calling
 * do_fastwake_warmup_all().
 */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
3235 
3236 /*
3237  * Takes in a candidate list from the user_addr, validates it, and copies it into the list pointer.
3238  * Takes ownership over the original value of list.
3239  * Assumes that list is protected by the freezer_mutex.
3240  * The caller should not hold any locks.
3241  */
3242 static errno_t
set_freezer_candidate_list(user_addr_t buffer,size_t buffer_size,struct memorystatus_freezer_candidate_list * list)3243 set_freezer_candidate_list(user_addr_t buffer, size_t buffer_size, struct memorystatus_freezer_candidate_list *list)
3244 {
3245 	errno_t error = 0;
3246 	memorystatus_properties_freeze_entry_v1 *entries = NULL, *tmp_entries = NULL;
3247 	size_t entry_count = 0, entries_size = 0, tmp_size = 0;
3248 
3249 	/* Validate the user provided list. */
3250 	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3251 		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: NULL or empty list\n");
3252 		return EINVAL;
3253 	}
3254 
3255 	if (buffer_size % sizeof(memorystatus_properties_freeze_entry_v1) != 0) {
3256 		os_log_with_startup_serial(OS_LOG_DEFAULT,
3257 		    "memorystatus_cmd_grp_set_freeze_priority: Invalid list length (caller might have comiled agsinst invalid headers.)\n");
3258 		return EINVAL;
3259 	}
3260 
3261 	entry_count = buffer_size / sizeof(memorystatus_properties_freeze_entry_v1);
3262 	entries_size = buffer_size;
3263 	entries = kalloc_data(buffer_size, Z_WAITOK | Z_ZERO);
3264 	if (entries == NULL) {
3265 		return ENOMEM;
3266 	}
3267 
3268 	error = copyin(buffer, entries, buffer_size);
3269 	if (error != 0) {
3270 		goto out;
3271 	}
3272 
3273 #if MACH_ASSERT
3274 	for (size_t i = 0; i < entry_count; i++) {
3275 		memorystatus_properties_freeze_entry_v1 *entry = &entries[i];
3276 		if (entry->version != 1) {
3277 			os_log(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: Invalid entry version number.");
3278 			error = EINVAL;
3279 			goto out;
3280 		}
3281 		if (i > 0 && entry->priority >= entries[i - 1].priority) {
3282 			os_log(OS_LOG_DEFAULT, "memorystatus_cmd_grp_set_freeze_priority: Entry list is not in descending order.");
3283 			error = EINVAL;
3284 			goto out;
3285 		}
3286 	}
3287 #endif /* MACH_ASSERT */
3288 
3289 	lck_mtx_lock(&freezer_mutex);
3290 
3291 	tmp_entries = list->mfcl_list;
3292 	tmp_size = list->mfcl_length * sizeof(memorystatus_properties_freeze_entry_v1);
3293 	list->mfcl_list = entries;
3294 	list->mfcl_length = entry_count;
3295 
3296 	lck_mtx_unlock(&freezer_mutex);
3297 
3298 	entries = tmp_entries;
3299 	entries_size = tmp_size;
3300 
3301 out:
3302 	kfree_data(entries, entries_size);
3303 	return error;
3304 }
3305 
3306 errno_t
memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer,size_t buffer_size)3307 memorystatus_cmd_grp_set_freeze_list(user_addr_t buffer, size_t buffer_size)
3308 {
3309 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_freeze_list);
3310 }
3311 
3312 errno_t
memorystatus_cmd_grp_set_demote_list(user_addr_t buffer,size_t buffer_size)3313 memorystatus_cmd_grp_set_demote_list(user_addr_t buffer, size_t buffer_size)
3314 {
3315 	return set_freezer_candidate_list(buffer, buffer_size, &memorystatus_global_demote_list);
3316 }
3317 
3318 void
memorystatus_freezer_mark_ui_transition(proc_t p)3319 memorystatus_freezer_mark_ui_transition(proc_t p)
3320 {
3321 	bool frozen = false, previous_focal_thaw = false, xpc_service = false, suspended = false;
3322 	proc_list_lock();
3323 
3324 	if (isSysProc(p)) {
3325 		goto out;
3326 	}
3327 
3328 	frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
3329 	previous_focal_thaw = (p->p_memstat_state & P_MEMSTAT_FROZEN_FOCAL_THAW) != 0;
3330 	xpc_service = (p->p_memstat_state & P_MEMSTAT_FROZEN_XPC_SERVICE) != 0;
3331 	suspended = (p->p_memstat_state & P_MEMSTAT_SUSPENDED) != 0;
3332 	if (!suspended) {
3333 		if (frozen) {
3334 			if (!previous_focal_thaw) {
3335 				p->p_memstat_state |= P_MEMSTAT_FROZEN_FOCAL_THAW;
3336 				os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg), relaxed);
3337 				if (xpc_service) {
3338 					os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed_fg_xpc_service), relaxed);
3339 				}
3340 			}
3341 		}
3342 		os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_fg_resumed), relaxed);
3343 	}
3344 
3345 out:
3346 	proc_list_unlock();
3347 }
3348 
3349 #endif /* CONFIG_FREEZE */
3350