/* xref: /xnu-11215.1.10/tests/vm/vm_reclaim.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585) */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/vm_reclaim.h>
#include <mach-o/dyld.h>
#include <os/atomic_private.h>
#include <signal.h>
#include <spawn.h>
#include <spawn_private.h>
#include <unistd.h>

#include <darwintest.h>
#include <darwintest_multiprocess.h>
#include <darwintest_utils.h>

#include <Kernel/kern/ledger.h>
extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);

#include "memorystatus_assertion_helpers.h"

// Some of the unit tests test deferred deallocations.
// For these we need to set a sufficiently large reclaim threshold
// to ensure their buffers aren't freed prematurely.
#define VM_RECLAIM_THRESHOLD_BOOTARG_HIGH "vm_reclaim_max_threshold=268435456"
#define VM_RECLAIM_THRESHOLD_BOOTARG_LOW "vm_reclaim_max_threshold=16384"
#define VM_RECLAIM_BOOTARG_DISABLED "vm_reclaim_max_threshold=0"
#define VM_RECLAIM_THRESHOLD_SYSCTL_HIGH "vm.reclaim_max_threshold=268435456"
#define VM_RECLAIM_THRESHOLD_SYSCTL_LOW "vm.reclaim_max_threshold=16384"
#define VM_RECLAIM_SYSCTL_DISABLED "vm.reclaim_max_threshold=0"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.vm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("performance"),
	T_META_OWNER("jarrad"),
	T_META_ENABLED(TARGET_OS_IOS && !TARGET_OS_MACCATALYST),
	// Ensure we don't conflict with libmalloc's reclaim buffer
	T_META_ENVVAR("MallocLargeCache=0"),
	T_META_RUN_CONCURRENTLY(false)
	);

T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer",
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
    T_META_TAG_VM_PREFERRED)
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
}

T_DECL(vm_reclaim_init_fails_when_disabled, "Initializing a ring buffer on a system with vm_reclaim disabled should fail",
    T_META_BOOTARGS_SET(VM_RECLAIM_BOOTARG_DISABLED), T_META_TAG_VM_PREFERRED)
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);

	T_QUIET; T_EXPECT_MACH_ERROR(kr, KERN_NOT_SUPPORTED, "mach_vm_reclaim_ringbuffer_init");
}

/*
 * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call.
 */
static uint64_t
allocate_and_defer_free(size_t size, mach_vm_reclaim_ringbuffer_v1_t ringbuffer, unsigned char val, mach_vm_reclaim_behavior_v1_t behavior, mach_vm_address_t *addr /* OUT */)
{
	kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	bool should_update_kernel_accounting = false;
	uint64_t idx;
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map");

	memset((void *) *addr, val, size);

	idx = mach_vm_reclaim_mark_free(ringbuffer, *addr, (uint32_t) size, behavior, &should_update_kernel_accounting);
	if (should_update_kernel_accounting) {
		mach_vm_reclaim_update_kernel_accounting(ringbuffer);
	}
	return idx;
}

static uint64_t
allocate_and_defer_deallocate(size_t size, mach_vm_reclaim_ringbuffer_v1_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */)
{
	return allocate_and_defer_free(size, ringbuffer, val, MACH_VM_RECLAIM_DEALLOCATE, addr);
}

T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync",
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
    T_META_TAG_VM_PREFERRED)
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	mach_vm_reclaim_synchronize(&ringbuffer, 1);
}

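/*
 * Re-spawn this test binary with "-n <helper>" so the named T_HELPER_DECL
 * runs in a separate child process. Returns the child's pid.
 */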
static pid_t
spawn_helper(char *helper)
{
	char **launch_tool_args;
	char testpath[PATH_MAX];
	uint32_t testpath_buf_size;
	pid_t child_pid;

	testpath_buf_size = sizeof(testpath);
	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
	T_LOG("Executable path: %s", testpath);
	launch_tool_args = (char *[]){
		testpath,
		"-n",
		helper,
		NULL
	};

	/* Spawn the child process. */
	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
	if (ret != 0) {
		T_LOG("dt_launch_tool returned %d with error code %d", ret, errno);
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");

	return child_pid;
}

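/*
 * Spawn the named helper and block until it exits.
 * Returns the raw waitpid() status so callers can check WIFEXITED/WIFSIGNALED.
 */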
static int
spawn_helper_and_wait_for_exit(char *helper)
{
	int status;
	pid_t child_pid, rc;

	child_pid = spawn_helper(helper);
	rc = waitpid(child_pid, &status, 0);
	T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
	return status;
}

/*
 * Returns true iff every byte in the buffer matches the expected value.
 */
static bool
check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected)
{
	unsigned char *buffer = (unsigned char *) addr;
	for (size_t i = 0; i < size; i++) {
		if (buffer[i] != expected) {
			return false;
		}
	}
	return true;
}

/*
 * Read every byte of a buffer to ensure re-usability
 */
static void
read_buffer(mach_vm_address_t addr, size_t size)
{
	volatile uint8_t byte;
	uint8_t *buffer = (uint8_t *)addr;
	for (size_t i = 0; i < size; i++) {
		byte = buffer[i];
	}
}

/*
 * Check that the given (freed) buffer has changed.
 * This will likely crash, but if we make it through the entire buffer then segfault on purpose.
 */
static void
assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected)
{
	/*
	 * mach_vm_reclaim_synchronize should have ensured the buffer was freed.
	 * Two cases:
	 * 1. The buffer is still free (touching it causes a crash)
	 * 2. The address range was re-allocated by some other library in the process.
	 * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled
	 * with the value we wrote to it. So scan the buffer. If we segfault it's case #1,
	 * and if we see another value it's case #2.
	 */
	bool changed = !check_buffer(addr, size, expected);
	T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated");
	/* Case #2. Force a segfault so the parent sees that we crashed. */
	*(volatile int *) 0 = 1;

	T_FAIL("Test did not crash when dereferencing NULL");
}

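/*
 * Defer-free a 1MB buffer with the given behavior, force the kernel to
 * reclaim it via mach_vm_reclaim_synchronize, then attempt to re-use the
 * entry. DEALLOCATE-d memory should be gone (touching it should crash);
 * REUSABLE memory should still be safely readable.
 */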
static void
reuse_reclaimed_entry(mach_vm_reclaim_behavior_v1_t behavior)
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;
	static const unsigned char kValue = 220;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, kValue, behavior, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
	bool usable = mach_vm_reclaim_mark_used(&ringbuffer, idx, addr, kAllocationSize);
	bool reclaimed = mach_vm_reclaim_is_reclaimed(&ringbuffer, idx);
	T_EXPECT_FALSE(usable, "reclaimed entry is not re-usable");
	T_EXPECT_TRUE(reclaimed, "reclaimed entry was marked reclaimed");
	switch (behavior) {
	case MACH_VM_RECLAIM_DEALLOCATE:
		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
		break;
	case MACH_VM_RECLAIM_REUSABLE:
		read_buffer(addr, kAllocationSize);
		T_PASS("Freed buffer re-used successfully");
		break;
	default:
		T_FAIL("Unexpected reclaim behavior %d", behavior);
	}
}

T_HELPER_DECL(reuse_freed_entry_dealloc,
    "defer free (dealloc), sync, and try to use entry")
{
	reuse_reclaimed_entry(MACH_VM_RECLAIM_DEALLOCATE);
}

T_HELPER_DECL(reuse_freed_entry_reusable,
    "defer free (reusable), sync, and try to use entry")
{
	reuse_reclaimed_entry(MACH_VM_RECLAIM_REUSABLE);
}

T_DECL(vm_reclaim_single_entry_verify_free, "Place a single entry in the buffer, call sync, and verify the memory was freed",
    T_META_IGNORECRASHES(".*vm_reclaim_single_entry_verify_free.*"),
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
    T_META_TAG_VM_PREFERRED)
{
	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_dealloc");
	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault.");
}

T_DECL(vm_reclaim_single_entry_reusable,
    "Reclaim a reusable entry and verify re-use is legal",
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
    T_META_TAG_VM_PREFERRED)
{
	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_reusable");
	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
}

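/*
 * Helper body for the suspend tests: defer-free argv[0]-many 1MB allocations,
 * optionally deallocate the ring buffer itself (free_buffer) or double-free
 * the last entry (double_free) to provoke an error, then signal the parent
 * with SIGUSR1 and spin until suspended.
 */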
static void
allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free)
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr = 0;
	bool should_update_kernel_accounting = false;

	const mach_vm_size_t kNumEntries = (size_t) atoi(argv[0]);

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
	T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Test does not fill up ringbuffer");

	for (size_t i = 0; i < kNumEntries; i++) {
		uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
		T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
	}

	if (double_free) {
		// Double free the last entry
		mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, MACH_VM_RECLAIM_DEALLOCATE, &should_update_kernel_accounting);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_mark_free");
	}

	if (free_buffer) {
		mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
		    offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
		kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
	}

	// Signal to our parent to suspend us
	if (kill(getppid(), SIGUSR1) != 0) {
		T_LOG("Unable to signal to parent process!");
		exit(1);
	}

	while (1) {
		;
	}
}

T_HELPER_DECL(allocate_and_suspend,
    "defer free, and signal parent to suspend")
{
	allocate_and_suspend(argv, false, false);
}

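/* Resume a suspended child process and then kill it. */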
static void
resume_and_kill_proc(pid_t pid)
{
	int ret = pid_resume(pid);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process");
}

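/*
 * Synchronize with the kernel's asynchronous reclaim worker for the given
 * pid via the vm.reclaim_drain_async_queue sysctl.
 */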
static void
drain_async_queue(pid_t child_pid)
{
	int val = child_pid;
	int ret;
	size_t len = sizeof(val);
	ret = sysctlbyname("vm.reclaim_drain_async_queue", NULL, NULL, &val, len);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim_drain_async_queue");
}

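/*
 * Look up the index of the phys_footprint entry in the task ledger template
 * and report the total number of ledger entries via num_entries.
 */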
static size_t
ledger_phys_footprint_index(size_t *num_entries)
{
	struct ledger_info li;
	struct ledger_template_info *templateInfo = NULL;
	int ret;
	size_t i, footprint_index;
	bool found = false;

	ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)");

	T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid");
	*num_entries = (size_t) li.li_entries;
	templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info));
	T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries");

	footprint_index = 0;
	ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)");
	for (i = 0; i < *num_entries; i++) {
		if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) {
			footprint_index = i;
			found = true;
		}
	}
	free(templateInfo);
	T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger");
	return footprint_index;
}

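/* Return the ledger balance at the given entry index for the given pid. */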
static int64_t
get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries)
{
	int ret;
	int64_t value;
	struct ledger_entry_info *lei = NULL;

	lei = malloc(num_entries * sizeof(*lei));
	ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)");
	value = lei[index].lei_balance;
	free(lei);
	return value;
}

static pid_t child_pid;

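/*
 * Launch the named helper variant with one argument, run test_block when the
 * helper signals readiness via SIGUSR1, and run exit_block when it exits.
 * Does not return; the exit_block is expected to call T_END.
 */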
static void
test_after_background_helper_launches(char *variant, char *arg1, dispatch_block_t test_block, dispatch_block_t exit_block)
{
	char **launch_tool_args;
	char testpath[PATH_MAX];
	uint32_t testpath_buf_size;

	dispatch_source_t ds_signal, ds_exit;

	/* Wait for the child process to tell us that it's ready, and then freeze it */
	signal(SIGUSR1, SIG_IGN);
	ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
	T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
	dispatch_source_set_event_handler(ds_signal, test_block);

	dispatch_activate(ds_signal);

	testpath_buf_size = sizeof(testpath);
	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
	T_LOG("Executable path: %s", testpath);
	launch_tool_args = (char *[]){
		testpath,
		"-n",
		variant,
		arg1,
		NULL
	};

	/* Spawn the child process. */
	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
	if (ret != 0) {
		T_LOG("dt_launch_tool returned %d with error code %d", ret, errno);
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");

	/* Listen for exit. */
	ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue());
	dispatch_source_set_event_handler(ds_exit, exit_block);

	dispatch_activate(ds_exit);
	dispatch_main();
}

T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.",
    T_META_ASROOT(true),
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
    T_META_TAG_VM_PREFERRED)
{
	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
		int ret = 0;
		size_t num_ledger_entries = 0;
		size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
		int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20);
		before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
		T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated");
		ret = pid_suspend(child_pid);
		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
		/*
		 * The reclaim work is kicked off asynchronously by the suspend.
		 * So we need to call into the kernel to synchronize with the reclaim worker
		 * thread.
		 */
		drain_async_queue(child_pid);

		after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
		T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed");

		resume_and_kill_proc(child_pid);
	},
	    ^{
		int status = 0, code = 0;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		code = WEXITSTATUS(status);
		T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly");
		T_END;
	});
}

T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill",
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
    T_META_TAG_VM_PREFERRED)
{
	int err;
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	const size_t kNumEntries = 50;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits");

	for (size_t i = 0; i < kNumEntries; i++) {
		mach_vm_address_t addr = 0;
		uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
		T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
	}

	T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit);
}

T_DECL(vm_reclaim_update_reclaimable_bytes_threshold, "Kernel reclaims when num_bytes_reclaimable crosses threshold",
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
    T_META_TAG_VM_PREFERRED)
{
	mach_vm_size_t kNumEntries = 0;
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	const size_t kAllocationSize = vm_kernel_page_size;
	uint64_t vm_reclaim_reclaimable_max_threshold;
	int ret;
	size_t len = sizeof(vm_reclaim_reclaimable_max_threshold);
	size_t num_ledger_entries = 0;
	size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	// Allocate 1000 times the reclaim threshold
	ret = sysctlbyname("vm.reclaim_max_threshold", &vm_reclaim_reclaimable_max_threshold, &len, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim_max_threshold");
	kNumEntries = vm_reclaim_reclaimable_max_threshold / kAllocationSize * 1000;
	T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Entries will not fill up ringbuffer.");

	mach_vm_address_t addr = 0;
	for (uint64_t i = 0; i < kNumEntries; i++) {
		uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
		T_QUIET; T_ASSERT_EQ(idx, i, "idx is correct");
	}

	T_QUIET; T_ASSERT_LT(get_ledger_entry_for_pid(getpid(), phys_footprint_index, num_ledger_entries),
	    (int64_t) ((kNumEntries) * kAllocationSize), "Entries were reclaimed as we crossed threshold");
}

T_HELPER_DECL(deallocate_buffer,
    "deallocate the buffer from underneath the kernel")
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
	    offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
	kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");

	mach_vm_reclaim_synchronize(&ringbuffer, 10);

	T_FAIL("Test did not crash when synchronizing on a deallocated buffer!");
}

T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer",
    T_META_IGNORECRASHES(".*deallocate_buffer.*"),
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
    T_META_TAG_VM_PREFERRED)
{
	int status = spawn_helper_and_wait_for_exit("deallocate_buffer");
	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
}

T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer")
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;
	bool should_update_kernel_accounting = false;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	idx = mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, MACH_VM_RECLAIM_DEALLOCATE, &should_update_kernel_accounting);
	T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index");

	mach_vm_reclaim_synchronize(&ringbuffer, 2);

	T_FAIL("Test did not crash when doing a double free!");
}

T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception",
    T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap.*"),
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
    T_META_TAG_VM_PREFERRED)
{
	int status = spawn_helper_and_wait_for_exit("dealloc_gap");
	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
}

T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap,
    "defer double free, and signal parent to suspend")
{
	allocate_and_suspend(argv, false, true);
}

static void
vm_reclaim_async_exception(char *variant, char *arg1)
{
	test_after_background_helper_launches(variant, arg1, ^{
		int ret = 0;
		ret = pid_suspend(child_pid);
		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
		/*
		 * The reclaim work is kicked off asynchronously by the suspend.
		 * So we need to call into the kernel to synchronize with the reclaim worker
		 * thread.
		 */
		drain_async_queue(child_pid);
	}, ^{
		int status;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
		T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
		T_END;
	});
}

T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception",
    T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap_async.*"),
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
    T_META_TAG_VM_PREFERRED)
{
	vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15");
}

T_HELPER_DECL(allocate_and_suspend_with_buffer_error,
    "defer free, free buffer, and signal parent to suspend")
{
	allocate_and_suspend(argv, true, false);
}

T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception",
    T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap_async.*"),
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
    T_META_TAG_VM_PREFERRED)
{
	vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15");
}

T_HELPER_DECL(reuse_freed_entry_fork,
    "defer free, sync, and try to use entry")
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;
	static const unsigned char kValue = 119;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, kValue, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");

	pid_t forked_pid = fork();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NE(forked_pid, -1, "fork()");
	if (forked_pid == 0) {
		kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
	} else {
		int status;
		pid_t rc = waitpid(forked_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, forked_pid, "waitpid");
		T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Forked process crashed.");
		T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Forked process crashed with segmentation fault.");
	}
}

T_DECL(vm_reclaim_fork, "Ensure reclaim buffer is inherited across a fork",
    T_META_IGNORECRASHES(".*vm_reclaim_fork.*"),
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
    T_META_TAG_VM_PREFERRED)
{
	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_fork");
	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
}

#define SUSPEND_AND_RESUME_COUNT 4

// rdar://110081398
T_DECL(reclaim_async_on_repeated_suspend,
    "verify that subsequent suspends are allowed",
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
    T_META_TAG_VM_PREFERRED)
{
	const int sleep_duration = 3;
	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
		int ret = 0;
		for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) {
		        ret = pid_suspend(child_pid);
		        T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()");
		        ret = pid_resume(child_pid);
		        T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()");
		}
		T_LOG("Sleeping %d sec...", sleep_duration);
		sleep(sleep_duration);
		T_LOG("Killing child...");
		T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()");
	}, ^{
		int status;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
		T_END;
	});
}

T_HELPER_DECL(ringbuffer_init_after_exec,
    "initialize a ringbuffer after exec")
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");
}

extern char **environ;

T_HELPER_DECL(exec_after_ringbuffer_init,
    "initialize a ringbuffer then exec")
{
	char **launch_tool_args;
	char testpath[PATH_MAX];
	uint32_t testpath_buf_size;
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");

	testpath_buf_size = sizeof(testpath);
	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
	T_LOG("Executable path: %s", testpath);
	launch_tool_args = (char *[]){
		testpath,
		"-n",
		"ringbuffer_init_after_exec",
		NULL
	};

	/* Exec the helper in place (POSIX_SPAWN_SETEXEC replaces this image). */
	posix_spawnattr_t spawnattrs;
	posix_spawnattr_init(&spawnattrs);
	posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC);
	posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ);
	T_ASSERT_FAIL("should not be reached");
}

T_DECL(reclaim_exec_new_reclaim_buffer,
    "verify that an exec-ed process may instantiate a new buffer",
    T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
    T_META_TAG_VM_PREFERRED)
{
	dt_helper_t helpers[1];
	helpers[0] = dt_child_helper("exec_after_ringbuffer_init");

	dt_run_helpers(helpers, 1, 30);
}