xref: /xnu-10002.41.9/tests/vm/vm_reclaim.c (revision 699cd48037512bf4380799317ca44ca453c82f57)
1 #include <sys/types.h>
2 #include <sys/sysctl.h>
3 #include <mach/mach.h>
4 #include <mach/mach_vm.h>
5 #include <mach/vm_reclaim.h>
6 #include <mach-o/dyld.h>
7 #include <os/atomic_private.h>
8 #include <signal.h>
9 #include <spawn.h>
10 #include <spawn_private.h>
11 #include <unistd.h>
12 
13 #include <darwintest.h>
14 #include <darwintest_utils.h>
15 
16 #include <Kernel/kern/ledger.h>
17 extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);
18 
19 #include "memorystatus_assertion_helpers.h"
20 
21 // Some of the unit tests test deferred deallocations.
22 // For these we need to set a sufficiently large reclaim threshold
23 // to ensure their buffers aren't freed prematurely.
24 #define VM_RECLAIM_THRESHOLD_BOOTARG_HIGH "vm_reclaim_max_threshold=268435456"
25 #define VM_RECLAIM_THRESHOLD_BOOTARG_LOW "vm_reclaim_max_threshold=16384"
26 #define VM_RECLAIM_BOOTARG_DISABLED "vm_reclaim_max_threshold=0"
27 
/*
 * Suite-wide metadata: these tests only run on iOS proper (vm_reclaim is
 * not exercised on Catalyst), and MallocLargeCache is disabled so that
 * libmalloc does not set up its own reclaim buffer before the tests do.
 */
T_GLOBAL_META(
	T_META_NAMESPACE("xnu.vm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("VM"),
	T_META_ENABLED(TARGET_OS_IOS && !TARGET_OS_MACCATALYST),
	T_META_ENVVAR("MallocLargeCache=0") // Ensure we don't conflict with libmalloc's reclaim buffer
	);
35 
36 T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer",
37     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
38 {
39 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
40 
41 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
42 
43 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
44 }
45 
46 T_DECL(vm_reclaim_init_fails_when_disabled, "Initializing a ring buffer on a system with vm_reclaim disabled should fail",
47     T_META_BOOTARGS_SET(VM_RECLAIM_BOOTARG_DISABLED))
48 {
49 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
50 
51 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
52 
53 	T_QUIET; T_EXPECT_MACH_ERROR(kr, KERN_NOT_SUPPORTED, "mach_vm_reclaim_ringbuffer_init");
54 }
55 
56 /*
57  * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call.
58  */
59 static uint64_t
allocate_and_defer_free(size_t size,mach_vm_reclaim_ringbuffer_v1_t ringbuffer,unsigned char val,mach_vm_address_t * addr)60 allocate_and_defer_free(size_t size, mach_vm_reclaim_ringbuffer_v1_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */)
61 {
62 	kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
63 	bool should_update_kernel_accounting = false;
64 	uint64_t idx;
65 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map");
66 
67 	memset((void *) *addr, val, size);
68 
69 	idx = mach_vm_reclaim_mark_free(ringbuffer, *addr, (uint32_t) size, &should_update_kernel_accounting);
70 	if (should_update_kernel_accounting) {
71 		mach_vm_reclaim_update_kernel_accounting(ringbuffer);
72 	}
73 	return idx;
74 }
75 
76 T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync",
77     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
78 {
79 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
80 	static const size_t kAllocationSize = (1UL << 20); // 1MB
81 	mach_vm_address_t addr;
82 
83 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
84 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
85 
86 	uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, 1, &addr);
87 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
88 	mach_vm_reclaim_synchronize(&ringbuffer, 1);
89 }
90 
91 static pid_t
spawn_helper(char * helper)92 spawn_helper(char *helper)
93 {
94 	char **launch_tool_args;
95 	char testpath[PATH_MAX];
96 	uint32_t testpath_buf_size;
97 	pid_t child_pid;
98 
99 	testpath_buf_size = sizeof(testpath);
100 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
101 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
102 	T_LOG("Executable path: %s", testpath);
103 	launch_tool_args = (char *[]){
104 		testpath,
105 		"-n",
106 		helper,
107 		NULL
108 	};
109 
110 	/* Spawn the child process. */
111 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
112 	if (ret != 0) {
113 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
114 	}
115 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
116 
117 	return child_pid;
118 }
119 
120 static int
spawn_helper_and_wait_for_exit(char * helper)121 spawn_helper_and_wait_for_exit(char *helper)
122 {
123 	int status;
124 	pid_t child_pid, rc;
125 
126 	child_pid = spawn_helper(helper);
127 	rc = waitpid(child_pid, &status, 0);
128 	T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
129 	return status;
130 }
131 
132 /*
133  * Returns true iff every entry in buffer is expected.
134  */
135 static bool
check_buffer(mach_vm_address_t addr,size_t size,unsigned char expected)136 check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected)
137 {
138 	unsigned char *buffer = (unsigned char *) addr;
139 	for (size_t i = 0; i < size; i++) {
140 		if (buffer[i] != expected) {
141 			return false;
142 		}
143 	}
144 	return true;
145 }
146 
/*
 * Check that the given (freed) buffer has changed.
 * This will likely crash, but if we make it through the entire buffer then segfault on purpose.
 */
static void
assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected)
{
	/*
	 * mach_vm_reclaim_synchronize should have ensured the buffer was freed.
	 * Two cases:
	 * 1. The buffer is still free (touching it causes a crash)
	 * 2. The address range was re-allocated by some other library in process.
	 * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled
	 * with the value we wrote to it. So scan the buffer. If we segfault it's case #1
	 * and if we see another value it's case #2.
	 */
	bool changed = !check_buffer(addr, size, expected);
	T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated");
	/* Case #2. Force a segfault so the parent sees that we crashed. */
	*(volatile int *) 0 = 1;

	/* Unreachable unless the null store somehow didn't fault. */
	T_FAIL("Test did not crash when dereferencing NULL");
}
170 
171 T_HELPER_DECL(reuse_freed_entry,
172     "defer free, sync, and try to use entry")
173 {
174 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
175 	static const size_t kAllocationSize = (1UL << 20); // 1MB
176 	mach_vm_address_t addr;
177 	static const unsigned char kValue = 220;
178 
179 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
180 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
181 
182 	uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, kValue, &addr);
183 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
184 	kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
185 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
186 	assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
187 }
188 
189 T_DECL(vm_reclaim_single_entry_verify_free, "Place a single entry in the buffer and call sync",
190     T_META_IGNORECRASHES("vm_reclaim_single_entry_verify_free*"),
191     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
192 {
193 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry");
194 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
195 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault.");
196 }
197 
198 static void
allocate_and_suspend(char * const * argv,bool free_buffer,bool double_free)199 allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free)
200 {
201 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
202 	static const size_t kAllocationSize = (1UL << 20); // 1MB
203 	mach_vm_address_t addr = 0;
204 	bool should_update_kernel_accounting = false;
205 
206 	const mach_vm_size_t kNumEntries = (size_t) atoi(argv[0]);
207 
208 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
209 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
210 	T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Test does not fill up ringubffer");
211 
212 	for (size_t i = 0; i < kNumEntries; i++) {
213 		uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
214 		T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
215 	}
216 
217 	if (double_free) {
218 		// Double free the last entry
219 		mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, &should_update_kernel_accounting);
220 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_mark_free");
221 	}
222 
223 	if (free_buffer) {
224 		mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
225 		    offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
226 		kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
227 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
228 	}
229 
230 	// Signal to our parent to suspend us
231 	if (kill(getppid(), SIGUSR1) != 0) {
232 		T_LOG("Unable to signal to parent process!");
233 		exit(1);
234 	}
235 
236 	while (1) {
237 		;
238 	}
239 }
240 
T_HELPER_DECL(allocate_and_suspend,
    "defer free, and signal parent to suspend")
{
	/* Plain suspend case: no double free, no buffer deallocation. */
	allocate_and_suspend(argv, false, false);
}
246 
247 static void
resume_and_kill_proc(pid_t pid)248 resume_and_kill_proc(pid_t pid)
249 {
250 	int ret = pid_resume(pid);
251 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze");
252 	T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process");
253 }
254 
/*
 * Synchronize with the kernel's async reclaim worker for the given pid
 * via the vm.reclaim_drain_async_queue sysctl.
 */
static void
drain_async_queue(pid_t child_pid)
{
	int target = child_pid;
	int rc = sysctlbyname("vm.reclaim_drain_async_queue", NULL, NULL, &target, sizeof(target));
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rc, "vm.reclaim_drain_async_queue");
}
264 
265 static size_t
ledger_phys_footprint_index(size_t * num_entries)266 ledger_phys_footprint_index(size_t *num_entries)
267 {
268 	struct ledger_info li;
269 	struct ledger_template_info *templateInfo = NULL;
270 	int ret;
271 	size_t i, footprint_index;
272 	bool found = false;
273 
274 	ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL);
275 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)");
276 
277 	T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid");
278 	*num_entries = (size_t) li.li_entries;
279 	templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info));
280 	T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries");
281 
282 	footprint_index = 0;
283 	ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL);
284 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)");
285 	for (i = 0; i < *num_entries; i++) {
286 		if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) {
287 			footprint_index = i;
288 			found = true;
289 		}
290 	}
291 	free(templateInfo);
292 	T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger");
293 	return footprint_index;
294 }
295 
296 static int64_t
get_ledger_entry_for_pid(pid_t pid,size_t index,size_t num_entries)297 get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries)
298 {
299 	int ret;
300 	int64_t value;
301 	struct ledger_entry_info *lei = NULL;
302 
303 	lei = malloc(num_entries * sizeof(*lei));
304 	ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries);
305 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)");
306 	value = lei[index].lei_balance;
307 	free(lei);
308 	return value;
309 }
310 
311 static pid_t child_pid;
312 
313 static void
test_after_background_helper_launches(char * variant,char * arg1,dispatch_block_t test_block,dispatch_block_t exit_block)314 test_after_background_helper_launches(char* variant, char * arg1, dispatch_block_t test_block, dispatch_block_t exit_block)
315 {
316 	char **launch_tool_args;
317 	char testpath[PATH_MAX];
318 	uint32_t testpath_buf_size;
319 
320 	dispatch_source_t ds_signal, ds_exit;
321 
322 	/* Wait for the child process to tell us that it's ready, and then freeze it */
323 	signal(SIGUSR1, SIG_IGN);
324 	ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
325 	T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
326 	dispatch_source_set_event_handler(ds_signal, test_block);
327 
328 	dispatch_activate(ds_signal);
329 
330 	testpath_buf_size = sizeof(testpath);
331 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
332 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
333 	T_LOG("Executable path: %s", testpath);
334 	launch_tool_args = (char *[]){
335 		testpath,
336 		"-n",
337 		variant,
338 		arg1,
339 		NULL
340 	};
341 
342 	/* Spawn the child process. */
343 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
344 	if (ret != 0) {
345 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
346 	}
347 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
348 
349 	/* Listen for exit. */
350 	ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue());
351 	dispatch_source_set_event_handler(ds_exit, exit_block);
352 
353 	dispatch_activate(ds_exit);
354 	dispatch_main();
355 }
356 
T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.",
    T_META_ASROOT(true),
    T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
{
	/* Helper defer-frees 20 x 1MB entries, then signals us and spins. */
	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
		int ret = 0;
		size_t num_ledger_entries = 0;
		size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
		int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20);
		/* Snapshot footprint before the suspend-triggered reclaim. */
		before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
		T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated");
		ret = pid_suspend(child_pid);
		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
		/*
		 * The reclaim work is kicked off asynchronously by the suspend.
		 * So we need to call into the kernel to synchronize with the reclaim worker
		 * thread.
		 */
		drain_async_queue(child_pid);

		/* Footprint should have dropped by at least the deferred-freed total. */
		after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
		T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed");

		resume_and_kill_proc(child_pid);
	},
	    ^{
		int status = 0, code = 0;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		code = WEXITSTATUS(status);
		/* NOTE(review): child is SIGKILLed above, so WIFEXITED(status) is
		 * false here and WEXITSTATUS is 0 by construction — verify intent. */
		T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly");
		T_END;
	});
}
391 
392 T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill",
393     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
394 {
395 	int err;
396 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
397 	const size_t kNumEntries = 50;
398 	static const size_t kAllocationSize = (1UL << 20); // 1MB
399 	static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize;
400 
401 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
402 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
403 
404 	err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE);
405 	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits");
406 
407 	for (size_t i = 0; i < kNumEntries; i++) {
408 		mach_vm_address_t addr = 0;
409 		uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
410 		T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
411 	}
412 
413 	T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit);
414 }
415 
T_DECL(vm_reclaim_update_reclaimable_bytes_threshold, "Kernel reclaims when num_bytes_reclaimable crosses threshold",
    T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
{
	mach_vm_size_t kNumEntries = 0;
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	const size_t kAllocationSize = vm_kernel_page_size;
	uint64_t vm_reclaim_reclaimable_max_threshold;
	int ret;
	size_t len = sizeof(vm_reclaim_reclaimable_max_threshold);
	size_t num_ledger_entries = 0;
	size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	// Allocate 1000 times the reclaim threshold
	ret = sysctlbyname("vm.reclaim_max_threshold", &vm_reclaim_reclaimable_max_threshold, &len, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim_max_threshold");
	kNumEntries = vm_reclaim_reclaimable_max_threshold / kAllocationSize * 1000;
	T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Entries will not fill up ringbuffer.");

	/* Defer-free page-sized chunks; crossing the threshold should make the
	 * kernel reclaim them without an explicit synchronize call. */
	mach_vm_address_t addr = 0;
	for (uint64_t i = 0; i < kNumEntries; i++) {
		uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
		T_QUIET; T_ASSERT_EQ(idx, i, "idx is correct");
	}

	/* If reclamation happened, our footprint stays below the total deferred. */
	T_QUIET; T_ASSERT_LT(get_ledger_entry_for_pid(getpid(), phys_footprint_index, num_ledger_entries),
	    (int64_t) ((kNumEntries) * kAllocationSize), "Entries were reclaimed as we crossed threshold");
}
446 
447 T_HELPER_DECL(deallocate_buffer,
448     "deallocate the buffer from underneath the kernel")
449 {
450 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
451 	static const size_t kAllocationSize = (1UL << 20); // 1MB
452 	mach_vm_address_t addr;
453 
454 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
455 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
456 
457 	uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, 1, &addr);
458 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
459 	mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
460 	    offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
461 	kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
462 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
463 
464 	mach_vm_reclaim_synchronize(&ringbuffer, 10);
465 
466 	T_FAIL("Test did not crash when synchronizing on a deallocated buffer!");
467 }
468 
469 T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer",
470     T_META_IGNORECRASHES("vm_reclaim_copyio_buffer_error*"),
471     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
472 {
473 	int status = spawn_helper_and_wait_for_exit("deallocate_buffer");
474 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
475 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
476 }
477 
478 T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer")
479 {
480 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
481 	static const size_t kAllocationSize = (1UL << 20); // 1MB
482 	mach_vm_address_t addr;
483 	bool should_update_kernel_accounting = false;
484 
485 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
486 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
487 
488 	uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, 1, &addr);
489 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
490 	idx = mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, &should_update_kernel_accounting);
491 	T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index");
492 
493 	mach_vm_reclaim_synchronize(&ringbuffer, 2);
494 
495 	T_FAIL("Test did not crash when doing a double free!");
496 }
497 
498 T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception",
499     T_META_IGNORECRASHES("vm_reclaim_dealloc_gap*"),
500     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
501 {
502 	int status = spawn_helper_and_wait_for_exit("dealloc_gap");
503 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
504 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
505 }
506 
T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap,
    "defer double free, and signal parent to suspend")
{
	/* double_free=true plants a duplicate entry in the ring before suspend. */
	allocate_and_suspend(argv, false, true);
}
512 
/*
 * Launch the given helper variant, suspend it (which kicks off async
 * reclaim of its corrupted ringbuffer), and verify the kernel delivers
 * a fatal SIGKILL exception to it.
 */
static void
vm_reclaim_async_exception(char *variant, char *arg1)
{
	test_after_background_helper_launches(variant, arg1, ^{
		int ret = 0;
		ret = pid_suspend(child_pid);
		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
		/*
		 * The reclaim work is kicked off asynchronously by the suspend.
		 * So we need to call into the kernel to synchronize with the reclaim worker
		 * thread.
		 */
		drain_async_queue(child_pid);
	}, ^{
		int status;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
		T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
		T_END;
	});
}
535 
/* Async variant of vm_reclaim_dealloc_gap: the duplicate entry is found by
 * the kernel's async drain after suspend, not by a synchronize call. */
T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception",
    T_META_IGNORECRASHES("vm_reclaim_dealloc_gap_async*"),
    T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
{
	vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15");
}
542 
T_HELPER_DECL(allocate_and_suspend_with_buffer_error,
    "defer free, free buffer, and signal parent to suspend")
{
	/* free_buffer=true deallocates the ring itself, forcing a copyio error. */
	allocate_and_suspend(argv, true, false);
}
548 
549 T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception",
550     T_META_IGNORECRASHES("vm_reclaim_dealloc_gap_async*"),
551     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
552 {
553 	vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15");
554 }
555 
T_HELPER_DECL(reuse_freed_entry_fork,
    "defer free, sync, and try to use entry")
{
	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;
	static const unsigned char kValue = 119;

	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");

	uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, kValue, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");

	/* Fork: the child inherits the ringbuffer and must be able to sync it. */
	pid_t forked_pid = fork();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NE(forked_pid, -1, "fork()");
	if (forked_pid == 0) {
		/* Child: synchronize then deliberately crash on the freed range. */
		kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
	} else {
		/* Parent: the child must die with SIGSEGV for the test to pass. */
		int status;
		pid_t rc = waitpid(forked_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, forked_pid, "waitpid");
		T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Forked process crashed.");
		T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Forked process crashed with segmentation fault.");
	}
}
584 
585 T_DECL(vm_reclaim_fork, "Ensure reclaim buffer is inherited across a fork",
586     T_META_IGNORECRASHES("vm_reclaim_fork*"),
587     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
588 {
589 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_fork");
590 	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
591 	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
592 }
593 
594 #define SUSPEND_AND_RESUME_COUNT 4
595 
// rdar://110081398
T_DECL(reclaim_async_on_repeated_suspend,
    "verify that subsequent suspends are allowed",
    T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
{
	const int sleep_duration = 3;
	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
		int ret = 0;
		/* Rapid suspend/resume cycles; each suspend queues async reclaim. */
		for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) {
		        ret = pid_suspend(child_pid);
		        T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()");
		        ret = pid_resume(child_pid);
		        T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()");
		}
		/* Give the async reclaim worker time to run before killing the child. */
		T_LOG("Sleeping %d sec...", sleep_duration);
		sleep(sleep_duration);
		T_LOG("Killing child...");
		T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()");
	}, ^{
		int status;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		/* NOTE(review): child was SIGKILLed above, so WIFEXITED(status) is
		 * false and WEXITSTATUS is 0 by construction — this assert is
		 * vacuous; consider checking WIFSIGNALED/WTERMSIG instead. */
		T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
		T_END;
	});
}
622 
623 T_HELPER_DECL(ringbuffer_init_after_exec,
624     "initialize a ringbuffer after exec")
625 {
626 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
627 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
628 	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");
629 }
630 
631 extern char **environ;
632 
633 T_HELPER_DECL(exec_after_ringbuffer_init,
634     "initialize a ringbuffer then exec")
635 {
636 	char **launch_tool_args;
637 	char testpath[PATH_MAX];
638 	uint32_t testpath_buf_size;
639 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
640 
641 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
642 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");
643 
644 	testpath_buf_size = sizeof(testpath);
645 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
646 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
647 	T_LOG("Executable path: %s", testpath);
648 	launch_tool_args = (char *[]){
649 		testpath,
650 		"-n",
651 		"ringbuffer_init_after_exec",
652 		NULL
653 	};
654 
655 	/* Spawn the child process. */
656 	posix_spawnattr_t spawnattrs;
657 	posix_spawnattr_init(&spawnattrs);
658 	posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC);
659 	posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ);
660 	T_ASSERT_FAIL("should not be reached");
661 }
662 
663 T_DECL(reclaim_exec_new_reclaim_buffer,
664     "verify that an exec-ed process may instantiate a new buffer",
665     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
666 {
667 	char **launch_tool_args;
668 	char testpath[PATH_MAX];
669 	uint32_t testpath_buf_size;
670 
671 	testpath_buf_size = sizeof(testpath);
672 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
673 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
674 	T_LOG("Executable path: %s", testpath);
675 	launch_tool_args = (char *[]){
676 		testpath,
677 		"-n",
678 		"exec_after_ringbuffer_init",
679 		NULL
680 	};
681 
682 	/* Spawn the child process. */
683 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
684 	if (ret != 0) {
685 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
686 	}
687 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
688 
689 	bool success = dt_waitpid(child_pid, NULL, NULL, 10);
690 	T_QUIET; T_ASSERT_TRUE(success, "dt_waitpid()");
691 }
692