xref: /xnu-10063.101.15/tests/vm/vm_reclaim.c (revision 94d3b452840153a99b38a3a9659680b2a006908e) !
1 #include <sys/types.h>
2 #include <sys/sysctl.h>
3 #include <mach/mach.h>
4 #include <mach/mach_vm.h>
5 #include <mach/vm_reclaim.h>
6 #include <mach-o/dyld.h>
7 #include <os/atomic_private.h>
8 #include <signal.h>
9 #include <spawn.h>
10 #include <spawn_private.h>
11 #include <unistd.h>
12 
13 #include <darwintest.h>
14 #include <darwintest_utils.h>
15 
16 #include <Kernel/kern/ledger.h>
17 extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);
18 
19 #include "memorystatus_assertion_helpers.h"
20 
21 // Some of the unit tests test deferred deallocations.
22 // For these we need to set a sufficiently large reclaim threshold
23 // to ensure their buffers aren't freed prematurely.
24 #define VM_RECLAIM_THRESHOLD_BOOTARG_HIGH "vm_reclaim_max_threshold=268435456"
25 #define VM_RECLAIM_THRESHOLD_BOOTARG_LOW "vm_reclaim_max_threshold=16384"
26 #define VM_RECLAIM_BOOTARG_DISABLED "vm_reclaim_max_threshold=0"
27 
28 T_GLOBAL_META(
29 	T_META_NAMESPACE("xnu.vm"),
30 	T_META_RADAR_COMPONENT_NAME("xnu"),
31 	T_META_RADAR_COMPONENT_VERSION("performance"),
32 	T_META_OWNER("jarrad"),
33 	T_META_ENABLED(TARGET_OS_IOS && !TARGET_OS_MACCATALYST),
34 	// Ensure we don't conflict with libmalloc's reclaim buffer
35 	T_META_ENVVAR("MallocLargeCache=0"),
36 	T_META_RUN_CONCURRENTLY(false)
37 	);
38 
39 T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer",
40     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
41 {
42 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
43 
44 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
45 
46 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
47 }
48 
49 T_DECL(vm_reclaim_init_fails_when_disabled, "Initializing a ring buffer on a system with vm_reclaim disabled should fail",
50     T_META_BOOTARGS_SET(VM_RECLAIM_BOOTARG_DISABLED))
51 {
52 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
53 
54 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
55 
56 	T_QUIET; T_EXPECT_MACH_ERROR(kr, KERN_NOT_SUPPORTED, "mach_vm_reclaim_ringbuffer_init");
57 }
58 
59 /*
60  * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call.
61  */
62 static uint64_t
allocate_and_defer_free(size_t size,mach_vm_reclaim_ringbuffer_v1_t ringbuffer,unsigned char val,mach_vm_reclaim_behavior_v1_t behavior,mach_vm_address_t * addr)63 allocate_and_defer_free(size_t size, mach_vm_reclaim_ringbuffer_v1_t ringbuffer, unsigned char val, mach_vm_reclaim_behavior_v1_t behavior, mach_vm_address_t *addr /* OUT */)
64 {
65 	kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
66 	bool should_update_kernel_accounting = false;
67 	uint64_t idx;
68 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map");
69 
70 	memset((void *) *addr, val, size);
71 
72 	idx = mach_vm_reclaim_mark_free(ringbuffer, *addr, (uint32_t) size, behavior, &should_update_kernel_accounting);
73 	if (should_update_kernel_accounting) {
74 		mach_vm_reclaim_update_kernel_accounting(ringbuffer);
75 	}
76 	return idx;
77 }
78 
79 static uint64_t
allocate_and_defer_deallocate(size_t size,mach_vm_reclaim_ringbuffer_v1_t ringbuffer,unsigned char val,mach_vm_address_t * addr)80 allocate_and_defer_deallocate(size_t size, mach_vm_reclaim_ringbuffer_v1_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */)
81 {
82 	return allocate_and_defer_free(size, ringbuffer, val, MACH_VM_RECLAIM_DEALLOCATE, addr);
83 }
84 
85 T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync",
86     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
87 {
88 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
89 	static const size_t kAllocationSize = (1UL << 20); // 1MB
90 	mach_vm_address_t addr;
91 
92 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
93 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
94 
95 	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
96 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
97 	mach_vm_reclaim_synchronize(&ringbuffer, 1);
98 }
99 
100 static pid_t
spawn_helper(char * helper)101 spawn_helper(char *helper)
102 {
103 	char **launch_tool_args;
104 	char testpath[PATH_MAX];
105 	uint32_t testpath_buf_size;
106 	pid_t child_pid;
107 
108 	testpath_buf_size = sizeof(testpath);
109 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
110 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
111 	T_LOG("Executable path: %s", testpath);
112 	launch_tool_args = (char *[]){
113 		testpath,
114 		"-n",
115 		helper,
116 		NULL
117 	};
118 
119 	/* Spawn the child process. */
120 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
121 	if (ret != 0) {
122 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
123 	}
124 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
125 
126 	return child_pid;
127 }
128 
129 static int
spawn_helper_and_wait_for_exit(char * helper)130 spawn_helper_and_wait_for_exit(char *helper)
131 {
132 	int status;
133 	pid_t child_pid, rc;
134 
135 	child_pid = spawn_helper(helper);
136 	rc = waitpid(child_pid, &status, 0);
137 	T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
138 	return status;
139 }
140 
141 /*
142  * Returns true iff every entry in buffer is expected.
143  */
144 static bool
check_buffer(mach_vm_address_t addr,size_t size,unsigned char expected)145 check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected)
146 {
147 	unsigned char *buffer = (unsigned char *) addr;
148 	for (size_t i = 0; i < size; i++) {
149 		if (buffer[i] != expected) {
150 			return false;
151 		}
152 	}
153 	return true;
154 }
155 
156 /*
157  * Read every byte of a buffer to ensure re-usability
158  */
159 static void
read_buffer(mach_vm_address_t addr,size_t size)160 read_buffer(mach_vm_address_t addr, size_t size)
161 {
162 	volatile uint8_t byte;
163 	uint8_t *buffer = (uint8_t *)addr;
164 	for (size_t i = 0; i < size; i++) {
165 		byte = buffer[i];
166 	}
167 }
168 
169 /*
170  * Check that the given (freed) buffer has changed.
171  * This will likely crash, but if we make it through the entire buffer then segfault on purpose.
172  */
173 static void
assert_buffer_has_changed_and_crash(mach_vm_address_t addr,size_t size,unsigned char expected)174 assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected)
175 {
176 	/*
177 	 * mach_vm_reclaim_synchronize should have ensured the buffer was freed.
178 	 * Two cases:
179 	 * 1. The buffer is still free (touching it causes a crash)
180 	 * 2. The address range was re-allocated by some other library in process.
181 	 * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled
182 	 * with the value we wrote to it. So scan the buffer. If we segfault it's case #1
183 	 * and if we see another value it's case #2.
184 	 */
185 	bool changed = !check_buffer(addr, size, expected);
186 	T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated");
187 	/* Case #2. Force a segfault so the parent sees that we crashed. */
188 	*(volatile int *) 0 = 1;
189 
190 	T_FAIL("Test did not crash when dereferencing NULL");
191 }
192 
193 static void
reuse_reclaimed_entry(mach_vm_reclaim_behavior_v1_t behavior)194 reuse_reclaimed_entry(mach_vm_reclaim_behavior_v1_t behavior)
195 {
196 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
197 	static const size_t kAllocationSize = (1UL << 20); // 1MB
198 	mach_vm_address_t addr;
199 	static const unsigned char kValue = 220;
200 
201 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
202 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
203 
204 	uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, kValue, behavior, &addr);
205 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
206 	kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
207 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
208 	bool usable = mach_vm_reclaim_mark_used(&ringbuffer, idx, addr, kAllocationSize);
209 	bool reclaimed = mach_vm_reclaim_is_reclaimed(&ringbuffer, idx);
210 	T_EXPECT_FALSE(usable, "reclaimed entry is not re-usable");
211 	T_EXPECT_TRUE(reclaimed, "reclaimed entry was marked reclaimed");
212 	switch (behavior) {
213 	case MACH_VM_RECLAIM_DEALLOCATE:
214 		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
215 		break;
216 	case MACH_VM_RECLAIM_REUSABLE:
217 		read_buffer(addr, kAllocationSize);
218 		T_PASS("Freed buffer re-used successfully");
219 		break;
220 	default:
221 		T_FAIL("Unexpected reclaim behavior %d", behavior);
222 	}
223 }
224 
225 T_HELPER_DECL(reuse_freed_entry_dealloc,
226     "defer free (dealloc), sync, and try to use entry")
227 {
228 	reuse_reclaimed_entry(MACH_VM_RECLAIM_DEALLOCATE);
229 }
230 
231 T_HELPER_DECL(reuse_freed_entry_reusable,
232     "defer free (reusable), sync, and try to use entry")
233 {
234 	reuse_reclaimed_entry(MACH_VM_RECLAIM_REUSABLE);
235 }
236 
237 T_DECL(vm_reclaim_single_entry_verify_free, "Place a single entry in the buffer and call sync",
238     T_META_IGNORECRASHES(".*vm_reclaim_single_entry_verify_free.*"),
239     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
240 {
241 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_dealloc");
242 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
243 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault.");
244 }
245 
246 T_DECL(vm_reclaim_single_entry_reusable,
247     "Reclaim a reusable entry and verify re-use is legal",
248     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
249 {
250 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_reusable");
251 	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
252 	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
253 }
254 
255 static void
allocate_and_suspend(char * const * argv,bool free_buffer,bool double_free)256 allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free)
257 {
258 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
259 	static const size_t kAllocationSize = (1UL << 20); // 1MB
260 	mach_vm_address_t addr = 0;
261 	bool should_update_kernel_accounting = false;
262 
263 	const mach_vm_size_t kNumEntries = (size_t) atoi(argv[0]);
264 
265 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
266 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
267 	T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Test does not fill up ringubffer");
268 
269 	for (size_t i = 0; i < kNumEntries; i++) {
270 		uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
271 		T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
272 	}
273 
274 	if (double_free) {
275 		// Double free the last entry
276 		mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, MACH_VM_RECLAIM_DEALLOCATE, &should_update_kernel_accounting);
277 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_mark_free");
278 	}
279 
280 	if (free_buffer) {
281 		mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
282 		    offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
283 		kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
284 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
285 	}
286 
287 	// Signal to our parent to suspend us
288 	if (kill(getppid(), SIGUSR1) != 0) {
289 		T_LOG("Unable to signal to parent process!");
290 		exit(1);
291 	}
292 
293 	while (1) {
294 		;
295 	}
296 }
297 
298 T_HELPER_DECL(allocate_and_suspend,
299     "defer free, and signal parent to suspend")
300 {
301 	allocate_and_suspend(argv, false, false);
302 }
303 
304 static void
resume_and_kill_proc(pid_t pid)305 resume_and_kill_proc(pid_t pid)
306 {
307 	int ret = pid_resume(pid);
308 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze");
309 	T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process");
310 }
311 
312 static void
drain_async_queue(pid_t child_pid)313 drain_async_queue(pid_t child_pid)
314 {
315 	int val = child_pid;
316 	int ret;
317 	size_t len = sizeof(val);
318 	ret = sysctlbyname("vm.reclaim_drain_async_queue", NULL, NULL, &val, len);
319 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim_drain_async_queue");
320 }
321 
322 static size_t
ledger_phys_footprint_index(size_t * num_entries)323 ledger_phys_footprint_index(size_t *num_entries)
324 {
325 	struct ledger_info li;
326 	struct ledger_template_info *templateInfo = NULL;
327 	int ret;
328 	size_t i, footprint_index;
329 	bool found = false;
330 
331 	ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL);
332 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)");
333 
334 	T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid");
335 	*num_entries = (size_t) li.li_entries;
336 	templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info));
337 	T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries");
338 
339 	footprint_index = 0;
340 	ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL);
341 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)");
342 	for (i = 0; i < *num_entries; i++) {
343 		if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) {
344 			footprint_index = i;
345 			found = true;
346 		}
347 	}
348 	free(templateInfo);
349 	T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger");
350 	return footprint_index;
351 }
352 
353 static int64_t
get_ledger_entry_for_pid(pid_t pid,size_t index,size_t num_entries)354 get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries)
355 {
356 	int ret;
357 	int64_t value;
358 	struct ledger_entry_info *lei = NULL;
359 
360 	lei = malloc(num_entries * sizeof(*lei));
361 	ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries);
362 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)");
363 	value = lei[index].lei_balance;
364 	free(lei);
365 	return value;
366 }
367 
368 static pid_t child_pid;
369 
370 static void
test_after_background_helper_launches(char * variant,char * arg1,dispatch_block_t test_block,dispatch_block_t exit_block)371 test_after_background_helper_launches(char* variant, char * arg1, dispatch_block_t test_block, dispatch_block_t exit_block)
372 {
373 	char **launch_tool_args;
374 	char testpath[PATH_MAX];
375 	uint32_t testpath_buf_size;
376 
377 	dispatch_source_t ds_signal, ds_exit;
378 
379 	/* Wait for the child process to tell us that it's ready, and then freeze it */
380 	signal(SIGUSR1, SIG_IGN);
381 	ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
382 	T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
383 	dispatch_source_set_event_handler(ds_signal, test_block);
384 
385 	dispatch_activate(ds_signal);
386 
387 	testpath_buf_size = sizeof(testpath);
388 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
389 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
390 	T_LOG("Executable path: %s", testpath);
391 	launch_tool_args = (char *[]){
392 		testpath,
393 		"-n",
394 		variant,
395 		arg1,
396 		NULL
397 	};
398 
399 	/* Spawn the child process. */
400 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
401 	if (ret != 0) {
402 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
403 	}
404 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
405 
406 	/* Listen for exit. */
407 	ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue());
408 	dispatch_source_set_event_handler(ds_exit, exit_block);
409 
410 	dispatch_activate(ds_exit);
411 	dispatch_main();
412 }
413 
414 T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.",
415     T_META_ASROOT(true),
416     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
417 {
418 	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
419 		int ret = 0;
420 		size_t num_ledger_entries = 0;
421 		size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
422 		int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20);
423 		before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
424 		T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated");
425 		ret = pid_suspend(child_pid);
426 		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
427 		/*
428 		 * The reclaim work is kicked off asynchronously by the suspend.
429 		 * So we need to call into the kernel to synchronize with the reclaim worker
430 		 * thread.
431 		 */
432 		drain_async_queue(child_pid);
433 
434 		after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
435 		T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed");
436 
437 		resume_and_kill_proc(child_pid);
438 	},
439 	    ^{
440 		int status = 0, code = 0;
441 		pid_t rc = waitpid(child_pid, &status, 0);
442 		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
443 		code = WEXITSTATUS(status);
444 		T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly");
445 		T_END;
446 	});
447 }
448 
449 T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill",
450     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
451 {
452 	int err;
453 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
454 	const size_t kNumEntries = 50;
455 	static const size_t kAllocationSize = (1UL << 20); // 1MB
456 	static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize;
457 
458 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
459 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
460 
461 	err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE);
462 	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits");
463 
464 	for (size_t i = 0; i < kNumEntries; i++) {
465 		mach_vm_address_t addr = 0;
466 		uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
467 		T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
468 	}
469 
470 	T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit);
471 }
472 
473 T_DECL(vm_reclaim_update_reclaimable_bytes_threshold, "Kernel reclaims when num_bytes_reclaimable crosses threshold",
474     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
475 {
476 	mach_vm_size_t kNumEntries = 0;
477 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
478 	const size_t kAllocationSize = vm_kernel_page_size;
479 	uint64_t vm_reclaim_reclaimable_max_threshold;
480 	int ret;
481 	size_t len = sizeof(vm_reclaim_reclaimable_max_threshold);
482 	size_t num_ledger_entries = 0;
483 	size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
484 
485 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
486 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
487 
488 	// Allocate 1000 times the reclaim threshold
489 	ret = sysctlbyname("vm.reclaim_max_threshold", &vm_reclaim_reclaimable_max_threshold, &len, NULL, 0);
490 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim_max_threshold");
491 	kNumEntries = vm_reclaim_reclaimable_max_threshold / kAllocationSize * 1000;
492 	T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Entries will not fill up ringbuffer.");
493 
494 	mach_vm_address_t addr = 0;
495 	for (uint64_t i = 0; i < kNumEntries; i++) {
496 		uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
497 		T_QUIET; T_ASSERT_EQ(idx, i, "idx is correct");
498 	}
499 
500 	T_QUIET; T_ASSERT_LT(get_ledger_entry_for_pid(getpid(), phys_footprint_index, num_ledger_entries),
501 	    (int64_t) ((kNumEntries) * kAllocationSize), "Entries were reclaimed as we crossed threshold");
502 }
503 
504 T_HELPER_DECL(deallocate_buffer,
505     "deallocate the buffer from underneath the kernel")
506 {
507 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
508 	static const size_t kAllocationSize = (1UL << 20); // 1MB
509 	mach_vm_address_t addr;
510 
511 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
512 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
513 
514 	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
515 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
516 	mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
517 	    offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
518 	kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
519 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
520 
521 	mach_vm_reclaim_synchronize(&ringbuffer, 10);
522 
523 	T_FAIL("Test did not crash when synchronizing on a deallocated buffer!");
524 }
525 
526 T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer",
527     T_META_IGNORECRASHES(".*deallocate_buffer.*"),
528     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
529 {
530 	int status = spawn_helper_and_wait_for_exit("deallocate_buffer");
531 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
532 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
533 }
534 
535 T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer")
536 {
537 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
538 	static const size_t kAllocationSize = (1UL << 20); // 1MB
539 	mach_vm_address_t addr;
540 	bool should_update_kernel_accounting = false;
541 
542 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
543 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
544 
545 	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
546 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
547 	idx = mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, MACH_VM_RECLAIM_DEALLOCATE, &should_update_kernel_accounting);
548 	T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index");
549 
550 	mach_vm_reclaim_synchronize(&ringbuffer, 2);
551 
552 	T_FAIL("Test did not crash when doing a double free!");
553 }
554 
555 T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception",
556     T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap.*"),
557     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
558 {
559 	int status = spawn_helper_and_wait_for_exit("dealloc_gap");
560 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
561 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
562 }
563 
564 T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap,
565     "defer double free, and signal parent to suspend")
566 {
567 	allocate_and_suspend(argv, false, true);
568 }
569 
570 static void
vm_reclaim_async_exception(char * variant,char * arg1)571 vm_reclaim_async_exception(char *variant, char *arg1)
572 {
573 	test_after_background_helper_launches(variant, arg1, ^{
574 		int ret = 0;
575 		ret = pid_suspend(child_pid);
576 		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
577 		/*
578 		 * The reclaim work is kicked off asynchronously by the suspend.
579 		 * So we need to call into the kernel to synchronize with the reclaim worker
580 		 * thread.
581 		 */
582 		drain_async_queue(child_pid);
583 	}, ^{
584 		int status;
585 		pid_t rc = waitpid(child_pid, &status, 0);
586 		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
587 		T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
588 		T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
589 		T_END;
590 	});
591 }
592 
593 T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception",
594     T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap_async.*"),
595     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
596 {
597 	vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15");
598 }
599 
600 T_HELPER_DECL(allocate_and_suspend_with_buffer_error,
601     "defer free, free buffer, and signal parent to suspend")
602 {
603 	allocate_and_suspend(argv, true, false);
604 }
605 
606 T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception",
607     T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap_async.*"),
608     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
609 {
610 	vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15");
611 }
612 
613 T_HELPER_DECL(reuse_freed_entry_fork,
614     "defer free, sync, and try to use entry")
615 {
616 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
617 	static const size_t kAllocationSize = (1UL << 20); // 1MB
618 	mach_vm_address_t addr;
619 	static const unsigned char kValue = 119;
620 
621 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
622 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
623 
624 	uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, kValue, &addr);
625 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
626 
627 	pid_t forked_pid = fork();
628 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NE(forked_pid, -1, "fork()");
629 	if (forked_pid == 0) {
630 		kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
631 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
632 		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
633 	} else {
634 		int status;
635 		pid_t rc = waitpid(forked_pid, &status, 0);
636 		T_QUIET; T_ASSERT_EQ(rc, forked_pid, "waitpid");
637 		T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Forked process crashed.");
638 		T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Forked process crashed with segmentation fault.");
639 	}
640 }
641 
642 T_DECL(vm_reclaim_fork, "Ensure reclaim buffer is inherited across a fork",
643     T_META_IGNORECRASHES(".*vm_reclaim_fork.*"),
644     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
645 {
646 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_fork");
647 	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
648 	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
649 }
650 
651 #define SUSPEND_AND_RESUME_COUNT 4
652 
653 // rdar://110081398
654 T_DECL(reclaim_async_on_repeated_suspend,
655     "verify that subsequent suspends are allowed",
656     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_HIGH))
657 {
658 	const int sleep_duration = 3;
659 	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
660 		int ret = 0;
661 		for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) {
662 		        ret = pid_suspend(child_pid);
663 		        T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()");
664 		        ret = pid_resume(child_pid);
665 		        T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()");
666 		}
667 		T_LOG("Sleeping %d sec...", sleep_duration);
668 		sleep(sleep_duration);
669 		T_LOG("Killing child...");
670 		T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()");
671 	}, ^{
672 		int status;
673 		pid_t rc = waitpid(child_pid, &status, 0);
674 		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
675 		T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
676 		T_END;
677 	});
678 }
679 
680 T_HELPER_DECL(ringbuffer_init_after_exec,
681     "initialize a ringbuffer after exec")
682 {
683 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
684 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
685 	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");
686 }
687 
688 extern char **environ;
689 
690 T_HELPER_DECL(exec_after_ringbuffer_init,
691     "initialize a ringbuffer then exec")
692 {
693 	char **launch_tool_args;
694 	char testpath[PATH_MAX];
695 	uint32_t testpath_buf_size;
696 	struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
697 
698 	kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
699 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");
700 
701 	testpath_buf_size = sizeof(testpath);
702 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
703 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
704 	T_LOG("Executable path: %s", testpath);
705 	launch_tool_args = (char *[]){
706 		testpath,
707 		"-n",
708 		"ringbuffer_init_after_exec",
709 		NULL
710 	};
711 
712 	/* Spawn the child process. */
713 	posix_spawnattr_t spawnattrs;
714 	posix_spawnattr_init(&spawnattrs);
715 	posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC);
716 	posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ);
717 	T_ASSERT_FAIL("should not be reached");
718 }
719 
720 T_DECL(reclaim_exec_new_reclaim_buffer,
721     "verify that an exec-ed process may instantiate a new buffer",
722     T_META_BOOTARGS_SET(VM_RECLAIM_THRESHOLD_BOOTARG_LOW))
723 {
724 	char **launch_tool_args;
725 	char testpath[PATH_MAX];
726 	uint32_t testpath_buf_size;
727 
728 	testpath_buf_size = sizeof(testpath);
729 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
730 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
731 	T_LOG("Executable path: %s", testpath);
732 	launch_tool_args = (char *[]){
733 		testpath,
734 		"-n",
735 		"exec_after_ringbuffer_init",
736 		NULL
737 	};
738 
739 	/* Spawn the child process. */
740 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
741 	if (ret != 0) {
742 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
743 	}
744 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
745 
746 	bool success = dt_waitpid(child_pid, NULL, NULL, 10);
747 	T_QUIET; T_ASSERT_TRUE(success, "dt_waitpid()");
748 }
749