xref: /xnu-12377.81.4/tests/vm/vm_reclaim.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 #include <sys/types.h>
2 #include <sys/sysctl.h>
3 #include <mach/mach.h>
4 #include <mach/mach_vm.h>
5 #include <mach/vm_reclaim_private.h>
6 #include <mach-o/dyld.h>
7 #include <os/atomic_private.h>
8 #include <signal.h>
9 #include <spawn.h>
10 #include <spawn_private.h>
11 #include <time.h>
12 #include <unistd.h>
13 
14 #include <darwintest.h>
15 #include <darwintest_multiprocess.h>
16 #include <darwintest_utils.h>
17 
18 #include <Kernel/kern/ledger.h>
19 extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);
20 
21 #include "memorystatus_assertion_helpers.h"
22 
23 #define T_META_VM_RECLAIM_ENABLED T_META_SYSCTL_INT("vm.reclaim.enabled=1")
24 #define T_META_VM_RECLAIM_DISABLED T_META_SYSCTL_INT("vm.reclaim.enabled=0")
25 
/* Convert mebibytes to bytes. Argument fully parenthesized so that
 * low-precedence expressions (e.g. ternaries) expand correctly. */
#define MiB(x) ((x) << 20)
27 
28 T_GLOBAL_META(
29 	T_META_NAMESPACE("xnu.vm_reclaim"),
30 	T_META_RADAR_COMPONENT_NAME("xnu"),
31 	T_META_RADAR_COMPONENT_VERSION("performance"),
32 	T_META_OWNER("jarrad"),
33 	// Ensure we don't conflict with libmalloc's reclaim buffer
34 	T_META_ENVVAR("MallocDeferredReclaim=0"),
35 	T_META_ENVVAR("MallocAllowInternalSecurity=1"),
36 	T_META_RUN_CONCURRENTLY(false),
37 	T_META_CHECK_LEAKS(false)
38 	);
39 
40 static mach_vm_reclaim_ring_t
ringbuffer_init(void)41 ringbuffer_init(void)
42 {
43 	mach_vm_reclaim_ring_t ringbuffer = NULL;
44 	mach_vm_reclaim_count_t len = mach_vm_reclaim_round_capacity(1);
45 	mach_vm_reclaim_count_t max_len = len;
46 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, len, max_len);
47 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");
48 	return ringbuffer;
49 }
50 
T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* A freshly allocated ring must have zeroed indices and entries. */
	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
	T_ASSERT_NOTNULL(ringbuffer, "ringbuffer is allocated");
	T_EXPECT_EQ(os_atomic_load(&ringbuffer->head, relaxed), 0ull, "head is zeroed");
	T_EXPECT_EQ(os_atomic_load(&ringbuffer->busy, relaxed), 0ull, "busy is zeroed");
	T_EXPECT_EQ(os_atomic_load(&ringbuffer->tail, relaxed), 0ull, "tail is zeroed");
	/* Expected capacity: entries that fit in one page after the ring header. */
	size_t expected_len = (vm_page_size - offsetof(struct mach_vm_reclaim_ring_s, entries)) /
	    sizeof(struct mach_vm_reclaim_entry_s);
	T_ASSERT_EQ((size_t)ringbuffer->len, expected_len, "length is set correctly");
	for (unsigned i = 0; i < ringbuffer->len; i++) {
		mach_vm_reclaim_entry_t entry = &ringbuffer->entries[i];
		T_QUIET; T_EXPECT_EQ(entry->address, 0ull, "address is zeroed");
		T_QUIET; T_EXPECT_EQ(entry->size, 0u, "size is zeroed");
		T_QUIET; T_EXPECT_EQ(entry->behavior, 0, "behavior is zeroed");
	}
}
70 
T_DECL(vm_reclaim_init_fails_when_disabled,
    "Initializing a ring buffer on a system with vm_reclaim disabled should fail",
    T_META_VM_RECLAIM_DISABLED, T_META_TAG_VM_PREFERRED)
{
	mach_vm_reclaim_ring_t ringbuffer;
	/* With vm.reclaim.enabled=0 the kernel should refuse ring allocation. */
	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
	T_EXPECT_MACH_ERROR(kr, VM_RECLAIM_NOT_SUPPORTED, "mach_vm_reclaim_ring_allocate()");
}
79 
80 static bool
try_cancel(mach_vm_reclaim_ring_t ringbuffer,mach_vm_reclaim_id_t id,mach_vm_address_t addr,mach_vm_size_t size,mach_vm_reclaim_action_t behavior)81 try_cancel(mach_vm_reclaim_ring_t ringbuffer, mach_vm_reclaim_id_t id, mach_vm_address_t addr, mach_vm_size_t size, mach_vm_reclaim_action_t behavior)
82 {
83 	bool update_accounting;
84 	mach_vm_reclaim_state_t state;
85 	kern_return_t kr;
86 	kr = mach_vm_reclaim_try_cancel(ringbuffer, id, addr, size, behavior, &state, &update_accounting);
87 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_cancel()");
88 	if (update_accounting) {
89 		kern_return_t tmp_kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
90 		T_QUIET; T_ASSERT_MACH_SUCCESS(tmp_kr, "mach_vm_reclaim_update_kernel_accounting()");
91 	}
92 	return mach_vm_reclaim_is_reusable(state);
93 }
94 
95 /*
96  * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call.
97  */
98 static mach_vm_reclaim_id_t
allocate_and_defer_free(size_t size,mach_vm_reclaim_ring_t ringbuffer,unsigned char val,mach_vm_reclaim_action_t behavior,mach_vm_address_t * addr)99 allocate_and_defer_free(size_t size, mach_vm_reclaim_ring_t ringbuffer,
100     unsigned char val, mach_vm_reclaim_action_t behavior,
101     mach_vm_address_t *addr /* OUT */)
102 {
103 	kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
104 	bool should_update_kernel_accounting = false;
105 	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
106 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map");
107 
108 	memset((void *) *addr, val, size);
109 
110 	kr = mach_vm_reclaim_try_enter(ringbuffer, *addr, size, behavior, &id, &should_update_kernel_accounting);
111 	if (should_update_kernel_accounting) {
112 		kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
113 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
114 	}
115 	return id;
116 }
117 
118 static mach_vm_reclaim_id_t
allocate_and_defer_deallocate(size_t size,mach_vm_reclaim_ring_t ringbuffer,unsigned char val,mach_vm_address_t * addr)119 allocate_and_defer_deallocate(size_t size, mach_vm_reclaim_ring_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */)
120 {
121 	return allocate_and_defer_free(size, ringbuffer, val, VM_RECLAIM_DEALLOCATE, addr);
122 }
123 
124 T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync",
125     T_META_VM_RECLAIM_ENABLED,
126     T_META_TAG_VM_PREFERRED)
127 {
128 	static const size_t kAllocationSize = (1UL << 20); // 1MB
129 	mach_vm_address_t addr;
130 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
131 
132 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
133 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
134 	mach_vm_reclaim_ring_flush(ringbuffer, 1);
135 }
136 
137 static pid_t
spawn_helper(char * helper)138 spawn_helper(char *helper)
139 {
140 	char **launch_tool_args;
141 	char testpath[PATH_MAX];
142 	uint32_t testpath_buf_size;
143 	pid_t child_pid;
144 
145 	testpath_buf_size = sizeof(testpath);
146 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
147 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
148 	T_LOG("Executable path: %s", testpath);
149 	launch_tool_args = (char *[]){
150 		testpath,
151 		"-n",
152 		helper,
153 		NULL
154 	};
155 
156 	/* Spawn the child process. */
157 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
158 	if (ret != 0) {
159 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
160 	}
161 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
162 
163 	return child_pid;
164 }
165 
166 static int
spawn_helper_and_wait_for_exit(char * helper)167 spawn_helper_and_wait_for_exit(char *helper)
168 {
169 	int status;
170 	pid_t child_pid, rc;
171 
172 	child_pid = spawn_helper(helper);
173 	rc = waitpid(child_pid, &status, 0);
174 	T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
175 	return status;
176 }
177 
178 /*
179  * Returns true iff every entry in buffer is expected.
180  */
181 static bool
check_buffer(mach_vm_address_t addr,size_t size,unsigned char expected)182 check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected)
183 {
184 	unsigned char *buffer = (unsigned char *) addr;
185 	for (size_t i = 0; i < size; i++) {
186 		if (buffer[i] != expected) {
187 			return false;
188 		}
189 	}
190 	return true;
191 }
192 
193 /*
194  * Read every byte of a buffer to ensure re-usability
195  */
196 static void
read_buffer(mach_vm_address_t addr,size_t size)197 read_buffer(mach_vm_address_t addr, size_t size)
198 {
199 	volatile uint8_t byte;
200 	uint8_t *buffer = (uint8_t *)addr;
201 	for (size_t i = 0; i < size; i++) {
202 		byte = buffer[i];
203 	}
204 }
205 
/*
 * Check that the given (freed) buffer has changed.
 * This will likely crash, but if we make it through the entire buffer then segfault on purpose.
 * Either way the parent observes a SIGSEGV-terminated child.
 */
static void
assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected)
{
	/*
	 * mach_vm_reclaim_ring_flush should have ensured the buffer was freed.
	 * Two cases:
	 * 1. The buffer is still free (touching it causes a crash)
	 * 2. The address range was re-allocated by some other library in process.
	 * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled
	 * with the value we wrote to it. So scan the buffer. If we segfault it's case #1
	 * and if we see another value it's case #2.
	 */
	bool changed = !check_buffer(addr, size, expected);
	T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated");
	/* Case #2. Force a segfault so the parent sees that we crashed. */
	*(volatile int *) 0 = 1;

	/* Unreachable unless the deliberate NULL store somehow did not fault. */
	T_FAIL("Test did not crash when dereferencing NULL");
}
229 
/*
 * Defer-free one allocation with the given behavior, force a flush, then try
 * to cancel (reclaim back) the entry and verify the expected reusability:
 * DEALLOCATE entries must not be reusable (and touching them must crash),
 * FREE entries must remain safely readable.
 */
static void
reuse_reclaimed_entry(mach_vm_reclaim_action_t behavior)
{
	kern_return_t kr;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;
	static const unsigned char kValue = 220;

	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();

	mach_vm_reclaim_id_t idx = allocate_and_defer_free(kAllocationSize, ringbuffer, kValue, behavior, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	kr = mach_vm_reclaim_ring_flush(ringbuffer, 10);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush");
	bool usable = try_cancel(ringbuffer, idx, addr, kAllocationSize, behavior);
	switch (behavior) {
	case VM_RECLAIM_DEALLOCATE:
		T_EXPECT_FALSE(usable, "reclaimed entry is not re-usable");
		/* Does not return: crashes with SIGSEGV by design. */
		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
		break;
	case VM_RECLAIM_FREE:
		T_EXPECT_TRUE(usable, "reclaimed REUSABLE entry is re-usable");
		read_buffer(addr, kAllocationSize);
		T_PASS("Freed buffer re-used successfully");
		break;
	default:
		T_FAIL("Unexpected reclaim behavior %d", behavior);
	}
}
259 
/* Child helper: expected to crash with SIGSEGV (DEALLOCATE path). */
T_HELPER_DECL(reuse_freed_entry_dealloc,
    "defer free (dealloc), sync, and try to use entry")
{
	reuse_reclaimed_entry(VM_RECLAIM_DEALLOCATE);
}
265 
/* Child helper: expected to exit cleanly (FREE/reusable path). */
T_HELPER_DECL(reuse_freed_entry_reusable,
    "defer free (reusable), sync, and try to use entry")
{
	reuse_reclaimed_entry(VM_RECLAIM_FREE);
}
271 
T_DECL(vm_reclaim_single_entry_verify_free, "Place a single entry in the buffer and call sync",
    T_META_IGNORECRASHES(".*vm_reclaim_single_entry_verify_free.*"),
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* The helper is expected to die with SIGSEGV after its entry is reclaimed. */
	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_dealloc");
	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault.");
}
281 
T_DECL(vm_reclaim_single_entry_reusable,
    "Reclaim a reusable entry and verify re-use is legal",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* FREE-behavior entries stay mapped, so the helper must exit cleanly. */
	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_reusable");
	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
}
291 
/*
 * Child-side workhorse: defer-free argv[0] 1MB chunks, optionally corrupt the
 * ring with a double free (double_free) or deallocate the ring's backing
 * pages (free_buffer), then signal the parent with SIGUSR1 and spin so the
 * parent can suspend us and trigger an asynchronous reclaim.
 */
static void
allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free)
{
	kern_return_t kr;
	static const mach_vm_reclaim_count_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr = 0;
	bool should_update_kernel_accounting = false;
	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();

	const mach_vm_reclaim_count_t kNumEntries = (mach_vm_reclaim_count_t)atoi(argv[0]);
	mach_vm_reclaim_count_t capacity;
	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
	T_QUIET; T_ASSERT_LT(kNumEntries, capacity, "Test does not fill up ringbuffer");

	T_LOG("allocate_and_suspend: Allocating and freeing %u entries...", kNumEntries);
	for (size_t i = 0; i < kNumEntries; i++) {
		addr = 0;
		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr);
		T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct");
		T_LOG("allocate_and_suspend: Allocated and deferred 0x%llx", addr);
	}

	if (double_free) {
		// Double free the last entry
		mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
		kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &id, &should_update_kernel_accounting);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter");
	}

	if (free_buffer) {
		/* Pull the ring's pages out from under the kernel to force a copyio error. */
		mach_vm_size_t buffer_size = (size_t)capacity *
		    sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries);
		kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
	}

	T_LOG("allocate_and_suspend: Signalling parent");
	// Signal to our parent to suspend us
	if (kill(getppid(), SIGUSR1) != 0) {
		T_LOG("Unable to signal to parent process!");
		exit(1);
	}

	T_LOG("allocate_and_suspend: Spinning");
	/* Spin (never exit) so the parent can suspend/kill this process. */
	while (1) {
		;
	}
	T_ASSERT_FAIL("notreached");
}
342 
/* Plain variant: no double free, ring left intact. */
T_HELPER_DECL(allocate_and_suspend,
    "defer free, and signal parent to suspend")
{
	allocate_and_suspend(argv, false, false);
}
348 
349 static void
resume_and_kill_proc(pid_t pid)350 resume_and_kill_proc(pid_t pid)
351 {
352 	int ret = pid_resume(pid);
353 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze");
354 	T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process");
355 }
356 
357 static void
wait_for_pid_to_be_drained(pid_t child_pid)358 wait_for_pid_to_be_drained(pid_t child_pid)
359 {
360 	int val = child_pid;
361 	int ret;
362 	size_t len = sizeof(val);
363 	ret = sysctlbyname("vm.reclaim.wait_for_pid", NULL, NULL, &val, len);
364 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim.wait_for_pid");
365 }
366 
/*
 * Look up the index of the "phys_footprint" entry in the task ledger
 * template. Also returns the total number of ledger entries via num_entries
 * (as updated by the LEDGER_TEMPLATE_INFO call). Asserts if the entry is
 * not found.
 */
static size_t
ledger_phys_footprint_index(size_t *num_entries)
{
	struct ledger_info li;
	struct ledger_template_info *templateInfo = NULL;
	int ret;
	size_t i, footprint_index;
	bool found = false;

	ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)");

	T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid");
	*num_entries = (size_t) li.li_entries;
	templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info));
	T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries");

	footprint_index = 0;
	/* LEDGER_TEMPLATE_INFO may update *num_entries to the count returned. */
	ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)");
	for (i = 0; i < *num_entries; i++) {
		if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) {
			footprint_index = i;
			found = true;
		}
	}
	free(templateInfo);
	T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger");
	return footprint_index;
}
397 
398 static int64_t
get_ledger_entry_for_pid(pid_t pid,size_t index,size_t num_entries)399 get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries)
400 {
401 	int ret;
402 	int64_t value;
403 	struct ledger_entry_info *lei = NULL;
404 
405 	lei = malloc(num_entries * sizeof(*lei));
406 	ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries);
407 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)");
408 	value = lei[index].lei_balance;
409 	free(lei);
410 	return value;
411 }
412 
/* pid of the spawned helper; read by the dispatch blocks below. */
static pid_t child_pid;

/*
 * Re-exec this binary as a background helper ("-n variant arg1") and drive it
 * from the main queue: test_block runs when the child raises SIGUSR1 (its
 * "ready" signal), exit_block runs when the child exits. Never returns
 * (enters dispatch_main(); exit_block is expected to call T_END).
 */
static void
test_after_background_helper_launches(char* variant, char * arg1, dispatch_block_t test_block, dispatch_block_t exit_block)
{
	char **launch_tool_args;
	char testpath[PATH_MAX];
	uint32_t testpath_buf_size;

	dispatch_source_t ds_signal, ds_exit;

	/* Wait for the child process to tell us that it's ready, and then freeze it */
	signal(SIGUSR1, SIG_IGN);
	ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
	T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
	dispatch_source_set_event_handler(ds_signal, test_block);

	/* Install the signal source before spawning to avoid missing SIGUSR1. */
	dispatch_activate(ds_signal);

	testpath_buf_size = sizeof(testpath);
	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
	T_LOG("Executable path: %s", testpath);
	launch_tool_args = (char *[]){
		testpath,
		"-n",
		variant,
		arg1,
		NULL
	};

	/* Spawn the child process. */
	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
	if (ret != 0) {
		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");

	/* Listen for exit. */
	ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue());
	dispatch_source_set_event_handler(ds_exit, exit_block);

	dispatch_activate(ds_exit);
	dispatch_main();
}
458 
T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.",
    T_META_ASROOT(true),
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* Child defers 20 x 1MB entries; suspending it should reclaim them all. */
	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
		int ret = 0;
		size_t num_ledger_entries = 0;
		size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
		int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20);
		before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
		T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated");
		ret = pid_suspend(child_pid);
		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
		/*
		 * The reclaim work is kicked off asynchronously by the suspend.
		 * So we need to call into the kernel to synchronize with the reclaim worker
		 * thread.
		 */
		wait_for_pid_to_be_drained(child_pid);
		after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
		T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed");

		resume_and_kill_proc(child_pid);
	},
	    ^{
		int status = 0, code = 0;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		code = WEXITSTATUS(status);
		T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly");
		T_END;
	});
}
493 
T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	int err;
	const size_t kNumEntries = 50;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	/* Limit (5MB) is 1/10th of the total deferred memory (50MB). */
	static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize;

	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();

	err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits");

	/*
	 * If deferred entries were not reclaimed before limit enforcement, this
	 * loop would push us past the memlimit and the process would be killed.
	 */
	for (size_t i = 0; i < kNumEntries; i++) {
		mach_vm_address_t addr = 0;
		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr);
		T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct");
	}

	T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit);
}
516 
/*
 * Child helper: deallocate the ring's backing memory, then flush. The kernel
 * should fail copyio on the ring and kill this process.
 */
T_HELPER_DECL(deallocate_buffer,
    "deallocate the buffer from underneath the kernel")
{
	kern_return_t kr;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;

	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();

	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	mach_vm_reclaim_count_t capacity;
	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");

	/* Size of the ring mapping: header plus capacity entries. */
	mach_vm_size_t buffer_size = (size_t)capacity *
	    sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries);
	kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");

	/* Expected to be fatal: the kernel can no longer read the ring. */
	mach_vm_reclaim_ring_flush(ringbuffer, 10);

	T_FAIL("Test did not crash when synchronizing on a deallocated buffer!");
}
541 
T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer",
    T_META_IGNORECRASHES(".*deallocate_buffer.*"),
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* The helper must be SIGKILLed by the kernel when the ring copyio fails. */
	int status = spawn_helper_and_wait_for_exit("deallocate_buffer");
	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
}
551 
/*
 * Child helper: enter the same region twice (a double free), then flush.
 * With guard exceptions enabled, processing the stale entry must be fatal.
 */
T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer")
{
	kern_return_t kr;
	static const size_t kAllocationSize = (1UL << 20); // 1MB
	mach_vm_address_t addr;
	bool should_update_kernel_accounting = false;

	/* Make guard exceptions fatal so the double free kills the process. */
	kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()");

	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();

	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	idx = VM_RECLAIM_ID_NULL;
	/* Second entry for the same range: the deliberate double free. */
	kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &idx, &should_update_kernel_accounting);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
	T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index");

	/* Expected to be fatal while reclaiming the second (stale) entry. */
	mach_vm_reclaim_ring_flush(ringbuffer, 2);

	T_FAIL("Test did not crash when doing a double free!");
}
575 
T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception",
    T_META_IGNORECRASHES(".*dealloc_gap.*"),
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* The helper's double free must result in a SIGKILL from the kernel. */
	int status = spawn_helper_and_wait_for_exit("dealloc_gap");
	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
}
585 
/* Variant with a double free in the ring; fatal when reclaim runs on suspend. */
T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap,
    "defer double free, and signal parent to suspend")
{
	/* Make guard exceptions fatal so the async reclaim kills the process. */
	kern_return_t kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()");
	allocate_and_suspend(argv, false, true);
}
593 
/*
 * Shared driver for the async-exception tests: suspend the helper so the
 * kernel reclaims its (corrupted) ring asynchronously, then verify the
 * child was SIGKILLed rather than exiting.
 */
static void
vm_reclaim_async_exception(char *variant, char *arg1)
{
	test_after_background_helper_launches(variant, arg1, ^{
		int ret = 0;
		ret = pid_suspend(child_pid);
		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
		/*
		 * The reclaim work is kicked off asynchronously by the suspend.
		 * So we need to call into the kernel to synchronize with the reclaim worker
		 * thread.
		 */
		T_LOG("Waiting for child to be drained...");
		wait_for_pid_to_be_drained(child_pid);
	}, ^{
		int status;
		int signal;
		T_LOG("Waiting for child to exit...");
		bool exited = dt_waitpid(child_pid, &status, &signal, 30);
		/* Expect termination by signal (SIGKILL), not a clean exit. */
		T_QUIET; T_EXPECT_FALSE(exited, "waitpid");
		T_QUIET; T_EXPECT_FALSE(status, "Test process crashed.");
		T_QUIET; T_EXPECT_EQ(signal, SIGKILL, "Test process crashed with SIGKILL.");
		T_END;
	});
}
619 
T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception",
    T_META_IGNORECRASHES(".*allocate_and_suspend_with_dealloc_gap.*"),
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* "15" = number of 1MB entries the helper defers before suspending. */
	vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15");
}
627 
/* Variant that deallocates the ring itself; copyio fails when reclaim runs. */
T_HELPER_DECL(allocate_and_suspend_with_buffer_error,
    "defer free, free buffer, and signal parent to suspend")
{
	allocate_and_suspend(argv, true, false);
}
633 
T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception",
    T_META_IGNORECRASHES(".*allocate_and_suspend_with_buffer_error.*"),
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	/* "15" = number of 1MB entries the helper defers before suspending. */
	vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15");
}
641 
/* State shared between inherit_buffer_after_fork and its forked helper. */
static mach_vm_reclaim_ring_t buffer_4fork_inherit;
static const size_t allocation_size_4fork_inherit = (16UL << 10); // 16 KiB
static const unsigned char value_4fork_inherit = 119;
static mach_vm_address_t addr_4fork_inherit;
646 
/*
 * Forked child: runs with the parent's ring inherited via fork (globals above
 * are populated pre-fork). Verifies a second ring cannot be allocated and
 * that the inherited ring remains fully usable.
 */
T_HELPER_DECL(reuse_freed_entry_fork,
    "defer free, sync, and try to use entry")
{
	kern_return_t kr;
	bool usable, update;
	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
	mach_vm_reclaim_ring_t ringbuffer_tmp;
	/* Only one ring per task: a second allocation must fail post-fork. */
	kr = mach_vm_reclaim_ring_allocate(&ringbuffer_tmp, 1, 1);
	T_ASSERT_MACH_ERROR(kr, VM_RECLAIM_RESOURCE_SHORTAGE, "mach_vm_reclaim_ring_allocate() should fail");
	usable = try_cancel(buffer_4fork_inherit, 0, addr_4fork_inherit,
	    allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE);
	T_ASSERT_TRUE(usable, "Entry can be re-used after fork()");

	T_EXPECT_EQ(*(unsigned char *)addr_4fork_inherit, value_4fork_inherit,
	    "value is preserved");

	kr = mach_vm_reclaim_try_enter(buffer_4fork_inherit,
	    addr_4fork_inherit, allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE, &id, &update);
	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
	T_EXPECT_EQ(id, 1ull, "new entry is placed at tail");

	kr = mach_vm_reclaim_ring_flush(buffer_4fork_inherit, 10);
	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush()");
}
671 
T_DECL(inherit_buffer_after_fork, "Ensure reclaim buffer is inherited across a fork",
    T_META_IGNORECRASHES(".*vm_reclaim_fork.*"),
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	dt_helper_t helpers[1];

	/* Populate the shared globals before forking so the child inherits them. */
	buffer_4fork_inherit = ringbuffer_init();

	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(
		allocation_size_4fork_inherit, buffer_4fork_inherit, value_4fork_inherit, &addr_4fork_inherit);
	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
	helpers[0] = dt_fork_helper("reuse_freed_entry_fork");
	dt_run_helpers(helpers, 1, 30);
}
687 
688 #define SUSPEND_AND_RESUME_COUNT 4
689 
690 // rdar://110081398
T_DECL(reclaim_async_on_repeated_suspend,
    "verify that subsequent suspends are allowed",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	const int sleep_duration = 3;
	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
		int ret = 0;
		/* Rapid suspend/resume cycles while async reclaim may be in flight. */
		for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) {
		        ret = pid_suspend(child_pid);
		        T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()");
		        ret = pid_resume(child_pid);
		        T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()");
		}
		/* Give any in-flight reclaim work time to settle before killing. */
		T_LOG("Sleeping %d sec...", sleep_duration);
		sleep(sleep_duration);
		T_LOG("Killing child...");
		T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()");
	}, ^{
		int status;
		pid_t rc = waitpid(child_pid, &status, 0);
		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
		T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
		T_END;
	});
}
717 
/* Post-exec image: a fresh ring allocation must succeed in the new task. */
T_HELPER_DECL(buffer_init_after_exec,
    "initialize a ringbuffer after exec")
{
	mach_vm_reclaim_ring_t ringbuffer;
	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
	T_ASSERT_MACH_SUCCESS(kr, "post-exec: mach_vm_reclaim_ring_allocate()");
}
725 
726 extern char **environ;
727 
T_DECL(reclaim_exec_new_reclaim_buffer,
    "verify that an exec-ed process may instantiate a new buffer",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	char **launch_tool_args;
	char testpath[PATH_MAX];
	uint32_t testpath_buf_size;
	mach_vm_reclaim_ring_t ringbuffer;

	/* Allocate a ring in the pre-exec image; exec should discard it. */
	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
	T_ASSERT_MACH_SUCCESS(kr, "pre-exec: mach_vm_reclaim_ring_allocate()");

	testpath_buf_size = sizeof(testpath);
	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
	T_LOG("Executable path: %s", testpath);
	launch_tool_args = (char *[]){
		testpath,
		"-n",
		"buffer_init_after_exec",
		NULL
	};

	/* Spawn the child process. */
	posix_spawnattr_t spawnattrs;
	posix_spawnattr_init(&spawnattrs);
	posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC);
	/*
	 * POSIX_SPAWN_SETEXEC replaces this image in place; on success control
	 * never returns here, so reaching the next line means the spawn failed.
	 */
	posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ);
	T_ASSERT_FAIL("should not be reached");
}
759 
/*
 * Fill a ring to capacity, grow it to its maximum length, and verify that
 * (a) the kernel reclaims every outstanding entry as part of the resize,
 * (b) the ring indices are consistent afterwards, and (c) the grown ring
 * can be filled to its new capacity and its entries cancelled normally.
 */
T_DECL(resize_buffer,
    "verify that a reclaim buffer may be safely resized",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	kern_return_t kr;
	mach_vm_reclaim_ring_t ringbuffer;
	mach_vm_address_t addr_tmp;
	mach_vm_reclaim_id_t id_tmp;
	/*
	 * Must be able to hold the post-resize capacity (max_len entries).
	 * NOTE(review): assumes 4 * mach_vm_reclaim_round_capacity(512) <= 4095
	 * -- confirm against the library's rounding behavior.
	 */
	mach_vm_reclaim_id_t ids[4095] = {0};
	mach_vm_address_t addrs[4095] = {0};

	T_LOG("Initializing 1 page buffer");
	mach_vm_reclaim_count_t initial_len = mach_vm_reclaim_round_capacity(512);
	mach_vm_reclaim_count_t max_len = 4 * initial_len;
	kr = mach_vm_reclaim_ring_allocate(&ringbuffer, initial_len, max_len);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");

	/* Completely fill the ring with single-page deferred deallocations. */
	T_LOG("Filling buffer with entries");
	mach_vm_reclaim_count_t old_capacity;
	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &old_capacity);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
	T_EXPECT_EQ(old_capacity, initial_len, "Capacity is same as asked for");
	for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) {
		ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'A', &addrs[i]);
		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation");
	}
	/* One more entry must be rejected: the ring is full. */
	id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp);
	T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer");
	uint64_t initial_tail = os_atomic_load(&ringbuffer->tail, relaxed);
	T_ASSERT_EQ(initial_tail, (uint64_t)old_capacity, "tail == capacity after fill");

	T_LOG("Resizing buffer to 4x");
	kr = mach_vm_reclaim_ring_resize(ringbuffer, max_len);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_resize()");

	// All entries should be reclaimed after resize
	T_EXPECT_EQ(os_atomic_load(&ringbuffer->head, relaxed), initial_tail, "head is incremented");
	T_EXPECT_EQ(os_atomic_load(&ringbuffer->busy, relaxed), initial_tail, "busy is incremented");
	T_EXPECT_EQ(os_atomic_load(&ringbuffer->tail, relaxed), initial_tail, "tail is preserved");

	mach_vm_reclaim_count_t new_capacity;
	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &new_capacity);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
	T_EXPECT_GT(new_capacity, old_capacity, "Buffer capacity grew");
	T_ASSERT_EQ(new_capacity, max_len, "length is set correctly");

	/*
	 * Every pre-resize entry was reclaimed by the kernel during the
	 * resize, so querying must not report it UNRECLAIMED and cancelling
	 * (re-using) it must fail.
	 */
	T_LOG("Attempting to use all entries (should fail)");
	for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) {
		mach_vm_reclaim_state_t state;
		kr = mach_vm_reclaim_query_state(ringbuffer, ids[i], VM_RECLAIM_DEALLOCATE, &state);
		bool reclaimed = !(state == VM_RECLAIM_UNRECLAIMED);
		T_QUIET; T_EXPECT_TRUE(reclaimed, "Entry is reclaimed after resize");
		bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE);
		T_QUIET; T_EXPECT_FALSE(usable, "Entry cannot be re-used after resize");
	}

	/* The grown ring should accept exactly new_capacity entries... */
	T_LOG("Filling resized buffer");
	for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) {
		ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'B', &addrs[i]);
		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation");
	}
	/* ...and no more. */
	id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp);
	T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer");
	/* Entries that were not reclaimed are still cancellable (re-usable). */
	T_LOG("Re-using all entries");
	for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) {
		bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE);
		T_QUIET; T_EXPECT_TRUE(usable, "Entry is available for re-use");
	}
}
830 
/*
 * Resize a ring while/after the kernel has been asked to drain it, then
 * verify the accounting-update entry point still succeeds. Exercises the
 * interaction between an in-flight kernel drain and a userspace resize.
 */
T_DECL(resize_after_drain,
    "resize a buffer after draining it",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED)
{
	int ret;
	mach_vm_reclaim_error_t err;
	mach_vm_reclaim_ring_t ring;
	uint64_t sampling_period_ns;
	size_t sampling_period_size = sizeof(sampling_period_ns);

	/* Read the kernel's reclaim sampling period so we can sleep past it. */
	ret = sysctlbyname("vm.reclaim.sampling_period_ns", &sampling_period_ns, &sampling_period_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl(vm.reclaim.sampling_period_ns)");

	T_LOG("Initializing ring");
	mach_vm_reclaim_count_t initial_len = mach_vm_reclaim_round_capacity(512);
	mach_vm_reclaim_count_t max_len = 4 * initial_len;
	err = mach_vm_reclaim_ring_allocate(&ring, initial_len, max_len);
	T_QUIET; T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_ring_allocate()");

	// Fill the buffer with some memory
	T_LOG("Allocating and deferring memory");
	for (mach_vm_reclaim_count_t i = 0; i < 128; i++) {
		mach_vm_address_t addr;
		mach_vm_reclaim_id_t id = allocate_and_defer_deallocate(vm_page_size, ring, 'A', &addr);
		T_QUIET; T_ASSERT_NE(id, VM_RECLAIM_ID_NULL, "Able to defer deallocation");
	}

	/* Ask the kernel (via test sysctl) to drain this process's ring. */
	T_LOG("Draining ring");
	pid_t pid = getpid();
	ret = sysctlbyname("vm.reclaim.drain_pid", NULL, NULL, &pid, sizeof(pid));
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl(vm.reclaim.drain_pid)");

	/* Resize (to 2x, not max) immediately after requesting the drain. */
	err = mach_vm_reclaim_ring_resize(ring, 2 * initial_len);
	T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_ring_resize()");

	/* Let at least one kernel sampling interval elapse before updating. */
	T_LOG("Sleeping for 1 sampling period...");
	struct timespec ts = {
		.tv_sec = sampling_period_ns / NSEC_PER_SEC,
		.tv_nsec = sampling_period_ns % NSEC_PER_SEC,
	};
	ret = nanosleep(&ts, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()");

	err = mach_vm_reclaim_update_kernel_accounting(ring);
	T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_update_kernel_accounting()");
}
878 
/* Number of entries enqueued and checked by the copy_and_query_buffer test. */
#define QUERY_BUFFER_RING_COUNT 25
880 
881 static void
kill_child()882 kill_child()
883 {
884 	kill(child_pid, SIGKILL);
885 }
886 
887 
/*
 * Locally-declared prototype for a private kernel entry point (not exposed
 * in public headers). NOTE(review): keep in sync with the kernel-side
 * declaration -- presumably maps a reclaim buffer from `source_task` into
 * `dest_task`; confirm the out-parameter semantics against xnu.
 */
kern_return_t
mach_vm_deferred_reclamation_buffer_remap(task_t source_task,
    task_t dest_task,
    mach_vm_address_t addr,
    mach_vm_address_t *addr_u,
    mach_vm_size_t *size_u);
894 
/*
 * Populate a ring with randomized regions, fork a child (which inherits the
 * ring), then use the inspection API to enumerate the child's rings, copy
 * one out, and verify the copied ring's query results match what was
 * enqueued. Requires root for task_for_pid().
 */
T_DECL(copy_and_query_buffer,
    "verify that a reclaim ring may be queried correctly",
    T_META_VM_RECLAIM_ENABLED,
    T_META_TAG_VM_PREFERRED,
    T_META_ASROOT(true))
{
	kern_return_t kr;
	mach_vm_reclaim_error_t rr;
	mach_vm_reclaim_ring_t self_ring;
	mach_vm_reclaim_id_t ids[QUERY_BUFFER_RING_COUNT];
	mach_vm_address_t addrs[QUERY_BUFFER_RING_COUNT];
	mach_vm_size_t sizes[QUERY_BUFFER_RING_COUNT];
	mach_vm_reclaim_action_t actions[QUERY_BUFFER_RING_COUNT];
	struct mach_vm_reclaim_region_s query_buffer[QUERY_BUFFER_RING_COUNT];
	mach_vm_reclaim_count_t query_count;
	task_t child_task;
	mach_vm_reclaim_count_t n_rings;
	struct mach_vm_reclaim_ring_ref_s ring_ref;
	mach_vm_reclaim_count_t capacity = mach_vm_reclaim_round_capacity(512);
	mach_vm_reclaim_ring_copy_t copied_ring;

	T_SETUPBEGIN;

	T_LOG("Initializing buffer");
	kr = mach_vm_reclaim_ring_allocate(&self_ring, capacity, capacity);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");

	/*
	 * Defer regions with randomized action (FREE vs DEALLOCATE) and size
	 * (1-3 pages); record each so the copied-ring query can be checked
	 * entry-by-entry. rand() is unseeded, so the sequence is deterministic.
	 */
	T_LOG("Adding entries to buffer");
	for (mach_vm_reclaim_count_t i = 0; i < QUERY_BUFFER_RING_COUNT; i++) {
		actions[i] = (rand() % 2 == 0) ? VM_RECLAIM_FREE : VM_RECLAIM_DEALLOCATE;
		sizes[i] = ((rand() % 3) + 1) * vm_page_size;
		addrs[i] = 0;
		ids[i] = allocate_and_defer_free(sizes[i], self_ring, 'A', actions[i], &addrs[i]);
		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer allocation");
	}

	/* Fork an idle child; it inherits the populated ring. */
	child_pid = fork();
	if (child_pid == 0) {
		while (true) {
			sleep(1);
		}
	}
	T_ATEND(kill_child);

	kr = task_for_pid(mach_task_self(), child_pid, &child_task);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_for_pid");

	T_SETUPEND;

	T_LOG("Copying buffer");
	/* First call with NULL refs returns only the ring count. */
	rr = mach_vm_reclaim_get_rings_for_task(child_task, NULL, &n_rings);
	T_ASSERT_MACH_SUCCESS(rr, "Query ring count");
	T_ASSERT_EQ(n_rings, 1, "Task has one ring");
	rr = mach_vm_reclaim_get_rings_for_task(child_task, &ring_ref, &n_rings);
	T_ASSERT_MACH_SUCCESS(rr, "Get ring reference");
	T_ASSERT_NE(ring_ref.addr, 0ULL, "Ring ref ring is not null");

	/* Snapshot the child's ring into this task for offline inspection. */
	kr = mach_vm_reclaim_ring_copy(child_task, &ring_ref, &copied_ring);
	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_copy()");
	T_ASSERT_NOTNULL(copied_ring, "copied ring is not null");

	T_LOG("Querying buffer");

	/* NULL region buffer: query reports the number of entries only. */
	rr = mach_vm_reclaim_copied_ring_query(&copied_ring, NULL, &query_count);
	T_QUIET; T_ASSERT_MACH_SUCCESS(rr, "query reclaim ring size");
	T_ASSERT_EQ(query_count, QUERY_BUFFER_RING_COUNT, "correct reclaim ring query size");

	rr = mach_vm_reclaim_copied_ring_query(&copied_ring, query_buffer, &query_count);
	T_QUIET; T_ASSERT_MACH_SUCCESS(rr, "query reclaim ring");
	T_ASSERT_EQ(query_count, QUERY_BUFFER_RING_COUNT, "query count is correct");

	/* Every queried region must match what was enqueued, in order. */
	bool all_match = true;
	for (mach_vm_reclaim_count_t i = 0; i < QUERY_BUFFER_RING_COUNT; i++) {
		mach_vm_reclaim_region_t qentry = &query_buffer[i];
		if ((qentry->vmrr_addr != addrs[i]) ||
		    (qentry->vmrr_size != sizes[i]) ||
		    (qentry->vmrr_behavior != actions[i])) {
			all_match = false;
		}
		T_QUIET; T_EXPECT_EQ(qentry->vmrr_addr, addrs[i], "query->vmrr_addr is correct");
		T_QUIET; T_EXPECT_EQ(qentry->vmrr_size, sizes[i], "query->vmrr_size is correct");
		T_QUIET; T_EXPECT_EQ(qentry->vmrr_behavior, actions[i], "query->vmrr_behavior is correct");
	}
	T_ASSERT_TRUE(all_match, "query entries are correct");

	/* An undersized caller buffer must truncate rather than overrun. */
	query_count = 5;
	rr = mach_vm_reclaim_copied_ring_query(&copied_ring, query_buffer, &query_count);
	T_QUIET; T_ASSERT_MACH_SUCCESS(rr, "query reclaim ring with small buffer");
	T_ASSERT_EQ(query_count, 5, "query reclaim ring with small buffer returns correct size");

	T_LOG("Freeing buffer");
	rr = mach_vm_reclaim_copied_ring_free(&copied_ring);
	T_ASSERT_MACH_SUCCESS(rr, "free reclaim ring");
}
989