xref: /xnu-12377.1.9/tests/vm/vm_reclaim.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 #include <sys/types.h>
2 #include <sys/sysctl.h>
3 #include <mach/mach.h>
4 #include <mach/mach_vm.h>
5 #include <mach/vm_reclaim_private.h>
6 #include <mach-o/dyld.h>
7 #include <os/atomic_private.h>
8 #include <signal.h>
9 #include <spawn.h>
10 #include <spawn_private.h>
11 #include <time.h>
12 #include <unistd.h>
13 
14 #include <darwintest.h>
15 #include <darwintest_multiprocess.h>
16 #include <darwintest_utils.h>
17 
18 #include <Kernel/kern/ledger.h>
19 extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);
20 
21 #include "memorystatus_assertion_helpers.h"
22 
23 #if TARGET_OS_IOS && !TARGET_OS_VISION
24 // Some of the unit tests test deferred deallocations.
25 // For these we need to set a sufficiently large reclaim threshold
26 // to ensure their buffers aren't freed prematurely.
27 #define T_META_VM_RECLAIM_ENABLED T_META_SYSCTL_INT("vm.reclaim.max_threshold=268435456")
28 #define T_META_VM_RECLAIM_DISABLED T_META_SYSCTL_INT("vm.reclaim.max_threshold=0")
29 #else // !(TARGET_OS_IOS && !TARGET_OS_VISION)
30 #define T_META_VM_RECLAIM_ENABLED T_META_SYSCTL_INT("vm.reclaim.enabled=1")
31 #define T_META_VM_RECLAIM_DISABLED T_META_SYSCTL_INT("vm.reclaim.enabled=0")
32 #endif // TARGET_OS_IOS && !TARGET_OS_VISION
33 
34 #define MiB(x) ((x) << 20)
35 
36 T_GLOBAL_META(
37 	T_META_NAMESPACE("xnu.vm_reclaim"),
38 	T_META_RADAR_COMPONENT_NAME("xnu"),
39 	T_META_RADAR_COMPONENT_VERSION("performance"),
40 	T_META_OWNER("jarrad"),
41 	// Ensure we don't conflict with libmalloc's reclaim buffer
42 	T_META_ENVVAR("MallocDeferredReclaim=0"),
43 	T_META_ENVVAR("MallocAllowInternalSecurity=1"),
44 	T_META_RUN_CONCURRENTLY(false),
45 	T_META_CHECK_LEAKS(false)
46 	);
47 
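/*
 * Allocate a reclaim ring with the smallest supported capacity.
 * mach_vm_reclaim_round_capacity(1) rounds the request up; the init test
 * below expects this to yield one page worth of entries.
 */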
48 static mach_vm_reclaim_ring_t
49 ringbuffer_init(void)
50 {
51 	mach_vm_reclaim_ring_t ringbuffer = NULL;
52 	mach_vm_reclaim_count_t len = mach_vm_reclaim_round_capacity(1);
53 	mach_vm_reclaim_count_t max_len = len;
54 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, len, max_len);
55 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");
56 	return ringbuffer;
57 }
58 
59 T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer",
60     T_META_VM_RECLAIM_ENABLED,
61     T_META_TAG_VM_PREFERRED)
62 {
63 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
64 	T_ASSERT_NOTNULL(ringbuffer, "ringbuffer is allocated");
65 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->head, relaxed), 0ull, "head is zeroed");
66 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->busy, relaxed), 0ull, "busy is zeroed");
67 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->tail, relaxed), 0ull, "tail is zeroed");
68 	size_t expected_len = (vm_page_size - offsetof(struct mach_vm_reclaim_ring_s, entries)) /
69 	    sizeof(struct mach_vm_reclaim_entry_s);
70 	T_ASSERT_EQ((size_t)ringbuffer->len, expected_len, "length is set correctly");
71 	for (unsigned i = 0; i < ringbuffer->len; i++) {
72 		mach_vm_reclaim_entry_t entry = &ringbuffer->entries[i];
73 		T_QUIET; T_EXPECT_EQ(entry->address, 0ull, "address is zeroed");
74 		T_QUIET; T_EXPECT_EQ(entry->size, 0u, "size is zeroed");
75 		T_QUIET; T_EXPECT_EQ(entry->behavior, 0, "behavior is zeroed");
76 	}
77 }
78 
79 T_DECL(vm_reclaim_init_fails_when_disabled,
80     "Initializing a ring buffer on a system with vm_reclaim disabled should fail",
81     T_META_VM_RECLAIM_DISABLED, T_META_TAG_VM_PREFERRED)
82 {
83 	mach_vm_reclaim_ring_t ringbuffer;
84 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
85 	T_EXPECT_MACH_ERROR(kr, VM_RECLAIM_NOT_SUPPORTED, "mach_vm_reclaim_ring_allocate()");
86 }
87 
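/*
 * Try to cancel a previously deferred reclamation and return true iff the
 * kernel reports the memory is still safe to reuse. Updates the kernel's
 * accounting when the library requests it.
 */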
88 static bool
89 try_cancel(mach_vm_reclaim_ring_t ringbuffer, mach_vm_reclaim_id_t id, mach_vm_address_t addr, mach_vm_size_t size, mach_vm_reclaim_action_t behavior)
90 {
91 	bool update_accounting;
92 	mach_vm_reclaim_state_t state;
93 	kern_return_t kr;
94 	kr = mach_vm_reclaim_try_cancel(ringbuffer, id, addr, size, behavior, &state, &update_accounting);
95 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_cancel()");
96 	if (update_accounting) {
97 		kern_return_t tmp_kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
98 		T_QUIET; T_ASSERT_MACH_SUCCESS(tmp_kr, "mach_vm_reclaim_update_kernel_accounting()");
99 	}
100 	return mach_vm_reclaim_is_reusable(state);
101 }
102 
103 /*
104  * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call.
105  */
106 static mach_vm_reclaim_id_t
107 allocate_and_defer_free(size_t size, mach_vm_reclaim_ring_t ringbuffer,
108     unsigned char val, mach_vm_reclaim_action_t behavior,
109     mach_vm_address_t *addr /* OUT */)
110 {
111 	kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
112 	bool should_update_kernel_accounting = false;
113 	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
114 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map");
115 
116 	memset((void *) *addr, val, size);
117 
118 	kr = mach_vm_reclaim_try_enter(ringbuffer, *addr, size, behavior, &id, &should_update_kernel_accounting);
119 	if (should_update_kernel_accounting) {
120 		kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
121 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
122 	}
123 	return id;
124 }
125 
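/* Convenience wrapper around allocate_and_defer_free() for VM_RECLAIM_DEALLOCATE entries. */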
126 static mach_vm_reclaim_id_t
127 allocate_and_defer_deallocate(size_t size, mach_vm_reclaim_ring_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */)
128 {
129 	return allocate_and_defer_free(size, ringbuffer, val, VM_RECLAIM_DEALLOCATE, addr);
130 }
131 
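/*
 * Rough shape of the deferred-free flow exercised below (a sketch only; see
 * the helpers above for the exact arguments used):
 *
 *   mach_vm_reclaim_try_enter(ring, addr, size, behavior, &id, &update);
 *   mach_vm_reclaim_ring_flush(ring, n);      // ask the kernel to reclaim
 *   mach_vm_reclaim_try_cancel(ring, id, addr, size, behavior, &state, &update);
 *   if (mach_vm_reclaim_is_reusable(state))
 *           ... addr may be reused ...
 */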
132 T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync",
133     T_META_VM_RECLAIM_ENABLED,
134     T_META_TAG_VM_PREFERRED)
135 {
136 	static const size_t kAllocationSize = (1UL << 20); // 1MB
137 	mach_vm_address_t addr;
138 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
139 
140 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
141 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
142 	mach_vm_reclaim_ring_flush(ringbuffer, 1);
143 }
144 
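/*
 * Re-exec this test binary with "-n <helper>" via dt_launch_tool and return
 * the child's pid.
 */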
145 static pid_t
146 spawn_helper(char *helper)
147 {
148 	char **launch_tool_args;
149 	char testpath[PATH_MAX];
150 	uint32_t testpath_buf_size;
151 	pid_t child_pid;
152 
153 	testpath_buf_size = sizeof(testpath);
154 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
155 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
156 	T_LOG("Executable path: %s", testpath);
157 	launch_tool_args = (char *[]){
158 		testpath,
159 		"-n",
160 		helper,
161 		NULL
162 	};
163 
164 	/* Spawn the child process. */
165 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
166 	if (ret != 0) {
167 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
168 	}
169 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
170 
171 	return child_pid;
172 }
173 
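/* Spawn a helper and block until it exits; returns the raw waitpid status. */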
174 static int
175 spawn_helper_and_wait_for_exit(char *helper)
176 {
177 	int status;
178 	pid_t child_pid, rc;
179 
180 	child_pid = spawn_helper(helper);
181 	rc = waitpid(child_pid, &status, 0);
182 	T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
183 	return status;
184 }
185 
186 /*
187  * Returns true iff every byte of the buffer equals the expected value.
188  */
189 static bool
190 check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected)
191 {
192 	unsigned char *buffer = (unsigned char *) addr;
193 	for (size_t i = 0; i < size; i++) {
194 		if (buffer[i] != expected) {
195 			return false;
196 		}
197 	}
198 	return true;
199 }
200 
201 /*
202  * Read every byte of a buffer to ensure re-usability
203  */
204 static void
205 read_buffer(mach_vm_address_t addr, size_t size)
206 {
207 	volatile uint8_t byte;
208 	uint8_t *buffer = (uint8_t *)addr;
209 	for (size_t i = 0; i < size; i++) {
210 		byte = buffer[i];
211 	}
212 }
213 
214 /*
215  * Check that the given (freed) buffer has changed.
216  * This will likely crash, but if we make it through the entire buffer then segfault on purpose.
217  */
218 static void
219 assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected)
220 {
221 	/*
222 	 * mach_vm_reclaim_ring_flush should have ensured the buffer was freed.
223 	 * Two cases:
224 	 * 1. The buffer is still free (touching it causes a crash)
225  * 2. The address range was re-allocated by some other library in the process.
226 	 * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled
227 	 * with the value we wrote to it. So scan the buffer. If we segfault it's case #1
228 	 * and if we see another value it's case #2.
229 	 */
230 	bool changed = !check_buffer(addr, size, expected);
231 	T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated");
232 	/* Case #2. Force a segfault so the parent sees that we crashed. */
233 	*(volatile int *) 0 = 1;
234 
235 	T_FAIL("Test did not crash when dereferencing NULL");
236 }
237 
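/*
 * Defer free an allocation with the given behavior, force a flush, then try
 * to cancel the entry. DEALLOCATE entries must no longer be reusable (the
 * helper then deliberately crashes); FREE entries must remain reusable.
 */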
238 static void
239 reuse_reclaimed_entry(mach_vm_reclaim_action_t behavior)
240 {
241 	kern_return_t kr;
242 	static const size_t kAllocationSize = (1UL << 20); // 1MB
243 	mach_vm_address_t addr;
244 	static const unsigned char kValue = 220;
245 
246 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
247 
248 	mach_vm_reclaim_id_t idx = allocate_and_defer_free(kAllocationSize, ringbuffer, kValue, behavior, &addr);
249 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
250 	kr = mach_vm_reclaim_ring_flush(ringbuffer, 10);
251 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush");
252 	bool usable = try_cancel(ringbuffer, idx, addr, kAllocationSize, behavior);
253 	switch (behavior) {
254 	case VM_RECLAIM_DEALLOCATE:
255 		T_EXPECT_FALSE(usable, "reclaimed entry is not re-usable");
256 		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
257 		break;
258 	case VM_RECLAIM_FREE:
259 		T_EXPECT_TRUE(usable, "reclaimed REUSABLE entry is re-usable");
260 		read_buffer(addr, kAllocationSize);
261 		T_PASS("Freed buffer re-used successfully");
262 		break;
263 	default:
264 		T_FAIL("Unexpected reclaim behavior %d", behavior);
265 	}
266 }
267 
268 T_HELPER_DECL(reuse_freed_entry_dealloc,
269     "defer free (dealloc), sync, and try to use entry")
270 {
271 	reuse_reclaimed_entry(VM_RECLAIM_DEALLOCATE);
272 }
273 
274 T_HELPER_DECL(reuse_freed_entry_reusable,
275     "defer free (reusable), sync, and try to use entry")
276 {
277 	reuse_reclaimed_entry(VM_RECLAIM_FREE);
278 }
279 
280 T_DECL(vm_reclaim_single_entry_verify_free, "Defer free a single entry, sync, and verify the memory was deallocated",
281     T_META_IGNORECRASHES(".*vm_reclaim_single_entry_verify_free.*"),
282     T_META_VM_RECLAIM_ENABLED,
283     T_META_TAG_VM_PREFERRED)
284 {
285 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_dealloc");
286 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
287 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault.");
288 }
289 
290 T_DECL(vm_reclaim_single_entry_reusable,
291     "Reclaim a reusable entry and verify re-use is legal",
292     T_META_VM_RECLAIM_ENABLED,
293     T_META_TAG_VM_PREFERRED)
294 {
295 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_reusable");
296 	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
297 	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
298 }
299 
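/*
 * Queue argv[0] deferred deallocations, optionally double-free the last entry
 * or deallocate the ring out from under the kernel, then signal the parent
 * (SIGUSR1) and spin so the parent can suspend this process.
 */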
300 static void
301 allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free)
302 {
303 	kern_return_t kr;
304 	static const mach_vm_reclaim_count_t kAllocationSize = (1UL << 20); // 1MB
305 	mach_vm_address_t addr = 0;
306 	bool should_update_kernel_accounting = false;
307 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
308 
309 	const mach_vm_reclaim_count_t kNumEntries = (mach_vm_reclaim_count_t)atoi(argv[0]);
310 	mach_vm_reclaim_count_t capacity;
311 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
312 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
313 	T_QUIET; T_ASSERT_LT(kNumEntries, capacity, "Test does not fill up ringbuffer");
314 
315 	T_LOG("allocate_and_suspend: Allocating and freeing %u entries...", kNumEntries);
316 	for (size_t i = 0; i < kNumEntries; i++) {
317 		addr = 0;
318 		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr);
319 		T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct");
320 		T_LOG("allocate_and_suspend: Allocated and deferred 0x%llx", addr);
321 	}
322 
323 	if (double_free) {
324 		// Double free the last entry
325 		mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
326 		kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &id, &should_update_kernel_accounting);
327 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter");
328 	}
329 
330 	if (free_buffer) {
331 		mach_vm_size_t buffer_size = (size_t)capacity *
332 		    sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries);
333 		kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size);
334 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
335 	}
336 
337 	T_LOG("allocate_and_suspend: Signalling parent");
338 	// Signal to our parent to suspend us
339 	if (kill(getppid(), SIGUSR1) != 0) {
340 		T_LOG("Unable to signal to parent process!");
341 		exit(1);
342 	}
343 
344 	T_LOG("allocate_and_suspend: Spinning");
345 	while (1) {
346 		;
347 	}
348 	T_ASSERT_FAIL("notreached");
349 }
350 
351 T_HELPER_DECL(allocate_and_suspend,
352     "defer free, and signal parent to suspend")
353 {
354 	allocate_and_suspend(argv, false, false);
355 }
356 
357 static void
358 resume_and_kill_proc(pid_t pid)
359 {
360 	int ret = pid_resume(pid);
361 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze");
362 	T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process");
363 }
364 
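/*
 * Synchronize with the kernel's reclaim worker: vm.reclaim.wait_for_pid
 * returns once the given pid's deferred reclamations have been processed.
 */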
365 static void
366 wait_for_pid_to_be_drained(pid_t child_pid)
367 {
368 	int val = child_pid;
369 	int ret;
370 	size_t len = sizeof(val);
371 	ret = sysctlbyname("vm.reclaim.wait_for_pid", NULL, NULL, &val, len);
372 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim.wait_for_pid");
373 }
374 
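/*
 * Look up the index of the "phys_footprint" entry in the task ledger template
 * and report the total number of ledger entries via *num_entries.
 */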
375 static size_t
376 ledger_phys_footprint_index(size_t *num_entries)
377 {
378 	struct ledger_info li;
379 	struct ledger_template_info *templateInfo = NULL;
380 	int ret;
381 	size_t i, footprint_index;
382 	bool found = false;
383 
384 	ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL);
385 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)");
386 
387 	T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid");
388 	*num_entries = (size_t) li.li_entries;
389 	templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info));
390 	T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries");
391 
392 	footprint_index = 0;
393 	ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL);
394 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)");
395 	for (i = 0; i < *num_entries; i++) {
396 		if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) {
397 			footprint_index = i;
398 			found = true;
399 		}
400 	}
401 	free(templateInfo);
402 	T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger");
403 	return footprint_index;
404 }
405 
406 static int64_t
407 get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries)
408 {
409 	int ret;
410 	int64_t value;
411 	struct ledger_entry_info *lei = NULL;
412 
413 	lei = malloc(num_entries * sizeof(*lei));
414 	ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries);
415 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)");
416 	value = lei[index].lei_balance;
417 	free(lei);
418 	return value;
419 }
420 
421 static pid_t child_pid;
422 
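/*
 * Spawn a helper in the background, run test_block when it signals readiness
 * with SIGUSR1, and run exit_block when it exits. Does not return; the blocks
 * are expected to end the test (e.g. via T_END).
 */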
423 static void
424 test_after_background_helper_launches(char *variant, char *arg1, dispatch_block_t test_block, dispatch_block_t exit_block)
425 {
426 	char **launch_tool_args;
427 	char testpath[PATH_MAX];
428 	uint32_t testpath_buf_size;
429 
430 	dispatch_source_t ds_signal, ds_exit;
431 
432 	/* Wait for the child process to tell us that it's ready, and then freeze it */
433 	signal(SIGUSR1, SIG_IGN);
434 	ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
435 	T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
436 	dispatch_source_set_event_handler(ds_signal, test_block);
437 
438 	dispatch_activate(ds_signal);
439 
440 	testpath_buf_size = sizeof(testpath);
441 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
442 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
443 	T_LOG("Executable path: %s", testpath);
444 	launch_tool_args = (char *[]){
445 		testpath,
446 		"-n",
447 		variant,
448 		arg1,
449 		NULL
450 	};
451 
452 	/* Spawn the child process. */
453 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
454 	if (ret != 0) {
455 		T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
456 	}
457 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
458 
459 	/* Listen for exit. */
460 	ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue());
461 	dispatch_source_set_event_handler(ds_exit, exit_block);
462 
463 	dispatch_activate(ds_exit);
464 	dispatch_main();
465 }
466 
467 T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.",
468     T_META_ASROOT(true),
469     T_META_VM_RECLAIM_ENABLED,
470     T_META_TAG_VM_PREFERRED)
471 {
472 	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
473 		int ret = 0;
474 		size_t num_ledger_entries = 0;
475 		size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
476 		int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20);
477 		before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
478 		T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated");
479 		ret = pid_suspend(child_pid);
480 		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
481 		/*
482 		 * The reclaim work is kicked off asynchronously by the suspend.
483 		 * So we need to call into the kernel to synchronize with the reclaim worker
484 		 * thread.
485 		 */
486 		wait_for_pid_to_be_drained(child_pid);
487 		after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
488 		T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed");
489 
490 		resume_and_kill_proc(child_pid);
491 	},
492 	    ^{
493 		int status = 0, code = 0;
494 		pid_t rc = waitpid(child_pid, &status, 0);
495 		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
496 		code = WEXITSTATUS(status);
497 		T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly");
498 		T_END;
499 	});
500 }
501 
502 T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill",
503     T_META_VM_RECLAIM_ENABLED,
504     T_META_TAG_VM_PREFERRED)
505 {
506 	int err;
507 	const size_t kNumEntries = 50;
508 	static const size_t kAllocationSize = (1UL << 20); // 1MB
509 	static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize;
510 
511 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
512 
513 	err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE);
514 	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits");
515 
516 	for (size_t i = 0; i < kNumEntries; i++) {
517 		mach_vm_address_t addr = 0;
518 		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr);
519 		T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct");
520 	}
521 
522 	T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit);
523 }
524 
525 #if TARGET_OS_IOS && !TARGET_OS_VISION
526 T_DECL(vm_reclaim_update_reclaimable_bytes_threshold, "Kernel reclaims when num_bytes_reclaimable crosses threshold",
527     T_META_SYSCTL_INT("vm.reclaim.max_threshold=16384"),
528     T_META_TAG_VM_PREFERRED)
529 {
530 	mach_vm_reclaim_count_t kNumEntries = 0;
531 	const size_t kAllocationSize = vm_kernel_page_size;
532 	uint64_t vm_reclaim_reclaimable_max_threshold;
533 	int ret;
534 	mach_error_t err;
535 	size_t len = sizeof(vm_reclaim_reclaimable_max_threshold);
536 	size_t num_ledger_entries = 0;
537 	size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
538 
539 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
540 
541 	// Allocate 1000 times the reclaim threshold
542 	ret = sysctlbyname("vm.reclaim.max_threshold", &vm_reclaim_reclaimable_max_threshold, &len, NULL, 0);
543 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim.max_threshold");
544 	kNumEntries = (mach_vm_reclaim_count_t)(vm_reclaim_reclaimable_max_threshold / kAllocationSize * 1000);
545 	mach_vm_reclaim_count_t capacity;
546 	err = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
547 	T_QUIET; T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_ring_capacity()");
548 	T_QUIET; T_ASSERT_LT(kNumEntries, capacity, "Test does not fill up ringbuffer");
549 
550 	mach_vm_address_t addr = 0;
551 	for (uint64_t i = 0; i < kNumEntries; i++) {
552 		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char)i, &addr);
553 		T_QUIET; T_ASSERT_EQ(idx, i, "idx is correct");
554 	}
555 
556 	T_QUIET; T_ASSERT_LT(get_ledger_entry_for_pid(getpid(), phys_footprint_index, num_ledger_entries),
557 	    (int64_t) ((kNumEntries) * kAllocationSize), "Entries were reclaimed as we crossed threshold");
558 }
559 #endif /* TARGET_OS_IOS && !TARGET_OS_VISION */
560 
561 T_HELPER_DECL(deallocate_buffer,
562     "deallocate the buffer from underneath the kernel")
563 {
564 	kern_return_t kr;
565 	static const size_t kAllocationSize = (1UL << 20); // 1MB
566 	mach_vm_address_t addr;
567 
568 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
569 
570 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
571 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
572 	mach_vm_reclaim_count_t capacity;
573 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
574 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
575 
576 	mach_vm_size_t buffer_size = (size_t)capacity *
577 	    sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries);
578 	kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size);
579 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
580 
581 	mach_vm_reclaim_ring_flush(ringbuffer, 10);
582 
583 	T_FAIL("Test did not crash when synchronizing on a deallocated buffer!");
584 }
585 
586 T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer",
587     T_META_IGNORECRASHES(".*deallocate_buffer.*"),
588     T_META_VM_RECLAIM_ENABLED,
589     T_META_TAG_VM_PREFERRED)
590 {
591 	int status = spawn_helper_and_wait_for_exit("deallocate_buffer");
592 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
593 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
594 }
595 
596 T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer")
597 {
598 	kern_return_t kr;
599 	static const size_t kAllocationSize = (1UL << 20); // 1MB
600 	mach_vm_address_t addr;
601 	bool should_update_kernel_accounting = false;
602 
603 	kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL);
604 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()");
605 
606 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
607 
608 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
609 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
610 	idx = VM_RECLAIM_ID_NULL;
611 	kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &idx, &should_update_kernel_accounting);
612 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
613 	T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index");
614 
615 	mach_vm_reclaim_ring_flush(ringbuffer, 2);
616 
617 	T_FAIL("Test did not crash when doing a double free!");
618 }
619 
620 T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception",
621     T_META_IGNORECRASHES(".*dealloc_gap.*"),
622     T_META_VM_RECLAIM_ENABLED,
623     T_META_TAG_VM_PREFERRED)
624 {
625 	int status = spawn_helper_and_wait_for_exit("dealloc_gap");
626 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
627 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
628 }
629 
630 T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap,
631     "defer double free, and signal parent to suspend")
632 {
633 	kern_return_t kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL);
634 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()");
635 	allocate_and_suspend(argv, false, true);
636 }
637 
638 static void
639 vm_reclaim_async_exception(char *variant, char *arg1)
640 {
641 	test_after_background_helper_launches(variant, arg1, ^{
642 		int ret = 0;
643 		ret = pid_suspend(child_pid);
644 		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
645 		/*
646 		 * The reclaim work is kicked off asynchronously by the suspend.
647 		 * So we need to call into the kernel to synchronize with the reclaim worker
648 		 * thread.
649 		 */
650 		T_LOG("Waiting for child to be drained...");
651 		wait_for_pid_to_be_drained(child_pid);
652 	}, ^{
653 		int status;
654 		int signal;
655 		T_LOG("Waiting for child to exit...");
656 		bool exited = dt_waitpid(child_pid, &status, &signal, 30);
657 		T_QUIET; T_EXPECT_FALSE(exited, "waitpid");
658 		T_QUIET; T_EXPECT_FALSE(status, "Test process crashed.");
659 		T_QUIET; T_EXPECT_EQ(signal, SIGKILL, "Test process crashed with SIGKILL.");
660 		T_END;
661 	});
662 }
663 
664 T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception",
665     T_META_IGNORECRASHES(".*allocate_and_suspend_with_dealloc_gap.*"),
666     T_META_VM_RECLAIM_ENABLED,
667     T_META_TAG_VM_PREFERRED)
668 {
669 	vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15");
670 }
671 
672 T_HELPER_DECL(allocate_and_suspend_with_buffer_error,
673     "defer free, free buffer, and signal parent to suspend")
674 {
675 	allocate_and_suspend(argv, true, false);
676 }
677 
678 T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception",
679     T_META_IGNORECRASHES(".*allocate_and_suspend_with_buffer_error.*"),
680     T_META_VM_RECLAIM_ENABLED,
681     T_META_TAG_VM_PREFERRED)
682 {
683 	vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15");
684 }
685 
686 static mach_vm_reclaim_ring_t buffer_4fork_inherit;
687 static const size_t allocation_size_4fork_inherit = (16UL << 10); // 16 KiB
688 static const unsigned char value_4fork_inherit = 119;
689 static mach_vm_address_t addr_4fork_inherit;
690 
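/*
 * Runs in a forked child. Allocating a second ring is expected to fail with
 * VM_RECLAIM_RESOURCE_SHORTAGE, and the entry the parent deferred before
 * fork() must still be cancellable with its contents intact.
 */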
691 T_HELPER_DECL(reuse_freed_entry_fork,
692     "defer free, sync, and try to use entry")
693 {
694 	kern_return_t kr;
695 	bool usable, update;
696 	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
697 	mach_vm_reclaim_ring_t ringbuffer_tmp;
698 	kr = mach_vm_reclaim_ring_allocate(&ringbuffer_tmp, 1, 1);
699 	T_ASSERT_MACH_ERROR(kr, VM_RECLAIM_RESOURCE_SHORTAGE, "mach_vm_reclaim_ring_allocate() should fail");
700 	usable = try_cancel(buffer_4fork_inherit, 0, addr_4fork_inherit,
701 	    allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE);
702 	T_ASSERT_TRUE(usable, "Entry can be re-used after fork()");
703 
704 	T_EXPECT_EQ(*(unsigned char *)addr_4fork_inherit, value_4fork_inherit,
705 	    "value is preserved");
706 
707 	kr = mach_vm_reclaim_try_enter(buffer_4fork_inherit,
708 	    addr_4fork_inherit, allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE, &id, &update);
709 	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
710 	T_EXPECT_EQ(id, 1ull, "new entry is placed at tail");
711 
712 	kr = mach_vm_reclaim_ring_flush(buffer_4fork_inherit, 10);
713 	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush()");
714 }
715 
716 T_DECL(inherit_buffer_after_fork, "Ensure reclaim buffer is inherited across a fork",
717     T_META_IGNORECRASHES(".*vm_reclaim_fork.*"),
718     T_META_VM_RECLAIM_ENABLED,
719     T_META_TAG_VM_PREFERRED)
720 {
721 	dt_helper_t helpers[1];
722 
723 	buffer_4fork_inherit = ringbuffer_init();
724 
725 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(
726 		allocation_size_4fork_inherit, buffer_4fork_inherit, value_4fork_inherit, &addr_4fork_inherit);
727 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
728 	helpers[0] = dt_fork_helper("reuse_freed_entry_fork");
729 	dt_run_helpers(helpers, 1, 30);
730 }
731 
732 #define SUSPEND_AND_RESUME_COUNT 4
733 
734 // rdar://110081398
735 T_DECL(reclaim_async_on_repeated_suspend,
736     "verify that subsequent suspends are allowed",
737     T_META_VM_RECLAIM_ENABLED,
738     T_META_TAG_VM_PREFERRED)
739 {
740 	const int sleep_duration = 3;
741 	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
742 		int ret = 0;
743 		for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) {
744 		        ret = pid_suspend(child_pid);
745 		        T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()");
746 		        ret = pid_resume(child_pid);
747 		        T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()");
748 		}
749 		T_LOG("Sleeping %d sec...", sleep_duration);
750 		sleep(sleep_duration);
751 		T_LOG("Killing child...");
752 		T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()");
753 	}, ^{
754 		int status;
755 		pid_t rc = waitpid(child_pid, &status, 0);
756 		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
757 		T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
758 		T_END;
759 	});
760 }
761 
762 T_HELPER_DECL(buffer_init_after_exec,
763     "initialize a ringbuffer after exec")
764 {
765 	mach_vm_reclaim_ring_t ringbuffer;
766 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
767 	T_ASSERT_MACH_SUCCESS(kr, "post-exec: mach_vm_reclaim_ring_allocate()");
768 }
769 
770 extern char **environ;
771 
772 T_DECL(reclaim_exec_new_reclaim_buffer,
773     "verify that an exec-ed process may instantiate a new buffer",
774     T_META_VM_RECLAIM_ENABLED,
775     T_META_TAG_VM_PREFERRED)
776 {
777 	char **launch_tool_args;
778 	char testpath[PATH_MAX];
779 	uint32_t testpath_buf_size;
780 	mach_vm_reclaim_ring_t ringbuffer;
781 
782 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
783 	T_ASSERT_MACH_SUCCESS(kr, "pre-exec: mach_vm_reclaim_ring_allocate()");
784 
785 	testpath_buf_size = sizeof(testpath);
786 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
787 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
788 	T_LOG("Executable path: %s", testpath);
789 	launch_tool_args = (char *[]){
790 		testpath,
791 		"-n",
792 		"buffer_init_after_exec",
793 		NULL
794 	};
795 
796 	/* Spawn the child process. */
797 	posix_spawnattr_t spawnattrs;
798 	posix_spawnattr_init(&spawnattrs);
799 	posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC);
800 	posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ);
801 	T_ASSERT_FAIL("should not be reached");
802 }
803 
804 T_DECL(resize_buffer,
805     "verify that a reclaim buffer may be safely resized",
806     T_META_VM_RECLAIM_ENABLED,
807     T_META_TAG_VM_PREFERRED)
808 {
809 	kern_return_t kr;
810 	mach_vm_reclaim_ring_t ringbuffer;
811 	mach_vm_address_t addr_tmp;
812 	mach_vm_reclaim_id_t id_tmp;
813 	mach_vm_reclaim_id_t ids[4095] = {0};
814 	mach_vm_address_t addrs[4095] = {0};
815 
816 	T_LOG("Initializing 1 page buffer");
817 	mach_vm_reclaim_count_t initial_len = mach_vm_reclaim_round_capacity(512);
818 	mach_vm_reclaim_count_t max_len = 4 * initial_len;
819 	kr = mach_vm_reclaim_ring_allocate(&ringbuffer, initial_len, max_len);
820 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");
821 
822 	T_LOG("Filling buffer with entries");
823 	mach_vm_reclaim_count_t old_capacity;
824 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &old_capacity);
825 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
826 	T_EXPECT_EQ(old_capacity, initial_len, "Capacity is same as asked for");
827 	for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) {
828 		ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'A', &addrs[i]);
829 		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation");
830 	}
831 	id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp);
832 	T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer");
833 	uint64_t initial_tail = os_atomic_load(&ringbuffer->tail, relaxed);
834 	T_ASSERT_EQ(initial_tail, (uint64_t)old_capacity, "tail == capacity after fill");
835 
836 	T_LOG("Resizing buffer to 4x");
837 	kr = mach_vm_reclaim_ring_resize(ringbuffer, max_len);
838 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_resize()");
839 
840 	// All entries should be reclaimed after resize
841 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->head, relaxed), initial_tail, "head is incremented");
842 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->busy, relaxed), initial_tail, "busy is incremented");
843 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->tail, relaxed), initial_tail, "tail is preserved");
844 
845 	mach_vm_reclaim_count_t new_capacity;
846 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &new_capacity);
847 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
848 	T_EXPECT_GT(new_capacity, old_capacity, "Buffer capacity grew");
849 	T_ASSERT_EQ(new_capacity, max_len, "length is set correctly");
850 
851 	T_LOG("Attempting to use all entries (should fail)");
852 	for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) {
853 		mach_vm_reclaim_state_t state;
854 		kr = mach_vm_reclaim_query_state(ringbuffer, ids[i], VM_RECLAIM_DEALLOCATE, &state);
855 		bool reclaimed = !(state == VM_RECLAIM_UNRECLAIMED);
856 		T_QUIET; T_EXPECT_TRUE(reclaimed, "Entry is reclaimed after resize");
857 		bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE);
858 		T_QUIET; T_EXPECT_FALSE(usable, "Entry cannot be re-used after resize");
859 	}
860 
861 	T_LOG("Filling resized buffer");
862 	for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) {
863 		ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'B', &addrs[i]);
864 		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation");
865 	}
866 	id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp);
867 	T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer");
868 	T_LOG("Re-using all entries");
869 	for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) {
870 		bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE);
871 		T_QUIET; T_EXPECT_TRUE(usable, "Entry is available for re-use");
872 	}
873 }
874 
875 T_DECL(resize_after_drain,
876     "resize a buffer after draining it",
877     T_META_VM_RECLAIM_ENABLED,
878     T_META_TAG_VM_PREFERRED)
879 {
880 	int ret;
881 	mach_vm_reclaim_error_t err;
882 	mach_vm_reclaim_ring_t ring;
883 	uint64_t sampling_period_ns;
884 	size_t sampling_period_size = sizeof(sampling_period_ns);
885 
886 	ret = sysctlbyname("vm.reclaim.sampling_period_ns", &sampling_period_ns, &sampling_period_size, NULL, 0);
887 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl(vm.reclaim.sampling_period_ns)");
888 
889 	T_LOG("Initializing ring");
890 	mach_vm_reclaim_count_t initial_len = mach_vm_reclaim_round_capacity(512);
891 	mach_vm_reclaim_count_t max_len = 4 * initial_len;
892 	err = mach_vm_reclaim_ring_allocate(&ring, initial_len, max_len);
893 	T_QUIET; T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_ring_allocate()");
894 
895 	// Fill the buffer with some memory
896 	T_LOG("Allocating and deferring memory");
897 	for (mach_vm_reclaim_count_t i = 0; i < 128; i++) {
898 		mach_vm_address_t addr;
899 		mach_vm_reclaim_id_t id = allocate_and_defer_deallocate(vm_page_size, ring, 'A', &addr);
900 		T_QUIET; T_ASSERT_NE(id, VM_RECLAIM_ID_NULL, "Able to defer deallocation");
901 	}
902 
903 	T_LOG("Draining ring");
904 	pid_t pid = getpid();
905 	ret = sysctlbyname("vm.reclaim.drain_pid", NULL, NULL, &pid, sizeof(pid));
906 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl(vm.reclaim.drain_pid)");
907 
908 	err = mach_vm_reclaim_ring_resize(ring, 2 * initial_len);
909 	T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_ring_resize()");
910 
911 	T_LOG("Sleeping for 1 sampling period...");
912 	struct timespec ts = {
913 		.tv_sec = sampling_period_ns / NSEC_PER_SEC,
914 		.tv_nsec = sampling_period_ns % NSEC_PER_SEC,
915 	};
916 	ret = nanosleep(&ts, NULL);
917 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()");
918 
919 	err = mach_vm_reclaim_update_kernel_accounting(ring);
920 	T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_update_kernel_accounting()");
921 }
922 
923 #define QUERY_BUFFER_RING_COUNT 25
924 
925 static void
926 kill_child()
927 {
928 	kill(child_pid, SIGKILL);
929 }
930 
931 
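/*
 * Not exposed in the public headers; declared here for use by the tests
 * (assumed private SPI).
 */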
932 kern_return_t
933 mach_vm_deferred_reclamation_buffer_remap(task_t source_task,
934     task_t dest_task,
935     mach_vm_address_t addr,
936     mach_vm_address_t *addr_u,
937     mach_vm_size_t *size_u);
938 
939 T_DECL(copy_and_query_buffer,
940     "verify that a reclaim ring may be queried correctly",
941     T_META_VM_RECLAIM_ENABLED,
942     T_META_TAG_VM_PREFERRED,
943     T_META_ASROOT(true))
944 {
945 	kern_return_t kr;
946 	mach_vm_reclaim_error_t rr;
947 	mach_vm_reclaim_ring_t self_ring;
948 	mach_vm_reclaim_id_t ids[QUERY_BUFFER_RING_COUNT];
949 	mach_vm_address_t addrs[QUERY_BUFFER_RING_COUNT];
950 	mach_vm_size_t sizes[QUERY_BUFFER_RING_COUNT];
951 	mach_vm_reclaim_action_t actions[QUERY_BUFFER_RING_COUNT];
952 	struct mach_vm_reclaim_region_s query_buffer[QUERY_BUFFER_RING_COUNT];
953 	mach_vm_reclaim_count_t query_count;
954 	task_t child_task;
955 	mach_vm_reclaim_count_t n_rings;
956 	struct mach_vm_reclaim_ring_ref_s ring_ref;
957 	mach_vm_reclaim_count_t capacity = mach_vm_reclaim_round_capacity(512);
958 	mach_vm_reclaim_ring_copy_t copied_ring;
959 
960 	T_SETUPBEGIN;
961 
962 	T_LOG("Initializing buffer");
963 	kr = mach_vm_reclaim_ring_allocate(&self_ring, capacity, capacity);
964 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");
965 
966 	T_LOG("Adding entries to buffer");
967 	for (mach_vm_reclaim_count_t i = 0; i < QUERY_BUFFER_RING_COUNT; i++) {
968 		actions[i] = (rand() % 2 == 0) ? VM_RECLAIM_FREE : VM_RECLAIM_DEALLOCATE;
969 		sizes[i] = ((rand() % 3) + 1) * vm_page_size;
970 		addrs[i] = 0;
971 		ids[i] = allocate_and_defer_free(sizes[i], self_ring, 'A', actions[i], &addrs[i]);
972 		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer allocation");
973 	}
974 
975 	child_pid = fork();
976 	if (child_pid == 0) {
977 		while (true) {
978 			sleep(1);
979 		}
980 	}
981 	T_ATEND(kill_child);
982 
983 	kr = task_for_pid(mach_task_self(), child_pid, &child_task);
984 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_for_pid");
985 
986 	T_SETUPEND;
987 
988 	T_LOG("Copying buffer");
989 	rr = mach_vm_reclaim_get_rings_for_task(child_task, NULL, &n_rings);
990 	T_ASSERT_MACH_SUCCESS(rr, "Query ring count");
991 	T_ASSERT_EQ(n_rings, 1, "Task has one ring");
992 	rr = mach_vm_reclaim_get_rings_for_task(child_task, &ring_ref, &n_rings);
993 	T_ASSERT_MACH_SUCCESS(rr, "Get ring reference");
994 	T_ASSERT_NE(ring_ref.addr, 0ULL, "Ring ref ring is not null");
995 
996 	kr = mach_vm_reclaim_ring_copy(child_task, &ring_ref, &copied_ring);
997 	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_copy()");
998 	T_ASSERT_NOTNULL(copied_ring, "copied ring is not null");
999 
1000 	T_LOG("Querying buffer");
1001 
1002 	rr = mach_vm_reclaim_copied_ring_query(&copied_ring, NULL, &query_count);
1003 	T_QUIET; T_ASSERT_MACH_SUCCESS(rr, "query reclaim ring size");
1004 	T_ASSERT_EQ(query_count, QUERY_BUFFER_RING_COUNT, "correct reclaim ring query size");
1005 
1006 	rr = mach_vm_reclaim_copied_ring_query(&copied_ring, query_buffer, &query_count);
1007 	T_QUIET; T_ASSERT_MACH_SUCCESS(rr, "query reclaim ring");
1008 	T_ASSERT_EQ(query_count, QUERY_BUFFER_RING_COUNT, "query count is correct");
1009 
1010 	bool all_match = true;
1011 	for (mach_vm_reclaim_count_t i = 0; i < QUERY_BUFFER_RING_COUNT; i++) {
1012 		mach_vm_reclaim_region_t qentry = &query_buffer[i];
1013 		if ((qentry->vmrr_addr != addrs[i]) ||
1014 		    (qentry->vmrr_size != sizes[i]) ||
1015 		    (qentry->vmrr_behavior != actions[i])) {
1016 			all_match = false;
1017 		}
1018 		T_QUIET; T_EXPECT_EQ(qentry->vmrr_addr, addrs[i], "query->vmrr_addr is correct");
1019 		T_QUIET; T_EXPECT_EQ(qentry->vmrr_size, sizes[i], "query->vmrr_size is correct");
1020 		T_QUIET; T_EXPECT_EQ(qentry->vmrr_behavior, actions[i], "query->vmrr_behavior is correct");
1021 	}
1022 	T_ASSERT_TRUE(all_match, "query entries are correct");
1023 
1024 	query_count = 5;
1025 	rr = mach_vm_reclaim_copied_ring_query(&copied_ring, query_buffer, &query_count);
1026 	T_QUIET; T_ASSERT_MACH_SUCCESS(rr, "query reclaim ring with small buffer");
1027 	T_ASSERT_EQ(query_count, 5, "query reclaim ring with small buffer returns correct size");
1028 
1029 	T_LOG("Freeing buffer");
1030 	rr = mach_vm_reclaim_copied_ring_free(&copied_ring);
1031 	T_ASSERT_MACH_SUCCESS(rr, "free reclaim ring");
1032 }
1033