xref: /xnu-11417.121.6/tests/vm/vm_reclaim.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 #include <sys/types.h>
2 #include <sys/sysctl.h>
3 #include <mach/mach.h>
4 #include <mach/mach_vm.h>
5 #include <mach/vm_reclaim_private.h>
6 #include <mach-o/dyld.h>
7 #include <os/atomic_private.h>
8 #include <signal.h>
9 #include <spawn.h>
10 #include <spawn_private.h>
11 #include <time.h>
12 #include <unistd.h>
13 
14 #include <darwintest.h>
15 #include <darwintest_multiprocess.h>
16 #include <darwintest_utils.h>
17 
18 #include <Kernel/kern/ledger.h>
19 extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);
20 
21 #include "memorystatus_assertion_helpers.h"
22 
23 #if TARGET_OS_IOS && !TARGET_OS_VISION
24 // Some of the unit tests test deferred deallocations.
25 // For these we need to set a sufficiently large reclaim threshold
26 // to ensure their buffers aren't freed prematurely.
27 #define T_META_VM_RECLAIM_ENABLED T_META_SYSCTL_INT("vm.reclaim.max_threshold=268435456")
28 #define T_META_VM_RECLAIM_DISABLED T_META_SYSCTL_INT("vm.reclaim.max_threshold=0")
29 #else // !(TARGET_OS_IOS && !TARGET_OS_VISION)
30 #define T_META_VM_RECLAIM_ENABLED T_META_SYSCTL_INT("vm.reclaim.enabled=1")
31 #define T_META_VM_RECLAIM_DISABLED T_META_SYSCTL_INT("vm.reclaim.enabled=0")
32 #endif // TARGET_OS_IOS && !TARGET_OS_VISION
33 
34 #define MiB(x) ((x) << 20)
35 
36 T_GLOBAL_META(
37 	T_META_NAMESPACE("xnu.vm_reclaim"),
38 	T_META_RADAR_COMPONENT_NAME("xnu"),
39 	T_META_RADAR_COMPONENT_VERSION("performance"),
40 	T_META_OWNER("jarrad"),
41 	// Ensure we don't conflict with libmalloc's reclaim buffer
42 	T_META_ENVVAR("MallocDeferredReclaim=0"),
43 	T_META_RUN_CONCURRENTLY(false),
44 	T_META_CHECK_LEAKS(false)
45 	);
46 
47 static mach_vm_reclaim_ring_t
48 ringbuffer_init(void)
49 {
50 	mach_vm_reclaim_ring_t ringbuffer = NULL;
51 	mach_vm_reclaim_count_t len = mach_vm_reclaim_round_capacity(1);
52 	mach_vm_reclaim_count_t max_len = len;
53 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, len, max_len);
54 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");
55 	return ringbuffer;
56 }
57 
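/*
 * A minimal illustrative sketch (not referenced by any test in this file) of
 * the deferred-free lifecycle these tests drive, using only the
 * mach_vm_reclaim calls that already appear below: publish a region with
 * try_enter, ask the kernel to flush the ring, then check whether the entry
 * can still be cancelled and the memory safely re-used.
 */
static inline bool
example_defer_flush_cancel(mach_vm_reclaim_ring_t ring,
    mach_vm_address_t addr, mach_vm_size_t size)
{
	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
	bool update_accounting = false;
	mach_vm_reclaim_state_t state;

	/* Defer the deallocation of [addr, addr + size) */
	kern_return_t kr = mach_vm_reclaim_try_enter(ring, addr, size,
	    VM_RECLAIM_DEALLOCATE, &id, &update_accounting);
	if (kr != KERN_SUCCESS || id == VM_RECLAIM_ID_NULL) {
		return false;
	}
	if (update_accounting) {
		(void)mach_vm_reclaim_update_kernel_accounting(ring);
	}
	/* Synchronously reclaim at least one entry from the ring */
	(void)mach_vm_reclaim_ring_flush(ring, 1);
	/* Try to take the entry back; re-use is only legal if it survived */
	kr = mach_vm_reclaim_try_cancel(ring, id, addr, size,
	    VM_RECLAIM_DEALLOCATE, &state, &update_accounting);
	return kr == KERN_SUCCESS && mach_vm_reclaim_is_reusable(state);
}
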
58 T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer",
59     T_META_VM_RECLAIM_ENABLED,
60     T_META_TAG_VM_PREFERRED)
61 {
62 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
63 	T_ASSERT_NOTNULL(ringbuffer, "ringbuffer is allocated");
64 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.head, relaxed), 0ull, "head is zeroed");
65 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.busy, relaxed), 0ull, "busy is zeroed");
66 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.tail, relaxed), 0ull, "tail is zeroed");
67 	size_t expected_len = (vm_page_size - offsetof(struct mach_vm_reclaim_ring_s, entries)) /
68 	    sizeof(struct mach_vm_reclaim_entry_s);
69 	T_ASSERT_EQ((size_t)ringbuffer->len, expected_len, "length is set correctly");
70 	for (unsigned i = 0; i < ringbuffer->len; i++) {
71 		mach_vm_reclaim_entry_t entry = &ringbuffer->entries[i];
72 		T_QUIET; T_EXPECT_EQ(entry->address, 0ull, "address is zeroed");
73 		T_QUIET; T_EXPECT_EQ(entry->size, 0u, "size is zeroed");
74 		T_QUIET; T_EXPECT_EQ(entry->behavior, 0, "behavior is zeroed");
75 	}
76 }
77 
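/*
 * Back-of-the-envelope for the expected_len check above (illustrative; exact
 * numbers depend on the kernel's structure layout): each entry records an
 * address, size, and behavior, and the ring header holds the head/busy/tail
 * indices. With 16-byte entries and a 16 KiB page that works out to roughly
 * 1024 slots minus the header, consistent with the "1022 entries" figure
 * quoted in the resize test near the end of this file.
 */
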
78 T_DECL(vm_reclaim_init_fails_when_disabled,
79     "Initializing a ring buffer on a system with vm_reclaim disabled should fail",
80     T_META_VM_RECLAIM_DISABLED, T_META_TAG_VM_PREFERRED)
81 {
82 	mach_vm_reclaim_ring_t ringbuffer;
83 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
84 	T_EXPECT_MACH_ERROR(kr, VM_RECLAIM_NOT_SUPPORTED, "mach_vm_reclaim_ring_allocate()");
85 }
86 
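/*
 * Attempt to take an entry back out of the ring. Returns true iff the kernel
 * reports the region is still safe to re-use (i.e. it was not reclaimed),
 * updating kernel accounting when the library asks for it.
 */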
87 static bool
88 try_cancel(mach_vm_reclaim_ring_t ringbuffer, mach_vm_reclaim_id_t id, mach_vm_address_t addr, mach_vm_size_t size, mach_vm_reclaim_action_t behavior)
89 {
90 	bool update_accounting;
91 	mach_vm_reclaim_state_t state;
92 	kern_return_t kr;
93 	kr = mach_vm_reclaim_try_cancel(ringbuffer, id, addr, size, behavior, &state, &update_accounting);
94 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_cancel()");
95 	if (update_accounting) {
96 		kern_return_t tmp_kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
97 		T_QUIET; T_ASSERT_MACH_SUCCESS(tmp_kr, "mach_vm_reclaim_update_kernel_accounting()");
98 	}
99 	return mach_vm_reclaim_is_reusable(state);
100 }
101 
102 /*
103  * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call.
104  */
105 static mach_vm_reclaim_id_t
106 allocate_and_defer_free(size_t size, mach_vm_reclaim_ring_t ringbuffer,
107     unsigned char val, mach_vm_reclaim_action_t behavior,
108     mach_vm_address_t *addr /* OUT */)
109 {
110 	kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
111 	bool should_update_kernel_accounting = false;
112 	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
113 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map");
114 
115 	memset((void *) *addr, val, size);
116 
117 	kr = mach_vm_reclaim_try_enter(ringbuffer, *addr, size, behavior, &id, &should_update_kernel_accounting);
118 	if (should_update_kernel_accounting) {
119 		kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
120 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
121 	}
122 	return id;
123 }
124 
125 static mach_vm_reclaim_id_t
126 allocate_and_defer_deallocate(size_t size, mach_vm_reclaim_ring_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */)
127 {
128 	return allocate_and_defer_free(size, ringbuffer, val, VM_RECLAIM_DEALLOCATE, addr);
129 }
130 
131 T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync",
132     T_META_VM_RECLAIM_ENABLED,
133     T_META_TAG_VM_PREFERRED)
134 {
135 	static const size_t kAllocationSize = (1UL << 20); // 1MB
136 	mach_vm_address_t addr;
137 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
138 
139 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
140 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
141 	mach_vm_reclaim_ring_flush(ringbuffer, 1);
142 }
143 
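/*
 * Re-exec this test binary with "-n <helper>" so the named T_HELPER_DECL runs
 * in a fresh child process; returns the child's pid.
 */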
144 static pid_t
145 spawn_helper(char *helper)
146 {
147 	char **launch_tool_args;
148 	char testpath[PATH_MAX];
149 	uint32_t testpath_buf_size;
150 	pid_t child_pid;
151 
152 	testpath_buf_size = sizeof(testpath);
153 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
154 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
155 	T_LOG("Executable path: %s", testpath);
156 	launch_tool_args = (char *[]){
157 		testpath,
158 		"-n",
159 		helper,
160 		NULL
161 	};
162 
163 	/* Spawn the child process. */
164 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
165 	if (ret != 0) {
166 		T_LOG("dt_launch_tool returned %d with error code %d", ret, errno);
167 	}
168 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
169 
170 	return child_pid;
171 }
172 
173 static int
174 spawn_helper_and_wait_for_exit(char *helper)
175 {
176 	int status;
177 	pid_t child_pid, rc;
178 
179 	child_pid = spawn_helper(helper);
180 	rc = waitpid(child_pid, &status, 0);
181 	T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
182 	return status;
183 }
184 
185 /*
186  * Returns true iff every byte in the buffer matches the expected value.
187  */
188 static bool
189 check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected)
190 {
191 	unsigned char *buffer = (unsigned char *) addr;
192 	for (size_t i = 0; i < size; i++) {
193 		if (buffer[i] != expected) {
194 			return false;
195 		}
196 	}
197 	return true;
198 }
199 
200 /*
201  * Read every byte of a buffer to ensure re-usability
202  */
203 static void
204 read_buffer(mach_vm_address_t addr, size_t size)
205 {
206 	volatile uint8_t byte;
207 	uint8_t *buffer = (uint8_t *)addr;
208 	for (size_t i = 0; i < size; i++) {
209 		byte = buffer[i];
210 	}
211 }
212 
213 /*
214  * Check that the given (freed) buffer has changed.
215  * This will likely crash; if we make it through the entire buffer without crashing, segfault on purpose.
216  */
217 static void
218 assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected)
219 {
220 	/*
221 	 * mach_vm_reclaim_ring_flush should have ensured the buffer was freed.
222 	 * Two cases:
223 	 * 1. The buffer is still free (touching it causes a crash)
224 	 * 2. The address range was re-allocated by some other library in process.
225 	 * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled
226 	 * with the value we wrote to it. So scan the buffer. If we segfault it's case #1
227 	 * and if we see another value it's case #2.
228 	 */
229 	bool changed = !check_buffer(addr, size, expected);
230 	T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated");
231 	/* Case #2. Force a segfault so the parent sees that we crashed. */
232 	*(volatile int *) 0 = 1;
233 
234 	T_FAIL("Test did not crash when dereferencing NULL");
235 }
236 
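/*
 * Defer-free a buffer with the given behavior, flush the ring, then try to
 * cancel and re-use the entry. With VM_RECLAIM_DEALLOCATE the mapping is gone
 * and touching it must crash; with VM_RECLAIM_FREE the pages may be reclaimed
 * but the range is expected to stay mapped, so re-use after a successful
 * cancel is legal.
 */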
237 static void
238 reuse_reclaimed_entry(mach_vm_reclaim_action_t behavior)
239 {
240 	kern_return_t kr;
241 	static const size_t kAllocationSize = (1UL << 20); // 1MB
242 	mach_vm_address_t addr;
243 	static const unsigned char kValue = 220;
244 
245 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
246 
247 	mach_vm_reclaim_id_t idx = allocate_and_defer_free(kAllocationSize, ringbuffer, kValue, behavior, &addr);
248 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
249 	kr = mach_vm_reclaim_ring_flush(ringbuffer, 10);
250 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush");
251 	bool usable = try_cancel(ringbuffer, idx, addr, kAllocationSize, behavior);
252 	switch (behavior) {
253 	case VM_RECLAIM_DEALLOCATE:
254 		T_EXPECT_FALSE(usable, "reclaimed entry is not re-usable");
255 		assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
256 		break;
257 	case VM_RECLAIM_FREE:
258 		T_EXPECT_TRUE(usable, "reclaimed REUSABLE entry is re-usable");
259 		read_buffer(addr, kAllocationSize);
260 		T_PASS("Freed buffer re-used successfully");
261 		break;
262 	default:
263 		T_FAIL("Unexpected reclaim behavior %d", behavior);
264 	}
265 }
266 
267 T_HELPER_DECL(reuse_freed_entry_dealloc,
268     "defer free (dealloc), sync, and try to use entry")
269 {
270 	reuse_reclaimed_entry(VM_RECLAIM_DEALLOCATE);
271 }
272 
273 T_HELPER_DECL(reuse_freed_entry_reusable,
274     "defer free (reusable), sync, and try to use entry")
275 {
276 	reuse_reclaimed_entry(VM_RECLAIM_FREE);
277 }
278 
279 T_DECL(vm_reclaim_single_entry_verify_free, "Place a single entry in the buffer, flush, and verify the memory was deallocated",
280     T_META_IGNORECRASHES(".*vm_reclaim_single_entry_verify_free.*"),
281     T_META_VM_RECLAIM_ENABLED,
282     T_META_TAG_VM_PREFERRED)
283 {
284 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_dealloc");
285 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
286 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault.");
287 }
288 
289 T_DECL(vm_reclaim_single_entry_reusable,
290     "Reclaim a reusable entry and verify re-use is legal",
291     T_META_VM_RECLAIM_ENABLED,
292     T_META_TAG_VM_PREFERRED)
293 {
294 	int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_reusable");
295 	T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
296 	T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
297 }
298 
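/*
 * Helper body: allocate argv[0] buffers and defer-free them all, optionally
 * double-enter the last entry or deallocate the ring itself to provoke an
 * error, then signal the parent (SIGUSR1) and spin until suspended/killed.
 */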
299 static void
300 allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free)
301 {
302 	kern_return_t kr;
303 	static const mach_vm_reclaim_count_t kAllocationSize = (1UL << 20); // 1MB
304 	mach_vm_address_t addr = 0;
305 	bool should_update_kernel_accounting = false;
306 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
307 
308 	const mach_vm_reclaim_count_t kNumEntries = (mach_vm_reclaim_count_t)atoi(argv[0]);
309 	mach_vm_reclaim_count_t capacity;
310 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
311 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
312 	T_QUIET; T_ASSERT_LT(kNumEntries, capacity, "Test does not fill up ringbuffer");
313 
314 	T_LOG("allocate_and_suspend: Allocating and freeing %u entries...", kNumEntries);
315 	for (size_t i = 0; i < kNumEntries; i++) {
316 		addr = 0;
317 		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr);
318 		T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct");
319 		T_LOG("allocate_and_suspend: Allocated and deferred 0x%llx", addr);
320 	}
321 
322 	if (double_free) {
323 		// Double free the last entry
324 		mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
325 		kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &id, &should_update_kernel_accounting);
326 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter");
327 	}
328 
329 	if (free_buffer) {
330 		mach_vm_size_t buffer_size = (size_t)capacity *
331 		    sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries);
332 		kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size);
333 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
334 	}
335 
336 	T_LOG("allocate_and_suspend: Signalling parent");
337 	// Signal to our parent to suspend us
338 	if (kill(getppid(), SIGUSR1) != 0) {
339 		T_LOG("Unable to signal to parent process!");
340 		exit(1);
341 	}
342 
343 	T_LOG("allocate_and_suspend: Spinning");
344 	while (1) {
345 		;
346 	}
347 	T_ASSERT_FAIL("notreached");
348 }
349 
350 T_HELPER_DECL(allocate_and_suspend,
351     "defer free, and signal parent to suspend")
352 {
353 	allocate_and_suspend(argv, false, false);
354 }
355 
356 static void
357 resume_and_kill_proc(pid_t pid)
358 {
359 	int ret = pid_resume(pid);
360 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze");
361 	T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process");
362 }
363 
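/*
 * Reclaim on suspend runs asynchronously on a kernel worker thread; this
 * sysctl blocks until the worker has finished draining the ring for the pid.
 */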
364 static void
365 wait_for_pid_to_be_drained(pid_t child_pid)
366 {
367 	int val = child_pid;
368 	int ret;
369 	size_t len = sizeof(val);
370 	ret = sysctlbyname("vm.reclaim.wait_for_pid", NULL, NULL, &val, len);
371 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim.wait_for_pid");
372 }
373 
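/*
 * Walk this task's ledger template and return the index of the
 * "phys_footprint" entry (also reporting the number of template entries), so
 * callers can read a child's footprint via get_ledger_entry_for_pid().
 */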
374 static size_t
375 ledger_phys_footprint_index(size_t *num_entries)
376 {
377 	struct ledger_info li;
378 	struct ledger_template_info *templateInfo = NULL;
379 	int ret;
380 	size_t i, footprint_index;
381 	bool found = false;
382 
383 	ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL);
384 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)");
385 
386 	T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid");
387 	*num_entries = (size_t) li.li_entries;
388 	templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info));
389 	T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries");
390 
391 	footprint_index = 0;
392 	ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL);
393 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)");
394 	for (i = 0; i < *num_entries; i++) {
395 		if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) {
396 			footprint_index = i;
397 			found = true;
398 		}
399 	}
400 	free(templateInfo);
401 	T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger");
402 	return footprint_index;
403 }
404 
405 static int64_t
406 get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries)
407 {
408 	int ret;
409 	int64_t value;
410 	struct ledger_entry_info *lei = NULL;
411 
412 	lei = malloc(num_entries * sizeof(*lei));
413 	ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries);
414 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)");
415 	value = lei[index].lei_balance;
416 	free(lei);
417 	return value;
418 }
419 
420 static pid_t child_pid;
421 
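/*
 * Spawn the named helper variant with one argument, run test_block on the
 * main queue when the child raises SIGUSR1 (its "ready" signal), and run
 * exit_block when the child exits. Never returns; blocks in dispatch_main().
 */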
422 static void
423 test_after_background_helper_launches(char *variant, char *arg1, dispatch_block_t test_block, dispatch_block_t exit_block)
424 {
425 	char **launch_tool_args;
426 	char testpath[PATH_MAX];
427 	uint32_t testpath_buf_size;
428 
429 	dispatch_source_t ds_signal, ds_exit;
430 
431 	/* Wait for the child process to tell us that it's ready, and then freeze it */
432 	signal(SIGUSR1, SIG_IGN);
433 	ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
434 	T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
435 	dispatch_source_set_event_handler(ds_signal, test_block);
436 
437 	dispatch_activate(ds_signal);
438 
439 	testpath_buf_size = sizeof(testpath);
440 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
441 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
442 	T_LOG("Executable path: %s", testpath);
443 	launch_tool_args = (char *[]){
444 		testpath,
445 		"-n",
446 		variant,
447 		arg1,
448 		NULL
449 	};
450 
451 	/* Spawn the child process. */
452 	ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
453 	if (ret != 0) {
454 		T_LOG("dt_launch_tool returned %d with error code %d", ret, errno);
455 	}
456 	T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
457 
458 	/* Listen for exit. */
459 	ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue());
460 	dispatch_source_set_event_handler(ds_exit, exit_block);
461 
462 	dispatch_activate(ds_exit);
463 	dispatch_main();
464 }
465 
466 T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.",
467     T_META_ASROOT(true),
468     T_META_VM_RECLAIM_ENABLED,
469     T_META_TAG_VM_PREFERRED)
470 {
471 	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
472 		int ret = 0;
473 		size_t num_ledger_entries = 0;
474 		size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
475 		int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20);
476 		before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
477 		T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated");
478 		ret = pid_suspend(child_pid);
479 		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
480 		/*
481 		 * The reclaim work is kicked off asynchronously by the suspend.
482 		 * So we need to call into the kernel to synchronize with the reclaim worker
483 		 * thread.
484 		 */
485 		wait_for_pid_to_be_drained(child_pid);
486 		after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
487 		T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed");
488 
489 		resume_and_kill_proc(child_pid);
490 	},
491 	    ^{
492 		int status = 0, code = 0;
493 		pid_t rc = waitpid(child_pid, &status, 0);
494 		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
495 		code = WEXITSTATUS(status);
496 		T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly");
497 		T_END;
498 	});
499 }
500 
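/*
 * The arithmetic below: 50 deferred entries of 1 MiB each against a memory
 * limit of 50 / 10 * 1 MiB = 5 MiB. The test can only pass if the kernel
 * reclaims deferred entries before treating the process as over its limit;
 * otherwise the allocation loop would trip a fatal limit kill.
 */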
501 T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill",
502     T_META_VM_RECLAIM_ENABLED,
503     T_META_TAG_VM_PREFERRED)
504 {
505 	int err;
506 	const size_t kNumEntries = 50;
507 	static const size_t kAllocationSize = (1UL << 20); // 1MB
508 	static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize;
509 
510 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
511 
512 	err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE);
513 	T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits");
514 
515 	for (size_t i = 0; i < kNumEntries; i++) {
516 		mach_vm_address_t addr = 0;
517 		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr);
518 		T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct");
519 	}
520 
521 	T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit);
522 }
523 
524 #if TARGET_OS_IOS && !TARGET_OS_VISION
525 T_DECL(vm_reclaim_update_reclaimable_bytes_threshold, "Kernel reclaims when num_bytes_reclaimable crosses threshold",
526     T_META_SYSCTL_INT("vm.reclaim.max_threshold=16384"),
527     T_META_TAG_VM_PREFERRED)
528 {
529 	mach_vm_reclaim_count_t kNumEntries = 0;
530 	const size_t kAllocationSize = vm_kernel_page_size;
531 	uint64_t vm_reclaim_reclaimable_max_threshold;
532 	int ret;
533 	mach_error_t err;
534 	size_t len = sizeof(vm_reclaim_reclaimable_max_threshold);
535 	size_t num_ledger_entries = 0;
536 	size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
537 
538 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
539 
540 	// Allocate 1000 times the reclaim threshold
541 	ret = sysctlbyname("vm.reclaim.max_threshold", &vm_reclaim_reclaimable_max_threshold, &len, NULL, 0);
542 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim.max_threshold");
543 	kNumEntries = (mach_vm_reclaim_count_t)(vm_reclaim_reclaimable_max_threshold / kAllocationSize * 1000);
544 	mach_vm_reclaim_count_t capacity;
545 	err = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
546 	T_QUIET; T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_ring_capacity()");
547 	T_QUIET; T_ASSERT_LT(kNumEntries, capacity, "Test does not fill up ringbuffer");
548 
549 	mach_vm_address_t addr = 0;
550 	for (uint64_t i = 0; i < kNumEntries; i++) {
551 		mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char)i, &addr);
552 		T_QUIET; T_ASSERT_EQ(idx, i, "idx is correct");
553 	}
554 
555 	T_QUIET; T_ASSERT_LT(get_ledger_entry_for_pid(getpid(), phys_footprint_index, num_ledger_entries),
556 	    (int64_t) ((kNumEntries) * kAllocationSize), "Entries were reclaimed as we crossed threshold");
557 }
558 #else /* !(TARGET_OS_IOS && !TARGET_OS_VISION) */
559 T_DECL(vm_reclaim_trim_minimum,
560     "update_accounting trims buffer according to sampling minimum",
561     T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED)
562 {
563 	kern_return_t kr;
564 	int ret;
565 	bool success, update_accounting;
566 	mach_vm_reclaim_ring_t ringbuffer;
567 	uint64_t sampling_period_ns;
568 	size_t sampling_period_size = sizeof(sampling_period_ns);
569 	uint32_t sizes[3] = {MiB(128), MiB(128), MiB(128)};
570 	mach_vm_address_t addrs[3] = {0};
571 	uint64_t ids[3] = {0};
572 
573 	ret = sysctlbyname("vm.reclaim.sampling_period_ns", &sampling_period_ns, &sampling_period_size, NULL, 0);
574 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(\"vm.reclaim.sampling_period_ns\")");
575 	struct timespec ts = {
576 		.tv_sec = 2 * sampling_period_ns / NSEC_PER_SEC,
577 		.tv_nsec = 2 * sampling_period_ns % NSEC_PER_SEC,
578 	};
579 
580 	ringbuffer = ringbuffer_init();
581 
582 	// This should result in a sample taken (min 0)
583 	kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
584 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
585 
586 	for (int i = 0; i < 3; i++) {
587 		T_LOG("Placing entries[%d] into buffer", i);
588 		ids[i] = allocate_and_defer_deallocate(sizes[i], ringbuffer, 0xAB, &addrs[i]);
589 	}
590 
591 	for (int i = 0; i < 3; i++) {
592 		// The minimum for the first sample should be 0
593 		success = try_cancel(ringbuffer, ids[i], addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE);
594 		T_ASSERT_TRUE(success, "Entry %d should not be reclaimed", i);
595 		kr = mach_vm_reclaim_try_enter(ringbuffer, addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE, &ids[i], &update_accounting);
596 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
597 		if (update_accounting) {
598 			kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
599 			T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
600 		}
601 	}
602 
603 	T_LOG("Sleeping for 2 sampling periods (%llu ns)", 2 * sampling_period_ns);
604 	ret = nanosleep(&ts, NULL);
605 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()");
606 
607 	// This should result in a sample taken (still min 0)
608 	kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
609 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
610 
611 	for (int i = 0; i < 3; i++) {
612 		success = try_cancel(ringbuffer, ids[i], addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE);
613 		T_EXPECT_TRUE(success, "Entry %d should not be reclaimed", i);
614 		kr = mach_vm_reclaim_try_enter(ringbuffer, addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE, &ids[i], &update_accounting);
615 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
616 		if (update_accounting) {
617 			kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
618 			T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
619 		}
620 	}
621 	T_LOG("Sleeping for 2 sampling periods (%llu ns)", 2 * sampling_period_ns);
622 	ret = nanosleep(&ts, NULL);
623 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()");
624 
625 	// This should result in a sample taken (still min 0)
626 	kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
627 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
628 
629 	T_LOG("Sleeping for 2 sampling periods (%llu ns)", 2 * sampling_period_ns);
630 	ret = nanosleep(&ts, NULL);
631 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()");
632 
633 	// This should result in a sample taken (min sum(sizeof(entries[i])))
634 	kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer);
635 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()");
636 
637 	for (int i = 0; i < 3; i++) {
638 		success = try_cancel(ringbuffer, ids[i], addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE);
639 		T_EXPECT_FALSE(success, "Entry %d should be reclaimed", i);
640 	}
641 }
642 #endif /* TARGET_OS_IOS && !TARGET_OS_VISION */
643 
644 T_HELPER_DECL(deallocate_buffer,
645     "deallocate the buffer from underneath the kernel")
646 {
647 	kern_return_t kr;
648 	static const size_t kAllocationSize = (1UL << 20); // 1MB
649 	mach_vm_address_t addr;
650 
651 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
652 
653 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
654 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
655 	mach_vm_reclaim_count_t capacity;
656 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity);
657 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
658 
659 	mach_vm_size_t buffer_size = (size_t)capacity *
660 	    sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries);
661 	kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size);
662 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
663 
664 	mach_vm_reclaim_ring_flush(ringbuffer, 10);
665 
666 	T_FAIL("Test did not crash when synchronizing on a deallocated buffer!");
667 }
668 
669 T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer",
670     T_META_IGNORECRASHES(".*deallocate_buffer.*"),
671     T_META_VM_RECLAIM_ENABLED,
672     T_META_TAG_VM_PREFERRED)
673 {
674 	int status = spawn_helper_and_wait_for_exit("deallocate_buffer");
675 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
676 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
677 }
678 
679 T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer")
680 {
681 	kern_return_t kr;
682 	static const size_t kAllocationSize = (1UL << 20); // 1MB
683 	mach_vm_address_t addr;
684 	bool should_update_kernel_accounting = false;
685 
686 	kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL);
687 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()");
688 
689 	mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init();
690 
691 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr);
692 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
693 	idx = VM_RECLAIM_ID_NULL;
694 	kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &idx, &should_update_kernel_accounting);
695 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
696 	T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index");
697 
698 	mach_vm_reclaim_ring_flush(ringbuffer, 2);
699 
700 	T_FAIL("Test did not crash when doing a double free!");
701 }
702 
703 T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception",
704     T_META_IGNORECRASHES(".*dealloc_gap.*"),
705     T_META_VM_RECLAIM_ENABLED,
706     T_META_TAG_VM_PREFERRED)
707 {
708 	int status = spawn_helper_and_wait_for_exit("dealloc_gap");
709 	T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
710 	T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
711 }
712 
713 T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap,
714     "defer double free, and signal parent to suspend")
715 {
716 	kern_return_t kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL);
717 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()");
718 	allocate_and_suspend(argv, false, true);
719 }
720 
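/*
 * Shared driver for the async-exception tests: suspend the helper (which
 * kicks off reclaim of its ring), wait for the drain, and expect the child
 * to be killed with SIGKILL because the kernel hit the injected error.
 */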
721 static void
722 vm_reclaim_async_exception(char *variant, char *arg1)
723 {
724 	test_after_background_helper_launches(variant, arg1, ^{
725 		int ret = 0;
726 		ret = pid_suspend(child_pid);
727 		T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
728 		/*
729 		 * The reclaim work is kicked off asynchronously by the suspend.
730 		 * So we need to call into the kernel to synchronize with the reclaim worker
731 		 * thread.
732 		 */
733 		T_LOG("Waiting for child to be drained...");
734 		wait_for_pid_to_be_drained(child_pid);
735 	}, ^{
736 		int status;
737 		int signal;
738 		T_LOG("Waiting for child to exit...");
739 		bool exited = dt_waitpid(child_pid, &status, &signal, 30);
740 		T_QUIET; T_EXPECT_FALSE(exited, "waitpid");
741 		T_QUIET; T_EXPECT_FALSE(status, "Test process crashed.");
742 		T_QUIET; T_EXPECT_EQ(signal, SIGKILL, "Test process crashed with SIGKILL.");
743 		T_END;
744 	});
745 }
746 
747 T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception",
748     T_META_IGNORECRASHES(".*allocate_and_suspend_with_dealloc_gap.*"),
749     T_META_VM_RECLAIM_ENABLED,
750     T_META_TAG_VM_PREFERRED)
751 {
752 	vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15");
753 }
754 
755 T_HELPER_DECL(allocate_and_suspend_with_buffer_error,
756     "defer free, free buffer, and signal parent to suspend")
757 {
758 	allocate_and_suspend(argv, true, false);
759 }
760 
761 T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception",
762     T_META_IGNORECRASHES(".*allocate_and_suspend_with_buffer_error.*"),
763     T_META_VM_RECLAIM_ENABLED,
764     T_META_TAG_VM_PREFERRED)
765 {
766 	vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15");
767 }
768 
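/* State shared with the forked child in the buffer-inheritance test below. */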
769 static mach_vm_reclaim_ring_t buffer_4fork_inherit;
770 static const size_t allocation_size_4fork_inherit = (16UL << 10); // 16 KiB
771 static const unsigned char value_4fork_inherit = 119;
772 static mach_vm_address_t addr_4fork_inherit;
773 
774 T_HELPER_DECL(reuse_freed_entry_fork,
775     "defer free, sync, and try to use entry")
776 {
777 	kern_return_t kr;
778 	bool usable, update;
779 	mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL;
780 	mach_vm_reclaim_ring_t ringbuffer_tmp;
781 	kr = mach_vm_reclaim_ring_allocate(&ringbuffer_tmp, 1, 1);
782 	T_ASSERT_MACH_ERROR(kr, VM_RECLAIM_RESOURCE_SHORTAGE, "mach_vm_reclaim_ring_allocate() should fail");
783 	usable = try_cancel(buffer_4fork_inherit, 0, addr_4fork_inherit,
784 	    allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE);
785 	T_ASSERT_TRUE(usable, "Entry can be re-used after fork()");
786 
787 	T_EXPECT_EQ(*(unsigned char *)addr_4fork_inherit, value_4fork_inherit,
788 	    "value is preserved");
789 
790 	kr = mach_vm_reclaim_try_enter(buffer_4fork_inherit,
791 	    addr_4fork_inherit, allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE, &id, &update);
792 	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()");
793 	T_EXPECT_EQ(id, 1ull, "new entry is placed at tail");
794 
795 	kr = mach_vm_reclaim_ring_flush(buffer_4fork_inherit, 10);
796 	T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush()");
797 }
798 
799 T_DECL(inherit_buffer_after_fork, "Ensure reclaim buffer is inherited across a fork",
800     T_META_IGNORECRASHES(".*vm_reclaim_fork.*"),
801     T_META_VM_RECLAIM_ENABLED,
802     T_META_TAG_VM_PREFERRED)
803 {
804 	dt_helper_t helpers[1];
805 
806 	buffer_4fork_inherit = ringbuffer_init();
807 
808 	mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(
809 		allocation_size_4fork_inherit, buffer_4fork_inherit, value_4fork_inherit, &addr_4fork_inherit);
810 	T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
811 	helpers[0] = dt_fork_helper("reuse_freed_entry_fork");
812 	dt_run_helpers(helpers, 1, 30);
813 }
814 
815 #define SUSPEND_AND_RESUME_COUNT 4
816 
817 // rdar://110081398
818 T_DECL(reclaim_async_on_repeated_suspend,
819     "verify that subsequent suspends are allowed",
820     T_META_VM_RECLAIM_ENABLED,
821     T_META_TAG_VM_PREFERRED)
822 {
823 	const int sleep_duration = 3;
824 	test_after_background_helper_launches("allocate_and_suspend", "20", ^{
825 		int ret = 0;
826 		for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) {
827 		        ret = pid_suspend(child_pid);
828 		        T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()");
829 		        ret = pid_resume(child_pid);
830 		        T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()");
831 		}
832 		T_LOG("Sleeping %d sec...", sleep_duration);
833 		sleep(sleep_duration);
834 		T_LOG("Killing child...");
835 		T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()");
836 	}, ^{
837 		int status;
838 		pid_t rc = waitpid(child_pid, &status, 0);
839 		T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
840 		T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
841 		T_END;
842 	});
843 }
844 
845 T_HELPER_DECL(buffer_init_after_exec,
846     "initialize a ringbuffer after exec")
847 {
848 	mach_vm_reclaim_ring_t ringbuffer;
849 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
850 	T_ASSERT_MACH_SUCCESS(kr, "post-exec: mach_vm_reclaim_ring_allocate()");
851 }
852 
853 extern char **environ;
854 
855 T_DECL(reclaim_exec_new_reclaim_buffer,
856     "verify that an exec-ed process may instantiate a new buffer",
857     T_META_VM_RECLAIM_ENABLED,
858     T_META_TAG_VM_PREFERRED)
859 {
860 	char **launch_tool_args;
861 	char testpath[PATH_MAX];
862 	uint32_t testpath_buf_size;
863 	mach_vm_reclaim_ring_t ringbuffer;
864 
865 	kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1);
866 	T_ASSERT_MACH_SUCCESS(kr, "pre-exec: mach_vm_reclaim_ring_allocate()");
867 
868 	testpath_buf_size = sizeof(testpath);
869 	int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
870 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
871 	T_LOG("Executable path: %s", testpath);
872 	launch_tool_args = (char *[]){
873 		testpath,
874 		"-n",
875 		"buffer_init_after_exec",
876 		NULL
877 	};
878 
879 	/* Spawn the child process. */
880 	posix_spawnattr_t spawnattrs;
881 	posix_spawnattr_init(&spawnattrs);
882 	posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC);
883 	posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ);
884 	T_ASSERT_FAIL("should not be reached");
885 }
886 
887 T_DECL(resize_buffer,
888     "verify that a reclaim buffer may be safely resized",
889     T_META_VM_RECLAIM_ENABLED,
890     T_META_TAG_VM_PREFERRED)
891 {
892 	kern_return_t kr;
893 	mach_vm_reclaim_ring_t ringbuffer;
894 	mach_vm_address_t addr_tmp;
895 	mach_vm_reclaim_id_t id_tmp;
896 	mach_vm_reclaim_id_t ids[4095] = {0};
897 	mach_vm_address_t addrs[4095] = {0};
898 
899 	T_LOG("Initializing 1 page buffer");
900 	mach_vm_reclaim_count_t initial_len = mach_vm_reclaim_round_capacity(512);
901 	mach_vm_reclaim_count_t max_len = 4 * initial_len;
902 	kr = mach_vm_reclaim_ring_allocate(&ringbuffer, initial_len, max_len);
903 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()");
904 
905 	// Should be able to fit 1022 entries in a one-page buffer (two entries for indices)
906 	T_LOG("Filling buffer with entries");
907 	mach_vm_reclaim_count_t old_capacity;
908 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &old_capacity);
909 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
910 	T_EXPECT_EQ(old_capacity, initial_len, "Capacity is same as asked for");
911 	for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) {
912 		ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'A', &addrs[i]);
913 		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation");
914 	}
915 	id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp);
916 	T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer");
917 	uint64_t initial_tail = os_atomic_load(&ringbuffer->indices.tail, relaxed);
918 	T_ASSERT_EQ(initial_tail, (uint64_t)old_capacity, "tail == capacity after fill");
919 
920 	T_LOG("Resizing buffer to 4x");
921 	kr = mach_vm_reclaim_ring_resize(ringbuffer, max_len);
922 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_resize()");
923 
924 	// All entries should be reclaimed after resize
925 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.head, relaxed), initial_tail, "head is incremented");
926 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.busy, relaxed), initial_tail, "busy is incremented");
927 	T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.tail, relaxed), initial_tail, "tail is preserved");
928 
929 	mach_vm_reclaim_count_t new_capacity;
930 	kr = mach_vm_reclaim_ring_capacity(ringbuffer, &new_capacity);
931 	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()");
932 	T_EXPECT_GT(new_capacity, old_capacity, "Buffer capacity grew");
933 	T_ASSERT_EQ(new_capacity, max_len, "length is set correctly");
934 
935 	T_LOG("Attempting to use all entries (should fail)");
936 	for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) {
937 		mach_vm_reclaim_state_t state;
938 		kr = mach_vm_reclaim_query_state(ringbuffer, ids[i], VM_RECLAIM_DEALLOCATE, &state);
939 		bool reclaimed = !(state == VM_RECLAIM_UNRECLAIMED);
940 		T_QUIET; T_EXPECT_TRUE(reclaimed, "Entry is reclaimed after resize");
941 		bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE);
942 		T_QUIET; T_EXPECT_FALSE(usable, "Entry cannot be re-used after resize");
943 	}
944 
945 	T_LOG("Filling resized buffer");
946 	for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) {
947 		ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'B', &addrs[i]);
948 		T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation");
949 	}
950 	id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp);
951 	T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer");
952 	T_LOG("Re-using all entries");
953 	for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) {
954 		bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE);
955 		T_QUIET; T_EXPECT_TRUE(usable, "Entry is available for re-use");
956 	}
957 }
958