1 #include <sys/types.h>
2 #include <sys/sysctl.h>
3 #include <mach/mach.h>
4 #include <mach/mach_vm.h>
5 #include <mach/vm_reclaim.h>
6 #include <mach-o/dyld.h>
7 #include <os/atomic_private.h>
8 #include <signal.h>
9 #include <spawn.h>
10 #include <spawn_private.h>
11 #include <unistd.h>
12
13 #include <darwintest.h>
14 #include <darwintest_multiprocess.h>
15 #include <darwintest_utils.h>
16
17 #include <Kernel/kern/ledger.h>
18 extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3);
19
20 #include "memorystatus_assertion_helpers.h"
21
/*
 * Reclaim-threshold presets, spelled both as boot-args and as sysctl
 * assignments.
 *
 * Some of the unit tests exercise deferred deallocations; those need a
 * sufficiently large threshold (HIGH) so that their buffers are not freed
 * prematurely. LOW is a small threshold, and a threshold of 0 is what the
 * "disabled" test runs with.
 */

/* Boot-arg spellings. */
#define VM_RECLAIM_THRESHOLD_BOOTARG_HIGH "vm_reclaim_max_threshold=268435456"
#define VM_RECLAIM_THRESHOLD_BOOTARG_LOW "vm_reclaim_max_threshold=16384"
#define VM_RECLAIM_BOOTARG_DISABLED "vm_reclaim_max_threshold=0"

/* Sysctl spellings of the same three values. */
#define VM_RECLAIM_THRESHOLD_SYSCTL_HIGH "vm.reclaim_max_threshold=268435456"
#define VM_RECLAIM_THRESHOLD_SYSCTL_LOW "vm.reclaim_max_threshold=16384"
#define VM_RECLAIM_SYSCTL_DISABLED "vm.reclaim_max_threshold=0"
31
32 T_GLOBAL_META(
33 T_META_NAMESPACE("xnu.vm"),
34 T_META_RADAR_COMPONENT_NAME("xnu"),
35 T_META_RADAR_COMPONENT_VERSION("performance"),
36 T_META_OWNER("jarrad"),
37 T_META_ENABLED(TARGET_OS_IOS && !TARGET_OS_MACCATALYST),
38 // Ensure we don't conflict with libmalloc's reclaim buffer
39 T_META_ENVVAR("MallocLargeCache=0"),
40 T_META_RUN_CONCURRENTLY(false)
41 );
42
43 T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer",
44 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
45 T_META_TAG_VM_PREFERRED)
46 {
47 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
48
49 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
50
51 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
52 }
53
54 T_DECL(vm_reclaim_init_fails_when_disabled, "Initializing a ring buffer on a system with vm_reclaim disabled should fail",
55 T_META_BOOTARGS_SET(VM_RECLAIM_BOOTARG_DISABLED), T_META_TAG_VM_PREFERRED)
56 {
57 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
58
59 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
60
61 T_QUIET; T_EXPECT_MACH_ERROR(kr, KERN_NOT_SUPPORTED, "mach_vm_reclaim_ringbuffer_init");
62 }
63
64 /*
65 * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call.
66 */
67 static uint64_t
allocate_and_defer_free(size_t size,mach_vm_reclaim_ringbuffer_v1_t ringbuffer,unsigned char val,mach_vm_reclaim_behavior_v1_t behavior,mach_vm_address_t * addr)68 allocate_and_defer_free(size_t size, mach_vm_reclaim_ringbuffer_v1_t ringbuffer, unsigned char val, mach_vm_reclaim_behavior_v1_t behavior, mach_vm_address_t *addr /* OUT */)
69 {
70 kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
71 bool should_update_kernel_accounting = false;
72 uint64_t idx;
73 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map");
74
75 memset((void *) *addr, val, size);
76
77 idx = mach_vm_reclaim_mark_free(ringbuffer, *addr, (uint32_t) size, behavior, &should_update_kernel_accounting);
78 if (should_update_kernel_accounting) {
79 mach_vm_reclaim_update_kernel_accounting(ringbuffer);
80 }
81 return idx;
82 }
83
84 static uint64_t
allocate_and_defer_deallocate(size_t size,mach_vm_reclaim_ringbuffer_v1_t ringbuffer,unsigned char val,mach_vm_address_t * addr)85 allocate_and_defer_deallocate(size_t size, mach_vm_reclaim_ringbuffer_v1_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */)
86 {
87 return allocate_and_defer_free(size, ringbuffer, val, MACH_VM_RECLAIM_DEALLOCATE, addr);
88 }
89
90 T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync",
91 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
92 T_META_TAG_VM_PREFERRED)
93 {
94 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
95 static const size_t kAllocationSize = (1UL << 20); // 1MB
96 mach_vm_address_t addr;
97
98 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
99 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
100
101 uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
102 T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
103 mach_vm_reclaim_synchronize(&ringbuffer, 1);
104 }
105
106 static pid_t
spawn_helper(char * helper)107 spawn_helper(char *helper)
108 {
109 char **launch_tool_args;
110 char testpath[PATH_MAX];
111 uint32_t testpath_buf_size;
112 pid_t child_pid;
113
114 testpath_buf_size = sizeof(testpath);
115 int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
116 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
117 T_LOG("Executable path: %s", testpath);
118 launch_tool_args = (char *[]){
119 testpath,
120 "-n",
121 helper,
122 NULL
123 };
124
125 /* Spawn the child process. */
126 ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
127 if (ret != 0) {
128 T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
129 }
130 T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
131
132 return child_pid;
133 }
134
135 static int
spawn_helper_and_wait_for_exit(char * helper)136 spawn_helper_and_wait_for_exit(char *helper)
137 {
138 int status;
139 pid_t child_pid, rc;
140
141 child_pid = spawn_helper(helper);
142 rc = waitpid(child_pid, &status, 0);
143 T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
144 return status;
145 }
146
147 /*
148 * Returns true iff every entry in buffer is expected.
149 */
150 static bool
check_buffer(mach_vm_address_t addr,size_t size,unsigned char expected)151 check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected)
152 {
153 unsigned char *buffer = (unsigned char *) addr;
154 for (size_t i = 0; i < size; i++) {
155 if (buffer[i] != expected) {
156 return false;
157 }
158 }
159 return true;
160 }
161
162 /*
163 * Read every byte of a buffer to ensure re-usability
164 */
165 static void
read_buffer(mach_vm_address_t addr,size_t size)166 read_buffer(mach_vm_address_t addr, size_t size)
167 {
168 volatile uint8_t byte;
169 uint8_t *buffer = (uint8_t *)addr;
170 for (size_t i = 0; i < size; i++) {
171 byte = buffer[i];
172 }
173 }
174
175 /*
176 * Check that the given (freed) buffer has changed.
177 * This will likely crash, but if we make it through the entire buffer then segfault on purpose.
178 */
179 static void
assert_buffer_has_changed_and_crash(mach_vm_address_t addr,size_t size,unsigned char expected)180 assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected)
181 {
182 /*
183 * mach_vm_reclaim_synchronize should have ensured the buffer was freed.
184 * Two cases:
185 * 1. The buffer is still free (touching it causes a crash)
186 * 2. The address range was re-allocated by some other library in process.
187 * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled
188 * with the value we wrote to it. So scan the buffer. If we segfault it's case #1
189 * and if we see another value it's case #2.
190 */
191 bool changed = !check_buffer(addr, size, expected);
192 T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated");
193 /* Case #2. Force a segfault so the parent sees that we crashed. */
194 *(volatile int *) 0 = 1;
195
196 T_FAIL("Test did not crash when dereferencing NULL");
197 }
198
199 static void
reuse_reclaimed_entry(mach_vm_reclaim_behavior_v1_t behavior)200 reuse_reclaimed_entry(mach_vm_reclaim_behavior_v1_t behavior)
201 {
202 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
203 static const size_t kAllocationSize = (1UL << 20); // 1MB
204 mach_vm_address_t addr;
205 static const unsigned char kValue = 220;
206
207 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
208 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
209
210 uint64_t idx = allocate_and_defer_free(kAllocationSize, &ringbuffer, kValue, behavior, &addr);
211 T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
212 kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
213 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
214 bool usable = mach_vm_reclaim_mark_used(&ringbuffer, idx, addr, kAllocationSize);
215 bool reclaimed = mach_vm_reclaim_is_reclaimed(&ringbuffer, idx);
216 T_EXPECT_FALSE(usable, "reclaimed entry is not re-usable");
217 T_EXPECT_TRUE(reclaimed, "reclaimed entry was marked reclaimed");
218 switch (behavior) {
219 case MACH_VM_RECLAIM_DEALLOCATE:
220 assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
221 break;
222 case MACH_VM_RECLAIM_REUSABLE:
223 read_buffer(addr, kAllocationSize);
224 T_PASS("Freed buffer re-used successfully");
225 break;
226 default:
227 T_FAIL("Unexpected reclaim behavior %d", behavior);
228 }
229 }
230
231 T_HELPER_DECL(reuse_freed_entry_dealloc,
232 "defer free (dealloc), sync, and try to use entry")
233 {
234 reuse_reclaimed_entry(MACH_VM_RECLAIM_DEALLOCATE);
235 }
236
237 T_HELPER_DECL(reuse_freed_entry_reusable,
238 "defer free (reusable), sync, and try to use entry")
239 {
240 reuse_reclaimed_entry(MACH_VM_RECLAIM_REUSABLE);
241 }
242
243 T_DECL(vm_reclaim_single_entry_verify_free, "Place a single entry in the buffer and call sync",
244 T_META_IGNORECRASHES(".*vm_reclaim_single_entry_verify_free.*"),
245 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
246 T_META_TAG_VM_PREFERRED)
247 {
248 int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_dealloc");
249 T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
250 T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault.");
251 }
252
253 T_DECL(vm_reclaim_single_entry_reusable,
254 "Reclaim a reusable entry and verify re-use is legal",
255 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
256 T_META_TAG_VM_PREFERRED)
257 {
258 int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_reusable");
259 T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
260 T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
261 }
262
263 static void
allocate_and_suspend(char * const * argv,bool free_buffer,bool double_free)264 allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free)
265 {
266 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
267 static const size_t kAllocationSize = (1UL << 20); // 1MB
268 mach_vm_address_t addr = 0;
269 bool should_update_kernel_accounting = false;
270
271 const mach_vm_size_t kNumEntries = (size_t) atoi(argv[0]);
272
273 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
274 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
275 T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Test does not fill up ringubffer");
276
277 for (size_t i = 0; i < kNumEntries; i++) {
278 uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
279 T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
280 }
281
282 if (double_free) {
283 // Double free the last entry
284 mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, MACH_VM_RECLAIM_DEALLOCATE, &should_update_kernel_accounting);
285 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_mark_free");
286 }
287
288 if (free_buffer) {
289 mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
290 offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
291 kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
292 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
293 }
294
295 // Signal to our parent to suspend us
296 if (kill(getppid(), SIGUSR1) != 0) {
297 T_LOG("Unable to signal to parent process!");
298 exit(1);
299 }
300
301 while (1) {
302 ;
303 }
304 }
305
306 T_HELPER_DECL(allocate_and_suspend,
307 "defer free, and signal parent to suspend")
308 {
309 allocate_and_suspend(argv, false, false);
310 }
311
312 static void
resume_and_kill_proc(pid_t pid)313 resume_and_kill_proc(pid_t pid)
314 {
315 int ret = pid_resume(pid);
316 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze");
317 T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process");
318 }
319
320 static void
drain_async_queue(pid_t child_pid)321 drain_async_queue(pid_t child_pid)
322 {
323 int val = child_pid;
324 int ret;
325 size_t len = sizeof(val);
326 ret = sysctlbyname("vm.reclaim_drain_async_queue", NULL, NULL, &val, len);
327 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim_drain_async_queue");
328 }
329
330 static size_t
ledger_phys_footprint_index(size_t * num_entries)331 ledger_phys_footprint_index(size_t *num_entries)
332 {
333 struct ledger_info li;
334 struct ledger_template_info *templateInfo = NULL;
335 int ret;
336 size_t i, footprint_index;
337 bool found = false;
338
339 ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL);
340 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)");
341
342 T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid");
343 *num_entries = (size_t) li.li_entries;
344 templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info));
345 T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries");
346
347 footprint_index = 0;
348 ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL);
349 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)");
350 for (i = 0; i < *num_entries; i++) {
351 if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) {
352 footprint_index = i;
353 found = true;
354 }
355 }
356 free(templateInfo);
357 T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger");
358 return footprint_index;
359 }
360
361 static int64_t
get_ledger_entry_for_pid(pid_t pid,size_t index,size_t num_entries)362 get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries)
363 {
364 int ret;
365 int64_t value;
366 struct ledger_entry_info *lei = NULL;
367
368 lei = malloc(num_entries * sizeof(*lei));
369 ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries);
370 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)");
371 value = lei[index].lei_balance;
372 free(lei);
373 return value;
374 }
375
376 static pid_t child_pid;
377
378 static void
test_after_background_helper_launches(char * variant,char * arg1,dispatch_block_t test_block,dispatch_block_t exit_block)379 test_after_background_helper_launches(char* variant, char * arg1, dispatch_block_t test_block, dispatch_block_t exit_block)
380 {
381 char **launch_tool_args;
382 char testpath[PATH_MAX];
383 uint32_t testpath_buf_size;
384
385 dispatch_source_t ds_signal, ds_exit;
386
387 /* Wait for the child process to tell us that it's ready, and then freeze it */
388 signal(SIGUSR1, SIG_IGN);
389 ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
390 T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create");
391 dispatch_source_set_event_handler(ds_signal, test_block);
392
393 dispatch_activate(ds_signal);
394
395 testpath_buf_size = sizeof(testpath);
396 int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
397 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
398 T_LOG("Executable path: %s", testpath);
399 launch_tool_args = (char *[]){
400 testpath,
401 "-n",
402 variant,
403 arg1,
404 NULL
405 };
406
407 /* Spawn the child process. */
408 ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL);
409 if (ret != 0) {
410 T_LOG("dt_launch tool returned %d with error code %d", ret, errno);
411 }
412 T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool");
413
414 /* Listen for exit. */
415 ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue());
416 dispatch_source_set_event_handler(ds_exit, exit_block);
417
418 dispatch_activate(ds_exit);
419 dispatch_main();
420 }
421
422 T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.",
423 T_META_ASROOT(true),
424 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
425 T_META_TAG_VM_PREFERRED)
426 {
427 test_after_background_helper_launches("allocate_and_suspend", "20", ^{
428 int ret = 0;
429 size_t num_ledger_entries = 0;
430 size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
431 int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20);
432 before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
433 T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated");
434 ret = pid_suspend(child_pid);
435 T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
436 /*
437 * The reclaim work is kicked off asynchronously by the suspend.
438 * So we need to call into the kernel to synchronize with the reclaim worker
439 * thread.
440 */
441 drain_async_queue(child_pid);
442
443 after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries);
444 T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed");
445
446 resume_and_kill_proc(child_pid);
447 },
448 ^{
449 int status = 0, code = 0;
450 pid_t rc = waitpid(child_pid, &status, 0);
451 T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
452 code = WEXITSTATUS(status);
453 T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly");
454 T_END;
455 });
456 }
457
458 T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill",
459 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
460 T_META_TAG_VM_PREFERRED)
461 {
462 int err;
463 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
464 const size_t kNumEntries = 50;
465 static const size_t kAllocationSize = (1UL << 20); // 1MB
466 static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize;
467
468 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
469 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
470
471 err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE);
472 T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits");
473
474 for (size_t i = 0; i < kNumEntries; i++) {
475 mach_vm_address_t addr = 0;
476 uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
477 T_QUIET; T_ASSERT_EQ(idx, (uint64_t) i, "idx is correct");
478 }
479
480 T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit);
481 }
482
483 T_DECL(vm_reclaim_update_reclaimable_bytes_threshold, "Kernel reclaims when num_bytes_reclaimable crosses threshold",
484 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
485 T_META_TAG_VM_PREFERRED)
486 {
487 mach_vm_size_t kNumEntries = 0;
488 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
489 const size_t kAllocationSize = vm_kernel_page_size;
490 uint64_t vm_reclaim_reclaimable_max_threshold;
491 int ret;
492 size_t len = sizeof(vm_reclaim_reclaimable_max_threshold);
493 size_t num_ledger_entries = 0;
494 size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries);
495
496 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
497 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
498
499 // Allocate 1000 times the reclaim threshold
500 ret = sysctlbyname("vm.reclaim_max_threshold", &vm_reclaim_reclaimable_max_threshold, &len, NULL, 0);
501 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim_max_threshold");
502 kNumEntries = vm_reclaim_reclaimable_max_threshold / kAllocationSize * 1000;
503 T_QUIET; T_ASSERT_LT(kNumEntries, ringbuffer.buffer_len, "Entries will not fill up ringbuffer.");
504
505 mach_vm_address_t addr = 0;
506 for (uint64_t i = 0; i < kNumEntries; i++) {
507 uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, (unsigned char) i, &addr);
508 T_QUIET; T_ASSERT_EQ(idx, i, "idx is correct");
509 }
510
511 T_QUIET; T_ASSERT_LT(get_ledger_entry_for_pid(getpid(), phys_footprint_index, num_ledger_entries),
512 (int64_t) ((kNumEntries) * kAllocationSize), "Entries were reclaimed as we crossed threshold");
513 }
514
515 T_HELPER_DECL(deallocate_buffer,
516 "deallocate the buffer from underneath the kernel")
517 {
518 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
519 static const size_t kAllocationSize = (1UL << 20); // 1MB
520 mach_vm_address_t addr;
521
522 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
523 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
524
525 uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
526 T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
527 mach_vm_size_t buffer_size = ringbuffer.buffer_len * sizeof(mach_vm_reclaim_entry_v1_t) + \
528 offsetof(struct mach_vm_reclaim_buffer_v1_s, entries);
529 kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t) ringbuffer.buffer, buffer_size);
530 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate");
531
532 mach_vm_reclaim_synchronize(&ringbuffer, 10);
533
534 T_FAIL("Test did not crash when synchronizing on a deallocated buffer!");
535 }
536
537 T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer",
538 T_META_IGNORECRASHES(".*deallocate_buffer.*"),
539 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
540 T_META_TAG_VM_PREFERRED)
541 {
542 int status = spawn_helper_and_wait_for_exit("deallocate_buffer");
543 T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
544 T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
545 }
546
547 T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer")
548 {
549 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
550 static const size_t kAllocationSize = (1UL << 20); // 1MB
551 mach_vm_address_t addr;
552 bool should_update_kernel_accounting = false;
553
554 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
555 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
556
557 uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, 1, &addr);
558 T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
559 idx = mach_vm_reclaim_mark_free(&ringbuffer, addr, (uint32_t) kAllocationSize, MACH_VM_RECLAIM_DEALLOCATE, &should_update_kernel_accounting);
560 T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index");
561
562 mach_vm_reclaim_synchronize(&ringbuffer, 2);
563
564 T_FAIL("Test did not crash when doing a double free!");
565 }
566
567 T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception",
568 T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap.*"),
569 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
570 T_META_TAG_VM_PREFERRED)
571 {
572 int status = spawn_helper_and_wait_for_exit("dealloc_gap");
573 T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
574 T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
575 }
576
577 T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap,
578 "defer double free, and signal parent to suspend")
579 {
580 allocate_and_suspend(argv, false, true);
581 }
582
583 static void
vm_reclaim_async_exception(char * variant,char * arg1)584 vm_reclaim_async_exception(char *variant, char *arg1)
585 {
586 test_after_background_helper_launches(variant, arg1, ^{
587 int ret = 0;
588 ret = pid_suspend(child_pid);
589 T_ASSERT_POSIX_SUCCESS(ret, "child suspended");
590 /*
591 * The reclaim work is kicked off asynchronously by the suspend.
592 * So we need to call into the kernel to synchronize with the reclaim worker
593 * thread.
594 */
595 drain_async_queue(child_pid);
596 }, ^{
597 int status;
598 pid_t rc = waitpid(child_pid, &status, 0);
599 T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
600 T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed.");
601 T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL.");
602 T_END;
603 });
604 }
605
606 T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception",
607 T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap_async.*"),
608 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
609 T_META_TAG_VM_PREFERRED)
610 {
611 vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15");
612 }
613
614 T_HELPER_DECL(allocate_and_suspend_with_buffer_error,
615 "defer free, free buffer, and signal parent to suspend")
616 {
617 allocate_and_suspend(argv, true, false);
618 }
619
620 T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception",
621 T_META_IGNORECRASHES(".*vm_reclaim_dealloc_gap_async.*"),
622 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
623 T_META_TAG_VM_PREFERRED)
624 {
625 vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15");
626 }
627
628 T_HELPER_DECL(reuse_freed_entry_fork,
629 "defer free, sync, and try to use entry")
630 {
631 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
632 static const size_t kAllocationSize = (1UL << 20); // 1MB
633 mach_vm_address_t addr;
634 static const unsigned char kValue = 119;
635
636 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
637 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init");
638
639 uint64_t idx = allocate_and_defer_deallocate(kAllocationSize, &ringbuffer, kValue, &addr);
640 T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer");
641
642 pid_t forked_pid = fork();
643 T_QUIET; T_WITH_ERRNO; T_ASSERT_NE(forked_pid, -1, "fork()");
644 if (forked_pid == 0) {
645 kr = mach_vm_reclaim_synchronize(&ringbuffer, 10);
646 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_synchronize");
647 assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue);
648 } else {
649 int status;
650 pid_t rc = waitpid(forked_pid, &status, 0);
651 T_QUIET; T_ASSERT_EQ(rc, forked_pid, "waitpid");
652 T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Forked process crashed.");
653 T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Forked process crashed with segmentation fault.");
654 }
655 }
656
657 T_DECL(vm_reclaim_fork, "Ensure reclaim buffer is inherited across a fork",
658 T_META_IGNORECRASHES(".*vm_reclaim_fork.*"),
659 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
660 T_META_TAG_VM_PREFERRED)
661 {
662 int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_fork");
663 T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited.");
664 T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
665 }
666
667 #define SUSPEND_AND_RESUME_COUNT 4
668
669 // rdar://110081398
670 T_DECL(reclaim_async_on_repeated_suspend,
671 "verify that subsequent suspends are allowed",
672 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_HIGH),
673 T_META_TAG_VM_PREFERRED)
674 {
675 const int sleep_duration = 3;
676 test_after_background_helper_launches("allocate_and_suspend", "20", ^{
677 int ret = 0;
678 for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) {
679 ret = pid_suspend(child_pid);
680 T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()");
681 ret = pid_resume(child_pid);
682 T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()");
683 }
684 T_LOG("Sleeping %d sec...", sleep_duration);
685 sleep(sleep_duration);
686 T_LOG("Killing child...");
687 T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()");
688 }, ^{
689 int status;
690 pid_t rc = waitpid(child_pid, &status, 0);
691 T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid");
692 T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly.");
693 T_END;
694 });
695 }
696
697 T_HELPER_DECL(ringbuffer_init_after_exec,
698 "initialize a ringbuffer after exec")
699 {
700 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
701 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
702 T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");
703 }
704
705 extern char **environ;
706
707 T_HELPER_DECL(exec_after_ringbuffer_init,
708 "initialize a ringbuffer then exec")
709 {
710 char **launch_tool_args;
711 char testpath[PATH_MAX];
712 uint32_t testpath_buf_size;
713 struct mach_vm_reclaim_ringbuffer_v1_s ringbuffer;
714
715 kern_return_t kr = mach_vm_reclaim_ringbuffer_init(&ringbuffer);
716 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ringbuffer_init()");
717
718 testpath_buf_size = sizeof(testpath);
719 int ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
720 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
721 T_LOG("Executable path: %s", testpath);
722 launch_tool_args = (char *[]){
723 testpath,
724 "-n",
725 "ringbuffer_init_after_exec",
726 NULL
727 };
728
729 /* Spawn the child process. */
730 posix_spawnattr_t spawnattrs;
731 posix_spawnattr_init(&spawnattrs);
732 posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC);
733 posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ);
734 T_ASSERT_FAIL("should not be reached");
735 }
736
737 T_DECL(reclaim_exec_new_reclaim_buffer,
738 "verify that an exec-ed process may instantiate a new buffer",
739 T_META_SYSCTL_INT(VM_RECLAIM_THRESHOLD_SYSCTL_LOW),
740 T_META_TAG_VM_PREFERRED)
741 {
742 dt_helper_t helpers[1];
743 helpers[0] = dt_child_helper("exec_after_ringbuffer_init");
744
745 dt_run_helpers(helpers, 1, 30);
746 }
747