#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3); #include "memorystatus_assertion_helpers.h" #if TARGET_OS_IOS && !TARGET_OS_VISION // Some of the unit tests test deferred deallocations. // For these we need to set a sufficiently large reclaim threshold // to ensure their buffers aren't freed prematurely. #define T_META_VM_RECLAIM_ENABLED T_META_SYSCTL_INT("vm.reclaim.max_threshold=268435456") #define T_META_VM_RECLAIM_DISABLED T_META_SYSCTL_INT("vm.reclaim.max_threshold=0") #else // !TARGET_OS_IOS #define T_META_VM_RECLAIM_ENABLED T_META_SYSCTL_INT("vm.reclaim.enabled=1") #define T_META_VM_RECLAIM_DISABLED T_META_SYSCTL_INT("vm.reclaim.enabled=0") #endif // TARGET_OS_IOS #define MiB(x) (x << 20) T_GLOBAL_META( T_META_NAMESPACE("xnu.vm_reclaim"), T_META_RADAR_COMPONENT_NAME("xnu"), T_META_RADAR_COMPONENT_VERSION("performance"), T_META_OWNER("jarrad"), // Ensure we don't conflict with libmalloc's reclaim buffer T_META_ENVVAR("MallocDeferredReclaim=0"), T_META_RUN_CONCURRENTLY(false), T_META_CHECK_LEAKS(false) ); static mach_vm_reclaim_ring_t ringbuffer_init(void) { mach_vm_reclaim_ring_t ringbuffer = NULL; mach_vm_reclaim_count_t len = mach_vm_reclaim_round_capacity(1); mach_vm_reclaim_count_t max_len = len; kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, len, max_len); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()"); return ringbuffer; } T_DECL(vm_reclaim_init, "Set up and tear down a reclaim buffer", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); T_ASSERT_NOTNULL(ringbuffer, "ringbuffer is allocated"); T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.head, relaxed), 0ull, "head is zeroed"); T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.busy, relaxed), 0ull, "busy is zeroed"); T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.tail, relaxed), 0ull, "tail is zeroed"); size_t expected_len = (vm_page_size - offsetof(struct mach_vm_reclaim_ring_s, entries)) / sizeof(struct mach_vm_reclaim_entry_s); T_ASSERT_EQ((size_t)ringbuffer->len, expected_len, "length is set correctly"); for (unsigned i = 0; i < ringbuffer->len; i++) { mach_vm_reclaim_entry_t entry = &ringbuffer->entries[i]; T_QUIET; T_EXPECT_EQ(entry->address, 0ull, "address is zeroed"); T_QUIET; T_EXPECT_EQ(entry->size, 0u, "size is zeroed"); T_QUIET; T_EXPECT_EQ(entry->behavior, 0, "behavior is zeroed"); } } T_DECL(vm_reclaim_init_fails_when_disabled, "Initializing a ring buffer on a system with vm_reclaim disabled should fail", T_META_VM_RECLAIM_DISABLED, T_META_TAG_VM_PREFERRED) { mach_vm_reclaim_ring_t ringbuffer; kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1); T_EXPECT_MACH_ERROR(kr, VM_RECLAIM_NOT_SUPPORTED, "mach_vm_reclaim_ring_allocate()"); } static bool try_cancel(mach_vm_reclaim_ring_t ringbuffer, mach_vm_reclaim_id_t id, mach_vm_address_t addr, mach_vm_size_t size, mach_vm_reclaim_action_t behavior) { bool update_accounting; mach_vm_reclaim_state_t state; kern_return_t kr; kr = mach_vm_reclaim_try_cancel(ringbuffer, id, addr, size, behavior, &state, &update_accounting); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_cancel()"); if (update_accounting) { kern_return_t tmp_kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(tmp_kr, "mach_vm_reclaim_update_kernel_accounting()"); } return mach_vm_reclaim_is_reusable(state); } /* * Allocate a buffer of the given size, write val to each byte, and free it via a deferred free call. */ static mach_vm_reclaim_id_t allocate_and_defer_free(size_t size, mach_vm_reclaim_ring_t ringbuffer, unsigned char val, mach_vm_reclaim_action_t behavior, mach_vm_address_t *addr /* OUT */) { kern_return_t kr = mach_vm_map(mach_task_self(), addr, size, 0, VM_FLAGS_ANYWHERE, MEMORY_OBJECT_NULL, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); bool should_update_kernel_accounting = false; mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL; T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_map"); memset((void *) *addr, val, size); kr = mach_vm_reclaim_try_enter(ringbuffer, *addr, size, behavior, &id, &should_update_kernel_accounting); if (should_update_kernel_accounting) { kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()"); } return id; } static mach_vm_reclaim_id_t allocate_and_defer_deallocate(size_t size, mach_vm_reclaim_ring_t ringbuffer, unsigned char val, mach_vm_address_t *addr /* OUT */) { return allocate_and_defer_free(size, ringbuffer, val, VM_RECLAIM_DEALLOCATE, addr); } T_DECL(vm_reclaim_single_entry, "Place a single entry in the buffer and call sync", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { static const size_t kAllocationSize = (1UL << 20); // 1MB mach_vm_address_t addr; mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr); T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer"); mach_vm_reclaim_ring_flush(ringbuffer, 1); } static pid_t spawn_helper(char *helper) { char **launch_tool_args; char testpath[PATH_MAX]; uint32_t testpath_buf_size; pid_t child_pid; testpath_buf_size = sizeof(testpath); int ret = _NSGetExecutablePath(testpath, &testpath_buf_size); T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath"); T_LOG("Executable path: %s", testpath); launch_tool_args = (char *[]){ testpath, "-n", helper, NULL }; /* Spawn the child process. */ ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL); if (ret != 0) { T_LOG("dt_launch tool returned %d with error code %d", ret, errno); } T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool"); return child_pid; } static int spawn_helper_and_wait_for_exit(char *helper) { int status; pid_t child_pid, rc; child_pid = spawn_helper(helper); rc = waitpid(child_pid, &status, 0); T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid"); return status; } /* * Returns true iff every entry in buffer is expected. */ static bool check_buffer(mach_vm_address_t addr, size_t size, unsigned char expected) { unsigned char *buffer = (unsigned char *) addr; for (size_t i = 0; i < size; i++) { if (buffer[i] != expected) { return false; } } return true; } /* * Read every byte of a buffer to ensure re-usability */ static void read_buffer(mach_vm_address_t addr, size_t size) { volatile uint8_t byte; uint8_t *buffer = (uint8_t *)addr; for (size_t i = 0; i < size; i++) { byte = buffer[i]; } } /* * Check that the given (freed) buffer has changed. * This will likely crash, but if we make it through the entire buffer then segfault on purpose. */ static void assert_buffer_has_changed_and_crash(mach_vm_address_t addr, size_t size, unsigned char expected) { /* * mach_vm_reclaim_ring_flush should have ensured the buffer was freed. * Two cases: * 1. The buffer is still free (touching it causes a crash) * 2. The address range was re-allocated by some other library in process. * #1 is far more likely. But if #2 happened, the buffer shouldn't be filled * with the value we wrote to it. So scan the buffer. If we segfault it's case #1 * and if we see another value it's case #2. */ bool changed = !check_buffer(addr, size, expected); T_QUIET; T_ASSERT_TRUE(changed, "buffer was re-allocated"); /* Case #2. Force a segfault so the parent sees that we crashed. */ *(volatile int *) 0 = 1; T_FAIL("Test did not crash when dereferencing NULL"); } static void reuse_reclaimed_entry(mach_vm_reclaim_action_t behavior) { kern_return_t kr; static const size_t kAllocationSize = (1UL << 20); // 1MB mach_vm_address_t addr; static const unsigned char kValue = 220; mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); mach_vm_reclaim_id_t idx = allocate_and_defer_free(kAllocationSize, ringbuffer, kValue, behavior, &addr); T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer"); kr = mach_vm_reclaim_ring_flush(ringbuffer, 10); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush"); bool usable = try_cancel(ringbuffer, idx, addr, kAllocationSize, behavior); switch (behavior) { case VM_RECLAIM_DEALLOCATE: T_EXPECT_FALSE(usable, "reclaimed entry is not re-usable"); assert_buffer_has_changed_and_crash(addr, kAllocationSize, kValue); break; case VM_RECLAIM_FREE: T_EXPECT_TRUE(usable, "reclaimed REUSABLE entry is re-usable"); read_buffer(addr, kAllocationSize); T_PASS("Freed buffer re-used successfully"); break; default: T_FAIL("Unexpected reclaim behavior %d", behavior); } } T_HELPER_DECL(reuse_freed_entry_dealloc, "defer free (dealloc), sync, and try to use entry") { reuse_reclaimed_entry(VM_RECLAIM_DEALLOCATE); } T_HELPER_DECL(reuse_freed_entry_reusable, "defer free (reusable), sync, and try to use entry") { reuse_reclaimed_entry(VM_RECLAIM_FREE); } T_DECL(vm_reclaim_single_entry_verify_free, "Place a single entry in the buffer and call sync", T_META_IGNORECRASHES(".*vm_reclaim_single_entry_verify_free.*"), T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_dealloc"); T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed."); T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGSEGV, "Test process crashed with segmentation fault."); } T_DECL(vm_reclaim_single_entry_reusable, "Reclaim a reusable entry and verify re-use is legal", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { int status = spawn_helper_and_wait_for_exit("reuse_freed_entry_reusable"); T_QUIET; T_ASSERT_TRUE(WIFEXITED(status), "Test process exited."); T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly."); } static void allocate_and_suspend(char *const *argv, bool free_buffer, bool double_free) { kern_return_t kr; static const mach_vm_reclaim_count_t kAllocationSize = (1UL << 20); // 1MB mach_vm_address_t addr = 0; bool should_update_kernel_accounting = false; mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); const mach_vm_reclaim_count_t kNumEntries = (mach_vm_reclaim_count_t)atoi(argv[0]); mach_vm_reclaim_count_t capacity; kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()"); T_QUIET; T_ASSERT_LT(kNumEntries, capacity, "Test does not fill up ringbuffer"); T_LOG("allocate_and_suspend: Allocating and freeing %u entries...", kNumEntries); for (size_t i = 0; i < kNumEntries; i++) { addr = 0; mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr); T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct"); T_LOG("allocate_and_suspend: Allocated and deferred 0x%llx", addr); } if (double_free) { // Double free the last entry mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL; kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &id, &should_update_kernel_accounting); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter"); } if (free_buffer) { mach_vm_size_t buffer_size = (size_t)capacity * sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries); kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate"); } T_LOG("allocate_and_suspend: Signalling parent"); // Signal to our parent to suspend us if (kill(getppid(), SIGUSR1) != 0) { T_LOG("Unable to signal to parent process!"); exit(1); } T_LOG("allocate_and_suspend: Spinning"); while (1) { ; } T_ASSERT_FAIL("notreached"); } T_HELPER_DECL(allocate_and_suspend, "defer free, and signal parent to suspend") { allocate_and_suspend(argv, false, false); } static void resume_and_kill_proc(pid_t pid) { int ret = pid_resume(pid); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "proc resumed after freeze"); T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(pid, SIGKILL), "Killed process"); } static void wait_for_pid_to_be_drained(pid_t child_pid) { int val = child_pid; int ret; size_t len = sizeof(val); ret = sysctlbyname("vm.reclaim.wait_for_pid", NULL, NULL, &val, len); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim.wait_for_pid"); } static size_t ledger_phys_footprint_index(size_t *num_entries) { struct ledger_info li; struct ledger_template_info *templateInfo = NULL; int ret; size_t i, footprint_index; bool found = false; ret = ledger(LEDGER_INFO, (caddr_t)(uintptr_t)getpid(), (caddr_t)&li, NULL); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_INFO)"); T_QUIET; T_ASSERT_GT(li.li_entries, (int64_t) 0, "num ledger entries is valid"); *num_entries = (size_t) li.li_entries; templateInfo = malloc((size_t)li.li_entries * sizeof(struct ledger_template_info)); T_QUIET; T_ASSERT_NOTNULL(templateInfo, "malloc entries"); footprint_index = 0; ret = ledger(LEDGER_TEMPLATE_INFO, (caddr_t) templateInfo, (caddr_t) num_entries, NULL); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_TEMPLATE_INFO)"); for (i = 0; i < *num_entries; i++) { if (strcmp(templateInfo[i].lti_name, "phys_footprint") == 0) { footprint_index = i; found = true; } } free(templateInfo); T_QUIET; T_ASSERT_TRUE(found, "found phys_footprint in ledger"); return footprint_index; } static int64_t get_ledger_entry_for_pid(pid_t pid, size_t index, size_t num_entries) { int ret; int64_t value; struct ledger_entry_info *lei = NULL; lei = malloc(num_entries * sizeof(*lei)); ret = ledger(LEDGER_ENTRY_INFO, (caddr_t) (uintptr_t) pid, (caddr_t) lei, (caddr_t) &num_entries); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "ledger(LEDGER_ENTRY_INFO)"); value = lei[index].lei_balance; free(lei); return value; } static pid_t child_pid; static void test_after_background_helper_launches(char* variant, char * arg1, dispatch_block_t test_block, dispatch_block_t exit_block) { char **launch_tool_args; char testpath[PATH_MAX]; uint32_t testpath_buf_size; dispatch_source_t ds_signal, ds_exit; /* Wait for the child process to tell us that it's ready, and then freeze it */ signal(SIGUSR1, SIG_IGN); ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue()); T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create"); dispatch_source_set_event_handler(ds_signal, test_block); dispatch_activate(ds_signal); testpath_buf_size = sizeof(testpath); int ret = _NSGetExecutablePath(testpath, &testpath_buf_size); T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath"); T_LOG("Executable path: %s", testpath); launch_tool_args = (char *[]){ testpath, "-n", variant, arg1, NULL }; /* Spawn the child process. */ ret = dt_launch_tool(&child_pid, launch_tool_args, false, NULL, NULL); if (ret != 0) { T_LOG("dt_launch tool returned %d with error code %d", ret, errno); } T_QUIET; T_ASSERT_POSIX_SUCCESS(child_pid, "dt_launch_tool"); /* Listen for exit. */ ds_exit = dispatch_source_create(DISPATCH_SOURCE_TYPE_PROC, (uintptr_t)child_pid, DISPATCH_PROC_EXIT, dispatch_get_main_queue()); dispatch_source_set_event_handler(ds_exit, exit_block); dispatch_activate(ds_exit); dispatch_main(); } T_DECL(vm_reclaim_full_reclaim_on_suspend, "Defer free memory and then suspend.", T_META_ASROOT(true), T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { test_after_background_helper_launches("allocate_and_suspend", "20", ^{ int ret = 0; size_t num_ledger_entries = 0; size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries); int64_t before_footprint, after_footprint, reclaimable_bytes = 20 * (1ULL << 20); before_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries); T_QUIET; T_EXPECT_GE(before_footprint, reclaimable_bytes, "memory was allocated"); ret = pid_suspend(child_pid); T_ASSERT_POSIX_SUCCESS(ret, "child suspended"); /* * The reclaim work is kicked off asynchronously by the suspend. * So we need to call into the kernel to synchronize with the reclaim worker * thread. */ wait_for_pid_to_be_drained(child_pid); after_footprint = get_ledger_entry_for_pid(child_pid, phys_footprint_index, num_ledger_entries); T_QUIET; T_EXPECT_LE(after_footprint, before_footprint - reclaimable_bytes, "memory was reclaimed"); resume_and_kill_proc(child_pid); }, ^{ int status = 0, code = 0; pid_t rc = waitpid(child_pid, &status, 0); T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid"); code = WEXITSTATUS(status); T_QUIET; T_ASSERT_EQ(code, 0, "Child exited cleanly"); T_END; }); } T_DECL(vm_reclaim_limit_kills, "Deferred reclaims are processed before a limit kill", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { int err; const size_t kNumEntries = 50; static const size_t kAllocationSize = (1UL << 20); // 1MB static const size_t kMemoryLimit = kNumEntries / 10 * kAllocationSize; mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); err = set_memlimits(getpid(), kMemoryLimit >> 20, kMemoryLimit >> 20, TRUE, TRUE); T_QUIET; T_ASSERT_POSIX_SUCCESS(err, "set_memlimits"); for (size_t i = 0; i < kNumEntries; i++) { mach_vm_address_t addr = 0; mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char) i, &addr); T_QUIET; T_ASSERT_EQ(idx, (mach_vm_reclaim_id_t)i, "idx is correct"); } T_PASS("Was able to allocate and defer free %zu chunks of size %zu bytes while staying under limit of %zu bytes", kNumEntries, kAllocationSize, kMemoryLimit); } #if TARGET_OS_IOS && !TARGET_OS_VISION T_DECL(vm_reclaim_update_reclaimable_bytes_threshold, "Kernel reclaims when num_bytes_reclaimable crosses threshold", T_META_SYSCTL_INT("vm.reclaim.max_threshold=16384"), T_META_TAG_VM_PREFERRED) { mach_vm_reclaim_count_t kNumEntries = 0; const size_t kAllocationSize = vm_kernel_page_size; uint64_t vm_reclaim_reclaimable_max_threshold; int ret; mach_error_t err; size_t len = sizeof(vm_reclaim_reclaimable_max_threshold); size_t num_ledger_entries = 0; size_t phys_footprint_index = ledger_phys_footprint_index(&num_ledger_entries); mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); // Allocate 1000 times the reclaim threshold ret = sysctlbyname("vm.reclaim.max_threshold", &vm_reclaim_reclaimable_max_threshold, &len, NULL, 0); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "vm.reclaim.max_threshold"); kNumEntries = (mach_vm_reclaim_count_t)(vm_reclaim_reclaimable_max_threshold / kAllocationSize * 1000); mach_vm_reclaim_count_t capacity; err = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity); T_QUIET; T_ASSERT_MACH_SUCCESS(err, "mach_vm_reclaim_ring_capacity()"); T_QUIET; T_ASSERT_LT(kNumEntries, capacity, "Test does not fill up ringbuffer"); mach_vm_address_t addr = 0; for (uint64_t i = 0; i < kNumEntries; i++) { mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, (unsigned char)i, &addr); T_QUIET; T_ASSERT_EQ(idx, i, "idx is correct"); } T_QUIET; T_ASSERT_LT(get_ledger_entry_for_pid(getpid(), phys_footprint_index, num_ledger_entries), (int64_t) ((kNumEntries) * kAllocationSize), "Entries were reclaimed as we crossed threshold"); } #else /* !TARGET_OS_IPHONE */ T_DECL(vm_reclaim_trim_minimum, "update_accounting trims buffer according to sampling minimum", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { kern_return_t kr; int ret; bool success, update_accounting; mach_vm_reclaim_ring_t ringbuffer; uint64_t sampling_period_ns; size_t sampling_period_size = sizeof(sampling_period_ns); uint32_t sizes[3] = {MiB(128), MiB(128), MiB(128)}; mach_vm_address_t addrs[3] = {0}; uint64_t ids[3] = {0}; ret = sysctlbyname("vm.reclaim.sampling_period_ns", &sampling_period_ns, &sampling_period_size, NULL, 0); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(\"vm.reclaim.sampling_period_ns\")"); struct timespec ts = { .tv_sec = 2 * sampling_period_ns / NSEC_PER_SEC, .tv_nsec = 2 * sampling_period_ns % NSEC_PER_SEC, }; ringbuffer = ringbuffer_init(); // This should result in a sample taken (min 0) kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()"); for (int i = 0; i < 3; i++) { T_LOG("Placing entries[%d] into buffer", i); ids[i] = allocate_and_defer_deallocate(sizes[i], ringbuffer, 0xAB, &addrs[i]); } for (int i = 0; i < 3; i++) { // The minimum for the first sample should be 0 success = try_cancel(ringbuffer, ids[i], addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE); T_ASSERT_TRUE(success, "Entry %d should not be reclaimed", i); kr = mach_vm_reclaim_try_enter(ringbuffer, addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE, &ids[i], &update_accounting); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()"); if (update_accounting) { kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()"); } } T_LOG("Sleeping for 2 sampling periods (%llu ns)", 2 * sampling_period_ns); ret = nanosleep(&ts, NULL); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()"); // This should result in a sample taken (still min 0) kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()"); for (int i = 0; i < 3; i++) { success = try_cancel(ringbuffer, ids[i], addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE); T_EXPECT_TRUE(success, "Entry %d should not be reclaimed", i); kr = mach_vm_reclaim_try_enter(ringbuffer, addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE, &ids[i], &update_accounting); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()"); if (update_accounting) { kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()"); } } T_LOG("Sleeping for 2 sampling periods (%llu ns)", 2 * sampling_period_ns); ret = nanosleep(&ts, NULL); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()"); // This should result in a sample taken (still min 0) kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()"); T_LOG("Sleeping for 2 sampling periods (%llu ns)", 2 * sampling_period_ns); ret = nanosleep(&ts, NULL); T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "nanosleep()"); // This should result in a sample taken (min sum(sizeof(entries[i]))) kr = mach_vm_reclaim_update_kernel_accounting(ringbuffer); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_update_kernel_accounting()"); for (int i = 0; i < 3; i++) { success = try_cancel(ringbuffer, ids[i], addrs[i], sizes[i], VM_RECLAIM_DEALLOCATE); T_EXPECT_FALSE(success, "Entry %d should not be reclaimed", i); } } #endif /* TARGET_OS_IPHONE */ T_HELPER_DECL(deallocate_buffer, "deallocate the buffer from underneath the kernel") { kern_return_t kr; static const size_t kAllocationSize = (1UL << 20); // 1MB mach_vm_address_t addr; mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr); T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer"); mach_vm_reclaim_count_t capacity; kr = mach_vm_reclaim_ring_capacity(ringbuffer, &capacity); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()"); mach_vm_size_t buffer_size = (size_t)capacity * sizeof(struct mach_vm_reclaim_entry_s) + offsetof(struct mach_vm_reclaim_ring_s, entries); kr = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)ringbuffer, buffer_size); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_deallocate"); mach_vm_reclaim_ring_flush(ringbuffer, 10); T_FAIL("Test did not crash when synchronizing on a deallocated buffer!"); } T_DECL(vm_reclaim_copyio_buffer_error, "Force a copyio error on the buffer", T_META_IGNORECRASHES(".*deallocate_buffer.*"), T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { int status = spawn_helper_and_wait_for_exit("deallocate_buffer"); T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed."); T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL."); } T_HELPER_DECL(dealloc_gap, "Put a bad entry in the buffer") { kern_return_t kr; static const size_t kAllocationSize = (1UL << 20); // 1MB mach_vm_address_t addr; bool should_update_kernel_accounting = false; kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()"); mach_vm_reclaim_ring_t ringbuffer = ringbuffer_init(); mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate(kAllocationSize, ringbuffer, 1, &addr); T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer"); idx = VM_RECLAIM_ID_NULL; kr = mach_vm_reclaim_try_enter(ringbuffer, addr, kAllocationSize, VM_RECLAIM_DEALLOCATE, &idx, &should_update_kernel_accounting); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()"); T_QUIET; T_ASSERT_EQ(idx, 1ULL, "Entry placed at correct index"); mach_vm_reclaim_ring_flush(ringbuffer, 2); T_FAIL("Test did not crash when doing a double free!"); } T_DECL(vm_reclaim_dealloc_gap, "Ensure a dealloc gap delivers a fatal exception", T_META_IGNORECRASHES(".*dealloc_gap.*"), T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { int status = spawn_helper_and_wait_for_exit("dealloc_gap"); T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "Test process crashed."); T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "Test process crashed with SIGKILL."); } T_HELPER_DECL(allocate_and_suspend_with_dealloc_gap, "defer double free, and signal parent to suspend") { kern_return_t kr = task_set_exc_guard_behavior(mach_task_self(), TASK_EXC_GUARD_ALL); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "task_set_exc_guard_behavior()"); allocate_and_suspend(argv, false, true); } static void vm_reclaim_async_exception(char *variant, char *arg1) { test_after_background_helper_launches(variant, arg1, ^{ int ret = 0; ret = pid_suspend(child_pid); T_ASSERT_POSIX_SUCCESS(ret, "child suspended"); /* * The reclaim work is kicked off asynchronously by the suspend. * So we need to call into the kernel to synchronize with the reclaim worker * thread. */ T_LOG("Waiting for child to be drained..."); wait_for_pid_to_be_drained(child_pid); }, ^{ int status; int signal; T_LOG("Waiting for child to exit..."); bool exited = dt_waitpid(child_pid, &status, &signal, 30); T_QUIET; T_EXPECT_FALSE(exited, "waitpid"); T_QUIET; T_EXPECT_FALSE(status, "Test process crashed."); T_QUIET; T_EXPECT_EQ(signal, SIGKILL, "Test process crashed with SIGKILL."); T_END; }); } T_DECL(vm_reclaim_dealloc_gap_async, "Ensure a dealloc gap delivers an async fatal exception", T_META_IGNORECRASHES(".*allocate_and_suspend_with_dealloc_gap.*"), T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { vm_reclaim_async_exception("allocate_and_suspend_with_dealloc_gap", "15"); } T_HELPER_DECL(allocate_and_suspend_with_buffer_error, "defer free, free buffer, and signal parent to suspend") { allocate_and_suspend(argv, true, false); } T_DECL(vm_reclaim_copyio_buffer_error_async, "Ensure a buffer copyio failure delivers an async fatal exception", T_META_IGNORECRASHES(".*allocate_and_suspend_with_buffer_error.*"), T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { vm_reclaim_async_exception("allocate_and_suspend_with_buffer_error", "15"); } static mach_vm_reclaim_ring_t buffer_4fork_inherit; static const size_t allocation_size_4fork_inherit = (16UL << 10); // 16 KiB static const unsigned char value_4fork_inherit = 119; static mach_vm_address_t addr_4fork_inherit; T_HELPER_DECL(reuse_freed_entry_fork, "defer free, sync, and try to use entry") { kern_return_t kr; bool usable, update; mach_vm_reclaim_id_t id = VM_RECLAIM_ID_NULL; mach_vm_reclaim_ring_t ringbuffer_tmp; kr = mach_vm_reclaim_ring_allocate(&ringbuffer_tmp, 1, 1); T_ASSERT_MACH_ERROR(kr, VM_RECLAIM_RESOURCE_SHORTAGE, "mach_vm_reclaim_ring_allocate() should fail"); usable = try_cancel(buffer_4fork_inherit, 0, addr_4fork_inherit, allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE); T_ASSERT_TRUE(usable, "Entry can be re-used after fork()"); T_EXPECT_EQ(*(unsigned char *)addr_4fork_inherit, value_4fork_inherit, "value is preserved"); kr = mach_vm_reclaim_try_enter(buffer_4fork_inherit, addr_4fork_inherit, allocation_size_4fork_inherit, VM_RECLAIM_DEALLOCATE, &id, &update); T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_try_enter()"); T_EXPECT_EQ(id, 1ull, "new entry is placed at tail"); kr = mach_vm_reclaim_ring_flush(buffer_4fork_inherit, 10); T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_flush()"); } T_DECL(inherit_buffer_after_fork, "Ensure reclaim buffer is inherited across a fork", T_META_IGNORECRASHES(".*vm_reclaim_fork.*"), T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { dt_helper_t helpers[1]; buffer_4fork_inherit = ringbuffer_init(); mach_vm_reclaim_id_t idx = allocate_and_defer_deallocate( allocation_size_4fork_inherit, buffer_4fork_inherit, value_4fork_inherit, &addr_4fork_inherit); T_QUIET; T_ASSERT_EQ(idx, 0ULL, "Entry placed at start of buffer"); helpers[0] = dt_fork_helper("reuse_freed_entry_fork"); dt_run_helpers(helpers, 1, 30); } #define SUSPEND_AND_RESUME_COUNT 4 // rdar://110081398 T_DECL(reclaim_async_on_repeated_suspend, "verify that subsequent suspends are allowed", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { const int sleep_duration = 3; test_after_background_helper_launches("allocate_and_suspend", "20", ^{ int ret = 0; for (int i = 0; i < SUSPEND_AND_RESUME_COUNT; i++) { ret = pid_suspend(child_pid); T_ASSERT_POSIX_SUCCESS(ret, "pid_suspend()"); ret = pid_resume(child_pid); T_ASSERT_POSIX_SUCCESS(ret, "pid_resume()"); } T_LOG("Sleeping %d sec...", sleep_duration); sleep(sleep_duration); T_LOG("Killing child..."); T_QUIET; T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGKILL), "kill()"); }, ^{ int status; pid_t rc = waitpid(child_pid, &status, 0); T_QUIET; T_ASSERT_EQ(rc, child_pid, "waitpid"); T_QUIET; T_ASSERT_EQ(WEXITSTATUS(status), 0, "Test process exited cleanly."); T_END; }); } T_HELPER_DECL(buffer_init_after_exec, "initialize a ringbuffer after exec") { mach_vm_reclaim_ring_t ringbuffer; kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1); T_ASSERT_MACH_SUCCESS(kr, "post-exec: mach_vm_reclaim_ring_allocate()"); } extern char **environ; T_DECL(reclaim_exec_new_reclaim_buffer, "verify that an exec-ed process may instantiate a new buffer", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { char **launch_tool_args; char testpath[PATH_MAX]; uint32_t testpath_buf_size; mach_vm_reclaim_ring_t ringbuffer; kern_return_t kr = mach_vm_reclaim_ring_allocate(&ringbuffer, 1, 1); T_ASSERT_MACH_SUCCESS(kr, "pre-exec: mach_vm_reclaim_ring_allocate()"); testpath_buf_size = sizeof(testpath); int ret = _NSGetExecutablePath(testpath, &testpath_buf_size); T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath"); T_LOG("Executable path: %s", testpath); launch_tool_args = (char *[]){ testpath, "-n", "buffer_init_after_exec", NULL }; /* Spawn the child process. */ posix_spawnattr_t spawnattrs; posix_spawnattr_init(&spawnattrs); posix_spawnattr_setflags(&spawnattrs, POSIX_SPAWN_SETEXEC); posix_spawn(&child_pid, testpath, NULL, &spawnattrs, launch_tool_args, environ); T_ASSERT_FAIL("should not be reached"); } T_DECL(resize_buffer, "verify that a reclaim buffer may be safely resized", T_META_VM_RECLAIM_ENABLED, T_META_TAG_VM_PREFERRED) { kern_return_t kr; mach_vm_reclaim_ring_t ringbuffer; mach_vm_address_t addr_tmp; mach_vm_reclaim_id_t id_tmp; mach_vm_reclaim_id_t ids[4095] = {0}; mach_vm_address_t addrs[4095] = {0}; T_LOG("Initializing 1 page buffer"); mach_vm_reclaim_count_t initial_len = mach_vm_reclaim_round_capacity(512); mach_vm_reclaim_count_t max_len = 4 * initial_len; kr = mach_vm_reclaim_ring_allocate(&ringbuffer, initial_len, max_len); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_allocate()"); // Should be able to fit 1022 entries in a one-page buffer (two entries for indices) T_LOG("Filling buffer with entries"); mach_vm_reclaim_count_t old_capacity; kr = mach_vm_reclaim_ring_capacity(ringbuffer, &old_capacity); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()"); T_EXPECT_EQ(old_capacity, initial_len, "Capacity is same as asked for"); for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) { ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'A', &addrs[i]); T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation"); } id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp); T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer"); uint64_t initial_tail = os_atomic_load(&ringbuffer->indices.tail, relaxed); T_ASSERT_EQ(initial_tail, (uint64_t)old_capacity, "tail == capacity after fill"); T_LOG("Resizing buffer to 4x"); kr = mach_vm_reclaim_ring_resize(ringbuffer, max_len); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_resize()"); // All entries should be reclaimed after resize T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.head, relaxed), initial_tail, "head is incremented"); T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.busy, relaxed), initial_tail, "busy is incremented"); T_EXPECT_EQ(os_atomic_load(&ringbuffer->indices.tail, relaxed), initial_tail, "tail is preserved"); mach_vm_reclaim_count_t new_capacity; kr = mach_vm_reclaim_ring_capacity(ringbuffer, &new_capacity); T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_vm_reclaim_ring_capacity()"); T_EXPECT_GT(new_capacity, old_capacity, "Buffer capacity grew"); T_ASSERT_EQ(new_capacity, max_len, "length is set correctly"); T_LOG("Attempting to use all entries (should fail)"); for (mach_vm_reclaim_count_t i = 0; i < old_capacity; i++) { mach_vm_reclaim_state_t state; kr = mach_vm_reclaim_query_state(ringbuffer, ids[i], VM_RECLAIM_DEALLOCATE, &state); bool reclaimed = !(state == VM_RECLAIM_UNRECLAIMED); T_QUIET; T_EXPECT_TRUE(reclaimed, "Entry is reclaimed after resize"); bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE); T_QUIET; T_EXPECT_FALSE(usable, "Entry cannot be re-used after resize"); } T_LOG("Filling resized buffer"); for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) { ids[i] = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'B', &addrs[i]); T_QUIET; T_ASSERT_NE(ids[i], VM_RECLAIM_ID_NULL, "Able to defer deallocation"); } id_tmp = allocate_and_defer_deallocate(vm_page_size, ringbuffer, 'X', &addr_tmp); T_ASSERT_EQ(id_tmp, VM_RECLAIM_ID_NULL, "Unable to over-fill buffer"); T_LOG("Re-using all entries"); for (mach_vm_reclaim_count_t i = 0; i < new_capacity; i++) { bool usable = try_cancel(ringbuffer, ids[i], addrs[i], vm_page_size, VM_RECLAIM_DEALLOCATE); T_QUIET; T_EXPECT_TRUE(usable, "Entry is available for re-use"); } }