1 #include <stdio.h>
2 #include <mach/mach_vm.h>
3 #include <mach/mach_port.h>
4 #include <mach/mach_host.h>
5 #include <mach/mach_error.h>
6 #include <mach-o/dyld.h>
7 #include <sys/sysctl.h>
8 #include <sys/kdebug.h>
9 #include <sys/mman.h>
10 #include <sys/kern_memorystatus.h>
11 #include <ktrace/session.h>
12 #include <dispatch/private.h>
13
14 #ifdef T_NAMESPACE
15 #undef T_NAMESPACE
16 #endif
17 #include <darwintest.h>
18 #include <darwintest_utils.h>
19
20 T_GLOBAL_META(
21 T_META_NAMESPACE("xnu.vm"),
22 T_META_RADAR_COMPONENT_NAME("xnu"),
23 T_META_RADAR_COMPONENT_VERSION("VM"),
24 T_META_CHECK_LEAKS(false)
25 );
26
27 #define TIMEOUT_SECS 10 * 60 /* abort if test takes > 10 minutes */
28
29 #if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
30 #define ALLOCATION_SIZE_VM_REGION (16*1024) /* 16 KB */
31 #define ALLOCATION_SIZE_VM_OBJECT ALLOCATION_SIZE_VM_REGION
32 #else
33 #define ALLOCATION_SIZE_VM_REGION (1024*1024*100) /* 100 MB */
34 #define ALLOCATION_SIZE_VM_OBJECT (16*1024) /* 16 KB */
35 #endif
36 #define MAX_CHILD_PROCS 100
37
38 #define NUM_GIVE_BACK 5
39 #define NUM_GIVE_BACK_PORTS 20
40
41 /* 60% is too high on bridgeOS to achieve without vm-pageshortage jetsams. Set it to 40%. */
42 #if TARGET_OS_BRIDGE
43 #define ZONEMAP_JETSAM_LIMIT_SYSCTL "kern.zone_map_jetsam_limit=40"
44 #else
45 #define ZONEMAP_JETSAM_LIMIT_SYSCTL "kern.zone_map_jetsam_limit=60"
46 #endif
47
48 #define VME_ZONE_TEST_OPT "allocate_vm_regions"
49 #define VM_OBJECTS_ZONE_TEST_OPT "allocate_vm_objects"
50 #define GENERIC_ZONE_TEST_OPT "allocate_from_generic_zone"
51
52 #define VME_ZONE "VM map entries"
53 #define VMOBJECTS_ZONE "vm objects"
54 #define VMENTRY_TO_VMOBJECT_COMPARISON_RATIO 98
55
56 #define VM_TAG1 100
57 #define VM_TAG2 101
58
59 #define LARGE_MEM_GB 32
60 #define LARGE_MEM_JETSAM_LIMIT 40
61 #define JETSAM_LIMIT_LOWEST 10
62
/* Identifies which zone-exhaustion scenario a T_DECL below exercises. */
enum {
	VME_ZONE_TEST = 0,
	VM_OBJECTS_ZONE_TEST,
	GENERIC_ZONE_TEST,
};

/* Per-test configuration, filled in by each T_DECL before calling run_test(). */
typedef struct test_config_struct {
	int test_index;                    /* one of the enum values above */
	int num_zones;                     /* number of entries in zone_names */
	const char *helper_func;           /* helper name the spawned children run */
	mach_zone_name_array_t zone_names; /* zones whose stats are tracked during the test */
} test_config_struct;

static test_config_struct current_test;
static dispatch_source_t ds_signal = NULL; /* SIGUSR1 source: a child finished allocating */
static dispatch_source_t ds_timer = NULL;  /* periodic zone-info query timer */
static dispatch_queue_t dq_spawn = NULL;   /* serial queue that spawns child processes */
static ktrace_session_t session = NULL;    /* listens for jetsam trace events */

static mach_zone_info_array_t zone_info_array = NULL; /* latest stats for current_test.zone_names */
static mach_zone_name_t largest_zone_name;
static mach_zone_info_t largest_zone_info;

static pthread_mutex_t test_mtx = PTHREAD_MUTEX_INITIALIZER; /* protects the next 3 things */
static bool test_ending = false;
static int num_children = 0;
static pid_t child_pids[MAX_CHILD_PROCS];

static char testpath[PATH_MAX]; /* path of this executable; children are re-execs of it */
static void allocate_vm_stuff(int);
static void allocate_from_generic_zone(void);
static void begin_test_teardown(void);
static void cleanup_and_end_test(void);
static void setup_ktrace_session(void);
static void spawn_child_process(void);
static void run_test(void);
static bool verify_generic_jetsam_criteria(void);
static bool vme_zone_compares_to_vm_objects(void);
static int query_zone_map_size(void);
static void query_zone_info(void);
static void print_zone_info(mach_zone_name_t *zn, mach_zone_info_t *zi);

/* Private kernel interfaces; not declared in the public mach headers. */
extern void mach_zone_force_gc(host_t host);
extern kern_return_t mach_zone_info_for_largest_zone(
	host_priv_t host,
	mach_zone_name_t *name,
	mach_zone_info_t *info
	);
111
/* Returns true once more than 'timeout' seconds have elapsed since 'start'. */
static bool
check_time(time_t start, int timeout)
{
	time_t deadline = start + timeout;

	return deadline < time(NULL);
}
117
118 /*
119 * flag values for allocate_vm_stuff()
120 */
121 #define REGIONS 1
122 #define OBJECTS 2
123
124 static void
allocate_vm_stuff(int flags)125 allocate_vm_stuff(int flags)
126 {
127 uint64_t alloc_size, i;
128 time_t start = time(NULL);
129 mach_vm_address_t give_back[NUM_GIVE_BACK];
130 char *msg;
131
132 if (flags == REGIONS) {
133 alloc_size = ALLOCATION_SIZE_VM_REGION;
134 msg = "";
135 } else {
136 alloc_size = ALLOCATION_SIZE_VM_OBJECT;
137 msg = " each region backed by a VM object";
138 }
139
140 printf("[%d] Allocating VM regions, each of size %lld KB%s\n", getpid(), (alloc_size >> 10), msg);
141
142 for (i = 0;; i++) {
143 mach_vm_address_t addr = (mach_vm_address_t)NULL;
144
145 /* Alternate VM tags between consecutive regions to prevent coalescing */
146 int vmflags = VM_MAKE_TAG((i % 2)? VM_TAG1: VM_TAG2) | VM_FLAGS_ANYWHERE;
147
148 if ((mach_vm_allocate(mach_task_self(), &addr, (mach_vm_size_t)alloc_size, vmflags)) != KERN_SUCCESS) {
149 break;
150 }
151
152 /*
153 * If interested in objects, touch the region so the VM object is created,
154 * then free this page. Keeps us from holding a lot of dirty pages.
155 */
156 if (flags == OBJECTS) {
157 *((int *)addr) = 0;
158 madvise((void *)addr, (size_t)alloc_size, MADV_FREE);
159 }
160
161 if (check_time(start, TIMEOUT_SECS)) {
162 printf("[%d] child timeout during allocations\n", getpid());
163 exit(0);
164 }
165
166 if (i < NUM_GIVE_BACK) {
167 give_back[i] = addr;
168 }
169 }
170
171 /* return some of the resource to avoid O-O-M problems */
172 for (uint64_t j = 0; j < NUM_GIVE_BACK && j < i; ++j) {
173 mach_vm_deallocate(mach_task_self(), give_back[j], (mach_vm_size_t)alloc_size);
174 }
175
176 printf("[%d] Number of allocations: %lld\n", getpid(), i);
177
178 /* Signal to the parent that we're done allocating */
179 kill(getppid(), SIGUSR1);
180
181 while (1) {
182 sleep(2);
183 /* Exit if parent has exited. Ensures child processes don't linger around after the test exits */
184 if (getppid() == 1) {
185 exit(0);
186 }
187
188 if (check_time(start, TIMEOUT_SECS)) {
189 printf("[%d] child timeout while waiting\n", getpid());
190 exit(0);
191 }
192 }
193 }
194
195
/*
 * Child-process workload for the generic-zone test.
 *
 * Allocates mach port receive rights (backed by a zone without an optimized
 * jetsam path) up to 7/8 of the port-table limit, hands a few back to avoid
 * resource-limit exceptions, signals the parent with SIGUSR1, then parks
 * until killed, timed out, or orphaned.
 */
static void
allocate_from_generic_zone(void)
{
	uint64_t i = 0;
	time_t start = time(NULL);
	mach_port_t give_back[NUM_GIVE_BACK_PORTS];
	int old_limit = 0;

	printf("[%d] Allocating mach_ports\n", getpid());

	size_t size = sizeof(old_limit);
	int kr = sysctlbyname("machdep.max_port_table_size", &old_limit, &size, NULL, 0);
	/* Message fixed to name the sysctl actually queried (machdep., not kern.) */
	T_QUIET; T_ASSERT_POSIX_SUCCESS(kr, "sysctl machdep.max_port_table_size failed");
	T_LOG("machdep.max_port_table_size = %d", old_limit);

	/* Avoid hitting the resource limit exception; widen before multiplying to dodge int overflow. */
	uint64_t limit = (uint64_t)old_limit * 7 / 8;

	for (i = 0; i < limit; i++) {
		mach_port_t port;

		if ((mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port)) != KERN_SUCCESS) {
			break;
		}

		if (check_time(start, TIMEOUT_SECS)) {
			printf("[%d] child timeout during allocations\n", getpid());
			exit(0);
		}

		/* Remember the first few ports so we can release them below. */
		if (i < NUM_GIVE_BACK_PORTS) {
			give_back[i] = port;
		}
	}

	/* return some of the resource to avoid O-O-M problems */
	for (uint64_t j = 0; j < NUM_GIVE_BACK_PORTS && j < i; ++j) {
		int ret;
		ret = mach_port_mod_refs(mach_task_self(), give_back[j], MACH_PORT_RIGHT_RECEIVE, -1);
		T_ASSERT_MACH_SUCCESS(ret, "mach_port_mod_refs(RECV_RIGHT, -1)");
	}
	/* %llu: i is unsigned (uint64_t) */
	printf("[%d] Number of allocations: %llu\n", getpid(), i);

	/* Signal to the parent that we're done allocating */
	kill(getppid(), SIGUSR1);

	while (1) {
		sleep(2);
		/* Exit if parent has exited. Ensures child processes don't linger around after the test exits */
		if (getppid() == 1) {
			exit(0);
		}

		if (check_time(start, TIMEOUT_SECS)) {
			printf("[%d] child timeout while waiting\n", getpid());
			exit(0);
		}
	}
}
255
256 static void
print_zone_info(mach_zone_name_t * zn,mach_zone_info_t * zi)257 print_zone_info(mach_zone_name_t *zn, mach_zone_info_t *zi)
258 {
259 T_LOG("ZONE NAME: %-35sSIZE: %-25lluELEMENTS: %llu",
260 zn->mzn_name, zi->mzi_cur_size, zi->mzi_count);
261 }
262
/* Wall-clock start of the test run; used by the global timeout check. */
static time_t main_start;

/*
 * Timer callback (runs once a second): refresh stats for the zones under
 * test and for the largest zone in the system.  Fails the test outright if
 * the global timeout has expired.  Details are logged only every 5th call
 * to keep the output readable.
 */
static void
query_zone_info(void)
{
	int i;
	kern_return_t kr;
	static uint64_t num_calls = 0; /* persists across timer ticks */

	if (check_time(main_start, TIMEOUT_SECS)) {
		T_ASSERT_FAIL("Global timeout expired");
	}
	for (i = 0; i < current_test.num_zones; i++) {
		kr = mach_zone_info_for_zone(mach_host_self(), current_test.zone_names[i], &(zone_info_array[i]));
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_zone_info_for_zone(%s) returned %d [%s]", current_test.zone_names[i].mzn_name, kr, mach_error_string(kr));
	}
	kr = mach_zone_info_for_largest_zone(mach_host_self(), &largest_zone_name, &largest_zone_info);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_zone_info_for_largest_zone returned %d [%s]", kr, mach_error_string(kr));

	num_calls++;
	if (num_calls % 5 != 0) {
		return;
	}

	/* Print out size and element count for zones relevant to the test */
	for (i = 0; i < current_test.num_zones; i++) {
		print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
	}
}
292
293 static bool
vme_zone_compares_to_vm_objects(void)294 vme_zone_compares_to_vm_objects(void)
295 {
296 int i;
297 uint64_t vm_object_element_count = 0, vm_map_entry_element_count = 0;
298
299 T_LOG("Comparing element counts of \"VM map entries\" and \"vm objects\" zones");
300 for (i = 0; i < current_test.num_zones; i++) {
301 if (!strcmp(current_test.zone_names[i].mzn_name, VME_ZONE)) {
302 vm_map_entry_element_count = zone_info_array[i].mzi_count;
303 } else if (!strcmp(current_test.zone_names[i].mzn_name, VMOBJECTS_ZONE)) {
304 vm_object_element_count = zone_info_array[i].mzi_count;
305 }
306 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
307 }
308
309 T_LOG("# VM map entries as percentage of # vm objects = %llu", (vm_map_entry_element_count * 100) / vm_object_element_count);
310 if (vm_map_entry_element_count >= ((vm_object_element_count * VMENTRY_TO_VMOBJECT_COMPARISON_RATIO) / 100)) {
311 T_LOG("Number of VM map entries is comparable to vm objects\n\n");
312 return true;
313 }
314 T_LOG("Number of VM map entries is NOT comparable to vm objects\n\n");
315 return false;
316 }
317
318 static bool
verify_generic_jetsam_criteria(void)319 verify_generic_jetsam_criteria(void)
320 {
321 T_LOG("Largest zone info");
322 print_zone_info(&largest_zone_name, &largest_zone_info);
323
324 /* If VM map entries is not the largest zone */
325 if (strcmp(largest_zone_name.mzn_name, VME_ZONE)) {
326 /* If vm objects is the largest zone and the VM map entries zone had comparable # of elements, return false */
327 if (!strcmp(largest_zone_name.mzn_name, VMOBJECTS_ZONE) && vme_zone_compares_to_vm_objects()) {
328 return false;
329 }
330 return true;
331 }
332 return false;
333 }
334
/*
 * Called from the ktrace event handler once the expected jetsam (or an
 * early-exit condition) is observed: raises the jetsam limit back up,
 * ends the ktrace session, and cancels the dispatch sources so no more
 * children are spawned.
 */
static void
begin_test_teardown(void)
{
	int ret, old_limit = 95;

	/*
	 * Restore kern.zone_map_jetsam_limit to the default high value, to prevent further jetsams.
	 * We should change the value of old_limit if ZONE_MAP_JETSAM_LIMIT_DEFAULT changes in the kernel.
	 * We don't have a way to capture what the original value was before the test, because the
	 * T_META_SYSCTL_INT macro will have changed the value before the test starts running.
	 */
	ret = sysctlbyname("kern.zone_map_jetsam_limit", NULL, NULL, &old_limit, sizeof(old_limit));
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_jetsam_limit failed");
	T_LOG("kern.zone_map_jetsam_limit set to %d%%", old_limit);


	/* End ktrace session */
	if (session != NULL) {
		T_LOG("Ending ktrace session...");
		ktrace_end(session, 1);
	}

	/* Cancel on the spawn queue so this serializes with any in-flight spawn. */
	dispatch_sync(dq_spawn, ^{
		T_LOG("Cancelling dispatch sources...");

		/* Disable the timer that queries and prints zone info periodically */
		if (ds_timer != NULL) {
			dispatch_source_cancel(ds_timer);
		}

		/* Disable signal handler that spawns child processes */
		if (ds_signal != NULL) {
			/*
			 * No need for a dispatch_source_cancel_and_wait here.
			 * We're queueing this on the spawn queue, so no further
			 * processes will be spawned after the source is cancelled.
			 */
			dispatch_source_cancel(ds_signal);
		}
	});
}
376
377 static void
cleanup_and_end_test(void)378 cleanup_and_end_test(void)
379 {
380 int i;
381
382 /*
383 * The atend handler executes on a different dispatch queue.
384 * We want to do the cleanup only once.
385 */
386 pthread_mutex_lock(&test_mtx);
387 if (test_ending) {
388 pthread_mutex_unlock(&test_mtx);
389 return;
390 }
391 test_ending = TRUE;
392 pthread_mutex_unlock(&test_mtx);
393
394 dispatch_async(dq_spawn, ^{
395 /*
396 * If the test succeeds, we will call dispatch_source_cancel twice, which is fine since
397 * the operation is idempotent. Just make sure to not drop all references to the dispatch sources
398 * (in this case we're not, we have globals holding references to them), or we can end up with
399 * use-after-frees which would be a problem.
400 */
401 /* Disable the timer that queries and prints zone info periodically */
402 if (ds_timer != NULL) {
403 dispatch_source_cancel(ds_timer);
404 }
405
406 /* Disable signal handler that spawns child processes */
407 if (ds_signal != NULL) {
408 dispatch_source_cancel(ds_signal);
409 }
410 });
411
412 pthread_mutex_lock(&test_mtx);
413 T_LOG("Number of processes spawned: %d", num_children);
414 T_LOG("Killing child processes...");
415
416 /* Kill all the child processes that were spawned */
417 for (i = 0; i < num_children; i++) {
418 pid_t pid = child_pids[i];
419 int status = 0;
420
421 /*
422 * Kill and wait for each child to exit
423 * Without this we were seeing hw_lock_bit timeouts in BATS.
424 */
425 kill(pid, SIGKILL);
426 pthread_mutex_unlock(&test_mtx);
427 if (waitpid(pid, &status, 0) < 0) {
428 T_LOG("waitpid returned status %d", status);
429 }
430 pthread_mutex_lock(&test_mtx);
431 }
432 sleep(1);
433
434 /* Force zone_gc before starting test for another zone or exiting */
435 mach_zone_force_gc(mach_host_self());
436
437 /* End ktrace session */
438 if (session != NULL) {
439 ktrace_end(session, 1);
440 }
441
442 if (current_test.num_zones > 0) {
443 T_LOG("Relevant zone info at the end of the test:");
444 for (i = 0; i < current_test.num_zones; i++) {
445 print_zone_info(&(current_test.zone_names[i]), &(zone_info_array[i]));
446 }
447 }
448 }
449
/*
 * Creates and starts a ktrace session watching memorystatus_do_kill (jetsam)
 * events.  A zone-map-exhaustion kill of one of our children — or a
 * justified generic kill — passes the test; any other jetsam of a child
 * ends the test early with T_PASS.  The completion handler calls T_END.
 */
static void
setup_ktrace_session(void)
{
	int ret = 0;

	T_LOG("Setting up ktrace session...");
	session = ktrace_session_create();
	T_QUIET; T_ASSERT_NOTNULL(session, "ktrace_session_create");

	ktrace_set_interactive(session);

	ktrace_set_dropped_events_handler(session, ^{
		T_FAIL("Dropped ktrace events; might have missed an expected jetsam event. Terminating early.");
	});

	ktrace_set_completion_handler(session, ^{
		ktrace_session_destroy(session);
		T_END;
	});

	/* Listen for memorystatus_do_kill trace events */
	ret = ktrace_events_single(session, (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)), ^(ktrace_event_t event) {
		int i;
		bool received_jetsam_event = false;

		/*
		 * libktrace does not support DBG_FUNC_START/END in the event filter. It simply ignores it.
		 * So we need to explicitly check for the end event (a successful jetsam kill) here,
		 * instead of passing in ((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START).
		 */
		if (!(event->debugid & DBG_FUNC_START)) {
			return;
		}

		/* Check for zone-map-exhaustion jetsam. arg1 = victim pid, arg2 = kill reason. */
		if (event->arg2 == kMemorystatusKilledZoneMapExhaustion) {
			begin_test_teardown();
			T_LOG("[memorystatus_do_kill] jetsam reason: zone-map-exhaustion, pid: %d\n\n", (int)event->arg1);
			if (current_test.test_index == VME_ZONE_TEST || current_test.test_index == VM_OBJECTS_ZONE_TEST) {
				/*
				 * For the VM map entries zone we try to kill the leaking process.
				 * Verify that we jetsammed one of the processes we spawned.
				 *
				 * For the vm objects zone we pick the leaking process via the VM map entries
				 * zone, if the number of vm objects and VM map entries are comparable.
				 * The test simulates this scenario, we should see a targeted jetsam for the
				 * vm objects zone too.
				 */
				pthread_mutex_lock(&test_mtx);
				for (i = 0; i < num_children; i++) {
					if (child_pids[i] == (pid_t)event->arg1) {
						received_jetsam_event = true;
						T_LOG("Received jetsam event for a child");
						break;
					}
				}
				pthread_mutex_unlock(&test_mtx);
				/*
				 * If we didn't see a targeted jetsam, verify that the largest zone actually
				 * fulfilled the criteria for generic jetsams.
				 */
				if (!received_jetsam_event && verify_generic_jetsam_criteria()) {
					received_jetsam_event = true;
					T_LOG("Did not receive jetsam event for a child, but generic jetsam criteria holds");
				}
			} else {
				/* Generic-zone test: any zone-map-exhaustion kill counts. */
				received_jetsam_event = true;
				T_LOG("Received generic jetsam event");
			}

			T_QUIET; T_ASSERT_TRUE(received_jetsam_event, "Jetsam event not as expected");
		} else {
			/*
			 * The test relies on the children being able to send a signal to the parent, to continue spawning new processes
			 * that leak more zone memory. If a child is jetsammed for some other reason, the parent can get stuck waiting for
			 * a signal from the child, never being able to make progress (We spawn only a single process at a time to rate-limit
			 * the zone memory bloat.). If this happens, the test eventually times out. So if a child is jetsammed for some
			 * reason other than zone-map-exhaustion, end the test early.
			 *
			 * This typically happens when we end up triggering vm-pageshortage jetsams before zone-map-exhaustion jetsams.
			 * Lowering the zone_map_jetsam_limit if the zone map size was initially low should help with this too.
			 * See sysctlbyname("kern.zone_map_jetsam_limit"...) in run_test() below.
			 */
			pthread_mutex_lock(&test_mtx);
			for (i = 0; i < num_children; i++) {
				if (child_pids[i] == (pid_t)event->arg1) {
					begin_test_teardown();
					T_PASS("Child pid %d was jetsammed due to reason %d. Terminating early.",
					    (int)event->arg1, (int)event->arg2);
				}
			}
			pthread_mutex_unlock(&test_mtx);
		}
	});
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "ktrace_events_single");

	ret = ktrace_start(session, dispatch_get_main_queue());
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "ktrace_start");
}
549
/*
 * Logs the zone map's capacity and current size, and returns its occupancy
 * as an integer percentage.  On embedded (non-simulator) targets also logs
 * the current memorystatus level.
 */
static int
query_zone_map_size(void)
{
	int ret;
	uint64_t zstats[2]; /* [0] = current size, [1] = capacity */
	size_t zstats_size = sizeof(zstats);

	ret = sysctlbyname("kern.zone_map_size_and_capacity", &zstats, &zstats_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_size_and_capacity failed");

	/* %llu: zstats values are unsigned (uint64_t) */
	T_LOG("Zone map capacity: %-30lluZone map size: %llu [%llu%% full]", zstats[1], zstats[0], (zstats[0] * 100) / zstats[1]);

#if (TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR)
	int memstat_level;
	size_t memstat_level_size = sizeof(memstat_level);
	ret = sysctlbyname("kern.memorystatus_level", &memstat_level, &memstat_level_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.memorystatus_level failed");

	T_LOG("kern.memorystatus_level = %d%%", memstat_level);
#endif
	return (int)(zstats[0] * 100 / zstats[1]);
}
572
/*
 * Spawns one child running current_test.helper_func via darwintest's
 * dt_launch_tool (re-exec of this binary with "-n <helper>"), and records
 * its pid in child_pids.  No-ops once teardown has begun; aborts the test
 * if MAX_CHILD_PROCS would be exceeded.
 */
static void
spawn_child_process(void)
{
	pid_t pid = -1;
	char helper_func[50];
	char *launch_tool_args[4];

	pthread_mutex_lock(&test_mtx);
	if (!test_ending) {
		if (num_children == MAX_CHILD_PROCS) {
			pthread_mutex_unlock(&test_mtx);
			T_ASSERT_FAIL("Spawned too many children. Aborting test");
			/* not reached */
		}

		/* Copy to a mutable buffer; dt_launch_tool takes char *argv. */
		strlcpy(helper_func, current_test.helper_func, sizeof(helper_func));
		launch_tool_args[0] = testpath;
		launch_tool_args[1] = "-n";
		launch_tool_args[2] = helper_func;
		launch_tool_args[3] = NULL;

		/* Spawn the child process */
		int rc = dt_launch_tool(&pid, launch_tool_args, false, NULL, NULL);
		if (rc != 0) {
			T_LOG("dt_launch tool returned %d with error code %d", rc, errno);
		}
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pid, "dt_launch_tool");

		child_pids[num_children++] = pid;
	}
	pthread_mutex_unlock(&test_mtx);
}
605
/*
 * Common driver for the three T_DECLs below.  Reads system parameters,
 * possibly lowers kern.zone_map_jetsam_limit so the test finishes in time,
 * installs dispatch sources (SIGUSR1 -> spawn next child; 1s timer -> query
 * zone info) and a ktrace session, then spawns the first child and enters
 * dispatch_main().  The test ends from the ktrace/atend handlers.
 */
static void
run_test(void)
{
	uint64_t mem;
	uint32_t testpath_buf_size, pages;
	int ret, dev, pgsz, initial_zone_occupancy, old_limit, new_limit = 0;
	size_t sysctl_size;

	T_ATEND(cleanup_and_end_test);
	T_SETUPBEGIN;

	main_start = time(NULL);
	/* The zone sysctls this test relies on only exist on development kernels. */
	dev = 0;
	sysctl_size = sizeof(dev);
	ret = sysctlbyname("kern.development", &dev, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.development failed");
	if (dev == 0) {
		T_SKIP("Skipping test on release kernel");
	}

	/* Children are re-execs of this binary, so record our own path. */
	testpath_buf_size = sizeof(testpath);
	ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "_NSGetExecutablePath");
	T_LOG("Executable path: %s", testpath);

	sysctl_size = sizeof(mem);
	ret = sysctlbyname("hw.memsize", &mem, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl hw.memsize failed");
	T_LOG("hw.memsize: %llu", mem);

	sysctl_size = sizeof(pgsz);
	ret = sysctlbyname("vm.pagesize", &pgsz, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize failed");
	T_LOG("vm.pagesize: %d", pgsz);

	sysctl_size = sizeof(pages);
	ret = sysctlbyname("vm.pages", &pages, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pages failed");
	T_LOG("vm.pages: %d", pages);

	sysctl_size = sizeof(old_limit);
	ret = sysctlbyname("kern.zone_map_jetsam_limit", &old_limit, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_jetsam_limit failed");
	T_LOG("kern.zone_map_jetsam_limit: %d", old_limit);

	initial_zone_occupancy = query_zone_map_size();

	/* On large memory systems, set the zone maps jetsam limit lower so we can hit it without timing out. */
	if (mem > (uint64_t)LARGE_MEM_GB * 1024 * 1024 * 1024) {
		new_limit = LARGE_MEM_JETSAM_LIMIT;
	}

	/*
	 * If we start out with the zone map < 5% full, aim for 10% as the limit, so we don't time out.
	 * For anything else aim for 2x the initial size, capped by whatever value was set by T_META_SYSCTL_INT,
	 * or LARGE_MEM_JETSAM_LIMIT for large memory systems.
	 *
	 * NOTE(review): on large-memory systems with occupancy >= 5%, this branch
	 * unconditionally overwrites the LARGE_MEM_JETSAM_LIMIT chosen just above —
	 * confirm that is intended.
	 */
	if (initial_zone_occupancy < 5) {
		new_limit = JETSAM_LIMIT_LOWEST;
	} else {
		new_limit = initial_zone_occupancy * 2;
	}

	if (new_limit > 0 && new_limit < old_limit) {
		/*
		 * We should be fine messing with the zone_map_jetsam_limit here, i.e. outside of T_META_SYSCTL_INT.
		 * When the test ends, T_META_SYSCTL_INT will restore the zone_map_jetsam_limit to what it was
		 * before the test anyway.
		 */
		ret = sysctlbyname("kern.zone_map_jetsam_limit", NULL, NULL, &new_limit, sizeof(new_limit));
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl kern.zone_map_jetsam_limit failed");
		T_LOG("kern.zone_map_jetsam_limit set to %d%%", new_limit);
	}

	/* One info slot per tracked zone; refreshed by the timer source below. */
	zone_info_array = (mach_zone_info_array_t) calloc((unsigned long)current_test.num_zones, sizeof *zone_info_array);

	/*
	 * If the timeout specified by T_META_TIMEOUT is hit, the atend handler does not get called.
	 * So we're queueing a dispatch block to fire after TIMEOUT_SECS seconds, so we can exit cleanly.
	 */
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, TIMEOUT_SECS * NSEC_PER_SEC), dispatch_get_main_queue(), ^{
		T_ASSERT_FAIL("Timed out after %d seconds", TIMEOUT_SECS);
	});

	/*
	 * Create a dispatch source for the signal SIGUSR1. When a child is done allocating zone memory, it
	 * sends SIGUSR1 to the parent. Only then does the parent spawn another child. This prevents us from
	 * spawning many children at once and creating a lot of memory pressure.
	 */
	signal(SIGUSR1, SIG_IGN);
	dq_spawn = dispatch_queue_create("spawn_queue", DISPATCH_QUEUE_SERIAL);
	ds_signal = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dq_spawn);
	T_QUIET; T_ASSERT_NOTNULL(ds_signal, "dispatch_source_create: signal");

	dispatch_source_set_event_handler(ds_signal, ^{
		(void)query_zone_map_size();

		/* Wait a few seconds before spawning another child. Keeps us from allocating too aggressively */
		sleep(5);
		spawn_child_process();
	});
	dispatch_activate(ds_signal);

	/* Timer to query jetsam-relevant zone info every second. Print it every 5 seconds. */
	ds_timer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_queue_create("timer_queue", NULL));
	T_QUIET; T_ASSERT_NOTNULL(ds_timer, "dispatch_source_create: timer");
	dispatch_source_set_timer(ds_timer, dispatch_time(DISPATCH_TIME_NOW, NSEC_PER_SEC), NSEC_PER_SEC, 0);

	dispatch_source_set_event_handler(ds_timer, ^{
		query_zone_info();
	});
	dispatch_activate(ds_timer);

	/* Set up a ktrace session to listen for jetsam events */
	setup_ktrace_session();

	T_SETUPEND;

	/* Spawn the first child process */
	T_LOG("Spawning child processes to allocate zone memory...\n\n");
	spawn_child_process();

	dispatch_main();
}
730
731 static void
move_to_idle_band(void)732 move_to_idle_band(void)
733 {
734 memorystatus_priority_properties_t props;
735
736 /*
737 * We want to move the processes we spawn into the idle band, so that jetsam can target them first.
738 * This prevents other important BATS tasks from getting killed, specially in LTE where we have very few
739 * processes running.
740 *
741 * This is only needed for tests which (are likely to) lead us down the generic jetsam path.
742 */
743 props.priority = JETSAM_PRIORITY_IDLE;
744 props.user_data = 0;
745
746 if (memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, getpid(), 0, &props, sizeof(props))) {
747 printf("memorystatus call to change jetsam priority failed\n");
748 exit(-1);
749 }
750 }
751
/*
 * Child-process entry points, launched by spawn_child_process() via
 * dt_launch_tool.  Each moves itself to the idle band before leaking.
 */

T_HELPER_DECL(allocate_vm_regions, "allocates VM regions")
{
	move_to_idle_band();
	allocate_vm_stuff(REGIONS);
}

T_HELPER_DECL(allocate_vm_objects, "allocates VM objects and VM regions")
{
	move_to_idle_band();
	allocate_vm_stuff(OBJECTS);
}

T_HELPER_DECL(allocate_from_generic_zone, "allocates from a generic zone")
{
	move_to_idle_band();
	allocate_from_generic_zone();
}
769
770 /*
771 * T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL) changes the zone_map_jetsam_limit to a
772 * lower value, so that the test can complete faster.
773 * The test allocates zone memory pretty aggressively which can cause the system to panic
774 * if the jetsam limit is quite high; a lower value keeps us from panicking.
775 */
T_DECL( memorystatus_vme_zone_test,
    "allocates elements from the VM map entries zone, verifies zone-map-exhaustion jetsams",
    T_META_ASROOT(true),
    T_META_TIMEOUT(1800),
    /* T_META_LTEPHASE(LTE_POSTINIT),
     */
    T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL))
{
	/* Track only "VM map entries"; expect a targeted jetsam of a child. */
	current_test = (test_config_struct) {
		.test_index = VME_ZONE_TEST,
		.helper_func = VME_ZONE_TEST_OPT,
		.num_zones = 1,
		.zone_names = (mach_zone_name_t[]){
			{ .mzn_name = VME_ZONE }
		}
	};
	run_test();
}
794
T_DECL( memorystatus_vm_objects_zone_test,
    "allocates elements from the VM objects and the VM map entries zones, verifies zone-map-exhaustion jetsams",
    T_META_ASROOT(true),
    T_META_TIMEOUT(1800),
    /* T_META_LTEPHASE(LTE_POSTINIT),
     */
    T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL))
{
	/* Track both zones; the leaker is picked via the VM map entries zone. */
	current_test = (test_config_struct) {
		.test_index = VM_OBJECTS_ZONE_TEST,
		.helper_func = VM_OBJECTS_ZONE_TEST_OPT,
		.num_zones = 2,
		.zone_names = (mach_zone_name_t[]){
			{ .mzn_name = VME_ZONE },
			{ .mzn_name = VMOBJECTS_ZONE}
		}
	};
	run_test();
}
814
T_DECL( memorystatus_generic_zone_test,
    "allocates elements from a zone that doesn't have an optimized jetsam path, verifies zone-map-exhaustion jetsams",
    T_META_ASROOT(true),
    T_META_TIMEOUT(1800),
    /* T_META_LTEPHASE(LTE_POSTINIT),
     */
    T_META_SYSCTL_INT(ZONEMAP_JETSAM_LIMIT_SYSCTL))
{
	/* No zones tracked: any zone-map-exhaustion jetsam passes this test. */
	current_test = (test_config_struct) {
		.test_index = GENERIC_ZONE_TEST,
		.helper_func = GENERIC_ZONE_TEST_OPT,
		.num_zones = 0,
		.zone_names = NULL
	};
	run_test();
}
831