/* xref: /xnu-8020.101.4/tests/perf_vmfault.c (revision e7776783b89a353188416a9a346c6cdb4928faad) */
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <darwintest.h>
#include <TargetConditionals.h>
#include <perfcheck_keys.h>

#include "benchmark/helpers.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.vm.perf"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("VM"),
	T_META_CHECK_LEAKS(false),
	T_META_TAG_PERF
	);

#ifdef DT_IOSMARK
#define MEMSIZE                 (1UL<<29)       /* 512 MB */
#else
#define MEMSIZE                 (1UL<<27)       /* 128 MB */
#endif

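/* Arbitrary VM user tags used to label the per-thread mappings in the multiple-regions variant. */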
#define VM_TAG1                 100
#define VM_TAG2                 101

enum {
	SOFT_FAULT,
	ZERO_FILL,
	NUM_FAULT_TYPES
};

enum {
	VARIANT_DEFAULT = 1,
	VARIANT_SINGLE_REGION,
	VARIANT_MULTIPLE_REGIONS,
	NUM_MAPPING_VARIANTS
};

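/* Indexed by mapping variant; slot 0 is "none" because the variants start at 1. */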
static char *variant_str[] = {
	"none",
	"default",
	"single-region",
	"multiple-regions"
};

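/*
 * Per-thread fault target: the base address of the thread's chunk, the
 * remapped (shared) alias used for soft-fault runs, and the chunk length.
 */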
typedef struct {
	char *region_addr;
	char *shared_region_addr;
	size_t region_len;
} memregion_config;

static memregion_config *memregion_config_per_thread;

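/* Shared test state; the ready/finished counters are protected by their respective locks. */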
static size_t pgsize;
static int num_threads;
static int ready_thread_count;
static int finished_thread_count;
static dt_stat_time_t runtime;
static pthread_cond_t start_cvar;
static pthread_cond_t threads_ready_cvar;
static pthread_cond_t threads_finished_cvar;
static pthread_mutex_t ready_thread_count_lock;
static pthread_mutex_t finished_thread_count_lock;

static void map_mem_regions_default(int fault_type, size_t memsize);
static void map_mem_regions_single(int fault_type, size_t memsize);
static void map_mem_regions_multiple(int fault_type, size_t memsize);
static void map_mem_regions(int fault_type, int mapping_variant, size_t memsize);
static void unmap_mem_regions(int mapping_variant, size_t memsize);
static void setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize);
static void fault_pages(int thread_id);
static void execute_threads(void);
static void *thread_setup(void *arg);
static void run_test(int fault_type, int mapping_variant, size_t memsize);
static void setup_and_run_test(int fault_type, int threads);

/* Allocates memory using the default mmap behavior. Each VM region created is capped at 128 MB. */
static void
map_mem_regions_default(int fault_type, size_t memsize)
{
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock, *memblock_share = NULL;

	memblock = (char *)mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Creates a single VM region by mapping in a named memory entry. */
static void
map_mem_regions_single(int fault_type, size_t memsize)
{
	volatile char val;
	vm_prot_t curprot, maxprot;
	char *ptr, *memblock = NULL, *memblock_share = NULL;
	vm_size_t size = memsize;
	vm_offset_t addr1 = 0;
	mach_port_t mem_handle = MACH_PORT_NULL;

	/* Allocate a region and fault in all the pages. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_allocate(mach_task_self(), &addr1, size, VM_FLAGS_ANYWHERE), "vm_allocate");
	for (ptr = (char *)addr1; ptr < (char *)addr1 + memsize; ptr += pgsize) {
		val = *ptr;
	}

	/* Create a named memory entry from the region allocated above, and de-allocate said region. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_make_memory_entry(mach_task_self(), &size, addr1, VM_PROT_ALL | MAP_MEM_NAMED_CREATE,
	    &mem_handle, MACH_PORT_NULL), "mach_make_memory_entry");
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_deallocate(mach_task_self(), addr1, size), "vm_deallocate");

	/* Map in the named entry and deallocate it. */
	T_QUIET; T_ASSERT_MACH_SUCCESS(vm_map(mach_task_self(), (vm_address_t *)&memblock, size, 0, VM_FLAGS_ANYWHERE, mem_handle, 0,
	    FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_NONE), "vm_map");
	T_QUIET; T_ASSERT_MACH_SUCCESS(mach_port_deallocate(mach_task_self(), mem_handle), "mach_port_deallocate");

	if (fault_type == SOFT_FAULT) {
		/* Fault in all the pages of the original region. */
		for (ptr = memblock; ptr < memblock + memsize; ptr += pgsize) {
			val = *ptr;
		}
		/* Remap the region so that subsequent accesses result in read soft faults. */
		T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
		    memsize, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
		    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
	}
	setup_per_thread_regions(memblock, memblock_share, fault_type, memsize);
}

/* Allocates a separate VM region for each thread. */
static void
map_mem_regions_multiple(int fault_type, size_t memsize)
{
	int i;
	size_t region_len, num_pages;
	volatile char val;
	char *ptr, *memblock, *memblock_share;
	vm_prot_t curprot, maxprot;

	num_pages = memsize / pgsize;

	for (i = 0; i < num_threads; i++) {
		memblock = NULL;

		region_len = num_pages / (size_t)num_threads;
		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_len++;
		}
		region_len *= pgsize;

		/*
		 * For anonymous mappings, the fd argument carries the VM tag on Darwin.
		 * Alternating tags should keep adjacent mappings from being coalesced
		 * into a single VM region.
		 */
		int fd = VM_MAKE_TAG((i % 2)? VM_TAG1 : VM_TAG2);
		memblock = (char *)mmap(NULL, region_len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, fd, 0);
		T_QUIET; T_ASSERT_NE((void *)memblock, MAP_FAILED, "mmap");
		memregion_config_per_thread[i].region_addr = memblock;
		memregion_config_per_thread[i].shared_region_addr = 0;
		memregion_config_per_thread[i].region_len = region_len;

		if (fault_type == SOFT_FAULT) {
			/* Fault in all the pages of the original region. */
			for (ptr = memblock; ptr < memblock + region_len; ptr += pgsize) {
				val = *ptr;
			}
			memblock_share = NULL;
			/* Remap the region so that subsequent accesses result in read soft faults. */
			T_QUIET; T_ASSERT_MACH_SUCCESS(vm_remap(mach_task_self(), (vm_address_t *)&memblock_share,
			    region_len, 0, VM_FLAGS_ANYWHERE, mach_task_self(), (vm_address_t)memblock, FALSE,
			    &curprot, &maxprot, VM_INHERIT_DEFAULT), "vm_remap");
			memregion_config_per_thread[i].shared_region_addr = memblock_share;
		}
	}
}

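/* Dispatches to the mapping variant under test after allocating the per-thread config array. */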
static void
map_mem_regions(int fault_type, int mapping_variant, size_t memsize)
{
	memregion_config_per_thread = (memregion_config *)malloc(sizeof(*memregion_config_per_thread) * (size_t)num_threads);
	switch (mapping_variant) {
	case VARIANT_SINGLE_REGION:
		map_mem_regions_single(fault_type, memsize);
		break;
	case VARIANT_MULTIPLE_REGIONS:
		map_mem_regions_multiple(fault_type, memsize);
		break;
	case VARIANT_DEFAULT:
	default:
		map_mem_regions_default(fault_type, memsize);
	}
}

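/*
 * Carves a single allocation into near-equal per-thread chunks, spreading any
 * remainder pages one per thread across the first (num_pages % num_threads) threads.
 */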
static void
setup_per_thread_regions(char *memblock, char *memblock_share, int fault_type, size_t memsize)
{
	int i;
	size_t region_len, region_start, num_pages;

	num_pages = memsize / pgsize;
	for (i = 0; i < num_threads; i++) {
		region_len = num_pages / (size_t)num_threads;
		region_start = region_len * (size_t)i;

		if ((size_t)i < num_pages % (size_t)num_threads) {
			region_start += (size_t)i;
			region_len++;
		} else {
			region_start += num_pages % (size_t)num_threads;
		}

		region_start *= pgsize;
		region_len *= pgsize;

		memregion_config_per_thread[i].region_addr = memblock + region_start;
		memregion_config_per_thread[i].shared_region_addr = ((fault_type == SOFT_FAULT) ?
		    memblock_share + region_start : 0);
		memregion_config_per_thread[i].region_len = region_len;
	}
}

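/* Unmaps everything map_mem_regions() created, including any soft-fault aliases. */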
static void
unmap_mem_regions(int mapping_variant, size_t memsize)
{
	/* munmap() is a POSIX call, so check it with the POSIX assert rather than the Mach one. */
	if (mapping_variant == VARIANT_MULTIPLE_REGIONS) {
		int i;
		for (i = 0; i < num_threads; i++) {
			if (memregion_config_per_thread[i].shared_region_addr != 0) {
				T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].shared_region_addr,
				    memregion_config_per_thread[i].region_len), "munmap");
			}
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[i].region_addr,
			    memregion_config_per_thread[i].region_len), "munmap");
		}
	} else {
		if (memregion_config_per_thread[0].shared_region_addr != 0) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].shared_region_addr, memsize), "munmap");
		}
		T_QUIET; T_ASSERT_POSIX_SUCCESS(munmap(memregion_config_per_thread[0].region_addr, memsize), "munmap");
	}
}

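/* Touches each page in the calling thread's chunk, using the remapped alias when one exists. */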
static void
fault_pages(int thread_id)
{
	char *ptr, *block;
	volatile char val;

	block = memregion_config_per_thread[thread_id].shared_region_addr ?
	    memregion_config_per_thread[thread_id].shared_region_addr :
	    memregion_config_per_thread[thread_id].region_addr;
	for (ptr = block; ptr < block + memregion_config_per_thread[thread_id].region_len; ptr += pgsize) {
		val = *ptr;
	}
}

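/* Worker thread: check in as ready, block until the start broadcast, fault the pages, then check out as finished. */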
static void *
thread_setup(void *arg)
{
	int my_index = *((int *)arg);

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	ready_thread_count++;
	if (ready_thread_count == num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_ready_cvar), "pthread_cond_signal");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&start_cvar, &ready_thread_count_lock), "pthread_cond_wait");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

	fault_pages(my_index);

	/* Up the finished count. */
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
	finished_thread_count++;
	if (finished_thread_count == num_threads) {
		/* All the threads are done. Wake up the main thread. */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&threads_finished_cvar), "pthread_cond_signal");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");
	return NULL;
}

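/*
 * Spawns the workers, waits until all of them are parked on the start condition
 * variable, and measures the interval from the start broadcast until the last
 * worker reports in.
 */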
static void
execute_threads(void)
{
	int thread_index, thread_retval;
	int *thread_indices;
	void *thread_retval_ptr = &thread_retval;
	pthread_t *threads;

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_ready_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&start_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&ready_thread_count_lock, NULL), "pthread_mutex_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_init(&threads_finished_cvar, NULL), "pthread_cond_init");
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_init(&finished_thread_count_lock, NULL), "pthread_mutex_init");
	ready_thread_count = 0;
	finished_thread_count = 0;

	threads = (pthread_t *)malloc(sizeof(*threads) * (size_t)num_threads);
	thread_indices = (int *)malloc(sizeof(*thread_indices) * (size_t)num_threads);
	for (thread_index = 0; thread_index < num_threads; thread_index++) {
		thread_indices[thread_index] = thread_index;
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_create(&threads[thread_index], NULL,
		    thread_setup, (void *)&thread_indices[thread_index]), "pthread_create");
	}

	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&ready_thread_count_lock), "pthread_mutex_lock");
	while (ready_thread_count != num_threads) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_ready_cvar, &ready_thread_count_lock),
		    "pthread_cond_wait");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&ready_thread_count_lock), "pthread_mutex_unlock");

	T_STAT_MEASURE(runtime) {
		/* Ungate the threads */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_broadcast(&start_cvar), "pthread_cond_broadcast");
		/* Wait for the threads to finish */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&finished_thread_count_lock), "pthread_mutex_lock");
		while (finished_thread_count != num_threads) {
			T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&threads_finished_cvar, &finished_thread_count_lock), "pthread_cond_wait");
		}
		/* Drop the lock so it can be cleanly re-initialized on the next iteration. */
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&finished_thread_count_lock), "pthread_mutex_unlock");
	};

	/* Join the threads */
	for (thread_index = 0; thread_index < num_threads; thread_index++) {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_join(threads[thread_index], &thread_retval_ptr),
		    "pthread_join");
	}

	free(threads);
	free(thread_indices);
}

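/* Runs one (fault type, mapping variant) configuration, repeating until the runtime statistic is stable. */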
static void
run_test(int fault_type, int mapping_variant, size_t memsize)
{
	char metric_str[32];
	size_t num_pages;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sysctl vm.pagesize failed");

	num_pages = memsize / pgsize;

	T_QUIET; T_ASSERT_LT(fault_type, NUM_FAULT_TYPES, "invalid test type");
	T_QUIET; T_ASSERT_LT(mapping_variant, NUM_MAPPING_VARIANTS, "invalid mapping variant");
	T_QUIET; T_ASSERT_GT(num_threads, 0, "num_threads <= 0");
	T_QUIET; T_ASSERT_GT((int)num_pages / num_threads, 0, "num_pages/num_threads <= 0");

	T_LOG("No. of cpus:     %d", get_ncpu());
	T_LOG("No. of threads:  %d", num_threads);
	T_LOG("No. of pages:    %zu", num_pages);
	T_LOG("Pagesize:        %zu", pgsize);
	T_LOG("Allocation size: %zu MB", memsize / (1024 * 1024));
	T_LOG("Mapping variant: %s", variant_str[mapping_variant]);

	snprintf(metric_str, sizeof(metric_str), "Runtime-%s", variant_str[mapping_variant]);
	runtime = dt_stat_time_create(metric_str);

	/* Repeat the map/fault/unmap cycle until the measured runtime converges. */
	while (!dt_stat_stable(runtime)) {
		map_mem_regions(fault_type, mapping_variant, memsize);
		execute_threads();
		unmap_mem_regions(mapping_variant, memsize);
	}

	dt_stat_finalize(runtime);
	T_LOG("Throughput-%s (MB/s): %lf\n\n", variant_str[mapping_variant], (double)memsize / (1024 * 1024) / dt_stat_mean((dt_stat_t)runtime));
}

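/*
 * Environment knobs:
 *   NTHREADS  - override the thread count (the single-threaded runs skip themselves when set)
 *   MEMSIZEMB - total allocation size in MB (default: MEMSIZE)
 *   VARIANT   - run only the given mapping variant (1-3) instead of all of them
 *
 * Illustrative invocation, assuming the built test binary is named perf_vmfault:
 *   MEMSIZEMB=256 VARIANT=2 ./perf_vmfault
 */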
static void
setup_and_run_test(int fault_type, int threads)
{
	int i, mapping_variant;
	size_t memsize;
	char *e;

	mapping_variant = VARIANT_DEFAULT;
	memsize = MEMSIZE;
	num_threads = threads;

	if ((e = getenv("NTHREADS"))) {
		if (threads == 1) {
			T_SKIP("Custom environment variables specified. Skipping single threaded version.");
		}
		num_threads = (int)strtol(e, NULL, 0);
	}

	if ((e = getenv("MEMSIZEMB"))) {
		memsize = (size_t)strtol(e, NULL, 0) * 1024 * 1024;
	}

	if ((e = getenv("VARIANT"))) {
		mapping_variant = (int)strtol(e, NULL, 0);
		run_test(fault_type, mapping_variant, memsize);
	} else {
		for (i = VARIANT_DEFAULT; i < NUM_MAPPING_VARIANTS; i++) {
			run_test(fault_type, i, memsize);
		}
	}

	T_END;
}

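/* darwintest entry points: a single-threaded and a multi-threaded T_DECL per fault type. */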
T_DECL(read_soft_fault,
    "Read soft faults (single thread)")
{
	setup_and_run_test(SOFT_FAULT, 1);
}

T_DECL(read_soft_fault_multithreaded,
    "Read soft faults (multi-threaded)")
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
		if (nthreads == 1) {
			T_SKIP("Skipping multi-threaded test on single core device.");
		}
	}
	setup_and_run_test(SOFT_FAULT, nthreads);
}

T_DECL(zero_fill_fault,
    "Zero fill faults (single thread)")
{
	setup_and_run_test(ZERO_FILL, 1);
}

T_DECL(zero_fill_fault_multithreaded,
    "Zero fill faults (multi-threaded)")
{
	char *e;
	int nthreads;

	/* iOSMark passes in the no. of threads via an env. variable */
	if ((e = getenv("DT_STAT_NTHREADS"))) {
		nthreads = (int)strtol(e, NULL, 0);
	} else {
		nthreads = get_ncpu();
		if (nthreads == 1) {
			T_SKIP("Skipping multi-threaded test on single core device.");
		}
	}
	setup_and_run_test(ZERO_FILL, nthreads);
}
463