/*
 * Benchmark VM fault throughput.
 * This test faults memory for a configurable amount of time across a
 * configurable number of threads. Currently it only measures zero-fill
 * faults, and it supports two variants:
 * 1. Each thread gets its own vm objects to fault in
 * 2. Threads share vm objects
 *
 * We'll add more fault types as we identify problematic user-facing workloads
 * in macro benchmarks.
 *
 * Throughput is reported as pages / second using both wall time and cpu time.
 * CPU time is a more reliable metric for regression testing, but wall time can
 * highlight blocking in the VM.
 *
 * Running this benchmark directly is not recommended.
 * Use fault_throughput.lua, which provides a nicer interface and outputs
 * perfdata.
 */
20*a325d9c4SApple OSS Distributions #include <assert.h>
21*a325d9c4SApple OSS Distributions #include <ctype.h>
22*a325d9c4SApple OSS Distributions #include <errno.h>
23*a325d9c4SApple OSS Distributions #include <stdarg.h>
24*a325d9c4SApple OSS Distributions #include <stdio.h>
25*a325d9c4SApple OSS Distributions #include <stdlib.h>
26*a325d9c4SApple OSS Distributions #include <strings.h>
27*a325d9c4SApple OSS Distributions
28*a325d9c4SApple OSS Distributions #include <sys/mman.h>
29*a325d9c4SApple OSS Distributions #include <sys/types.h>
30*a325d9c4SApple OSS Distributions #include <sys/sysctl.h>
31*a325d9c4SApple OSS Distributions
32*a325d9c4SApple OSS Distributions /*
33*a325d9c4SApple OSS Distributions * TODO: Make this benchmark runnable on linux so we can do a perf comparison.
34*a325d9c4SApple OSS Distributions * We're mostly using POSIX APIs, but we'll need to replace
35*a325d9c4SApple OSS Distributions * the sysctls with the /proc equivalents, and replace clock_gettime_nsec_np
36*a325d9c4SApple OSS Distributions * with the linux equivalent.
37*a325d9c4SApple OSS Distributions */
38*a325d9c4SApple OSS Distributions #include <mach/mach.h>
39*a325d9c4SApple OSS Distributions
40*a325d9c4SApple OSS Distributions #include <TargetConditionals.h>
41*a325d9c4SApple OSS Distributions
42*a325d9c4SApple OSS Distributions #include <pthread.h>
43*a325d9c4SApple OSS Distributions #include <stdatomic.h>
44*a325d9c4SApple OSS Distributions
45*a325d9c4SApple OSS Distributions #include "benchmark/helpers.h"
46*a325d9c4SApple OSS Distributions
47*a325d9c4SApple OSS Distributions #if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
48*a325d9c4SApple OSS Distributions /*
49*a325d9c4SApple OSS Distributions * On non-embedded platforms we coalesce vm objects up to 128 MB, so
50*a325d9c4SApple OSS Distributions * we make the objects 128 MB on that platform to ensure they're not
51*a325d9c4SApple OSS Distributions * merged with anything else.
52*a325d9c4SApple OSS Distributions */
53*a325d9c4SApple OSS Distributions const static size_t kVmObjectSize = 128 * (1UL << 20);
54*a325d9c4SApple OSS Distributions #else
55*a325d9c4SApple OSS Distributions /*
56*a325d9c4SApple OSS Distributions * Embedded platforms don't coalesce vm objects. This number
57*a325d9c4SApple OSS Distributions * needs to be big enough that faulting it in dwarfs the cost of dequeuing
58*a325d9c4SApple OSS Distributions * it from the work queue, but can't be too large or else we won't be able
59*a325d9c4SApple OSS Distributions * to allocate one per thread in the separate-objects benchmark.
60*a325d9c4SApple OSS Distributions */
61*a325d9c4SApple OSS Distributions const static size_t kVmObjectSize = 4 * (1UL << 20);
62*a325d9c4SApple OSS Distributions #endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
63*a325d9c4SApple OSS Distributions static const clockid_t kWallTimeClock = CLOCK_MONOTONIC_RAW;
64*a325d9c4SApple OSS Distributions static const clockid_t kThreadCPUTimeClock = CLOCK_THREAD_CPUTIME_ID;
65*a325d9c4SApple OSS Distributions /* These globals are set dynamically during test setup based on sysctls. */
66*a325d9c4SApple OSS Distributions static uint64_t kCacheLineSize = 0;
67*a325d9c4SApple OSS Distributions /* The VM page size */
68*a325d9c4SApple OSS Distributions static size_t kPageSize = 0;
69*a325d9c4SApple OSS Distributions
70*a325d9c4SApple OSS Distributions
/* One contiguous chunk of memory handed to a worker thread to fault in. */
typedef struct fault_buffer {
	unsigned char *fb_start; /* First byte of the buffer. */
	size_t fb_size;          /* Length of the buffer in bytes. */
} fault_buffer_t;
75*a325d9c4SApple OSS Distributions
/* Selects how the fault buffers are laid out across vm objects. */
typedef enum test_variant {
	/* Each work queue entry is backed by its own vm object. */
	VARIANT_SEPARATE_VM_OBJECTS,
	/* Several work queue entries are carved out of the same vm object. */
	VARIANT_SHARE_VM_OBJECTS
} test_variant_t;
80*a325d9c4SApple OSS Distributions
81*a325d9c4SApple OSS Distributions typedef struct test_globals {
82*a325d9c4SApple OSS Distributions /* This lock protects: tg_cv, tg_running_count, tg_done, tg_current_iteration, and tg_iterations_completed. */
83*a325d9c4SApple OSS Distributions pthread_mutex_t tg_lock;
84*a325d9c4SApple OSS Distributions pthread_cond_t tg_cv;
85*a325d9c4SApple OSS Distributions /* The number of currently running threads */
86*a325d9c4SApple OSS Distributions unsigned int tg_running_count;
87*a325d9c4SApple OSS Distributions /* Set during cleanup to indicate that the benchmark is over. */
88*a325d9c4SApple OSS Distributions bool tg_done;
89*a325d9c4SApple OSS Distributions size_t tg_current_iteration;
90*a325d9c4SApple OSS Distributions size_t tg_iterations_completed;
91*a325d9c4SApple OSS Distributions unsigned int tg_num_threads;
92*a325d9c4SApple OSS Distributions test_variant_t tg_variant;
93*a325d9c4SApple OSS Distributions bool pin_threads;
94*a325d9c4SApple OSS Distributions /*
95*a325d9c4SApple OSS Distributions * An array of memory objects to fault in.
96*a325d9c4SApple OSS Distributions * This is basically a workqueue of
97*a325d9c4SApple OSS Distributions * contiguous chunks of memory that the worker threads
98*a325d9c4SApple OSS Distributions * will fault in.
99*a325d9c4SApple OSS Distributions */
100*a325d9c4SApple OSS Distributions fault_buffer_t *tg_fault_buffer_arr;
101*a325d9c4SApple OSS Distributions size_t tg_fault_buffer_arr_length;
102*a325d9c4SApple OSS Distributions /*
103*a325d9c4SApple OSS Distributions * To avoid false sharing, we pad the test globals with an extra cache line and place the atomic
104*a325d9c4SApple OSS Distributions * next_fault_buffer_index size_t after the cache line.
105*a325d9c4SApple OSS Distributions */
106*a325d9c4SApple OSS Distributions __unused char padding[];
107*a325d9c4SApple OSS Distributions /*
108*a325d9c4SApple OSS Distributions * This field is directly after the padding buffer.
109*a325d9c4SApple OSS Distributions * It is used to synchronize access to tg_fault_buffer_arr.
110*a325d9c4SApple OSS Distributions */
111*a325d9c4SApple OSS Distributions //_Atomic size_t tg_next_fault_buffer_index;
112*a325d9c4SApple OSS Distributions } test_globals_t;
113*a325d9c4SApple OSS Distributions
/* Argument bundle passed to each worker thread's entry point. */
typedef struct {
	void *test_globals; /* The shared test_globals_t, type-erased. */
	uint32_t cpu_id;    /* Cpu the worker binds to when thread pinning is enabled. */
} faulting_thread_args_t;
118*a325d9c4SApple OSS Distributions
119*a325d9c4SApple OSS Distributions static faulting_thread_args_t *faulting_thread_args;
120*a325d9c4SApple OSS Distributions
121*a325d9c4SApple OSS Distributions static const char* kSeparateObjectsArgument = "separate-objects";
122*a325d9c4SApple OSS Distributions static const char* kShareObjectsArgument = "share-objects";
123*a325d9c4SApple OSS Distributions
124*a325d9c4SApple OSS Distributions /* Arguments parsed from the command line */
125*a325d9c4SApple OSS Distributions typedef struct test_args {
126*a325d9c4SApple OSS Distributions uint32_t n_threads;
127*a325d9c4SApple OSS Distributions uint32_t first_cpu;
128*a325d9c4SApple OSS Distributions uint64_t duration_seconds;
129*a325d9c4SApple OSS Distributions test_variant_t variant;
130*a325d9c4SApple OSS Distributions bool pin_threads;
131*a325d9c4SApple OSS Distributions bool verbose;
132*a325d9c4SApple OSS Distributions } test_args_t;
133*a325d9c4SApple OSS Distributions
134*a325d9c4SApple OSS Distributions /*
135*a325d9c4SApple OSS Distributions * Fault in the pages in the given buffer.
136*a325d9c4SApple OSS Distributions */
137*a325d9c4SApple OSS Distributions static void fault_pages(fault_buffer_t *buffer, size_t stride);
138*a325d9c4SApple OSS Distributions /* Get a unique fault buffer from the global work queue. */
139*a325d9c4SApple OSS Distributions static fault_buffer_t *get_fault_buffer(test_globals_t* globals);
140*a325d9c4SApple OSS Distributions /*
141*a325d9c4SApple OSS Distributions * Grabs buffers from the global test structure and faults them in, using this
142*a325d9c4SApple OSS Distributions * test variant's stride, until there are no more buffers to grab.
143*a325d9c4SApple OSS Distributions * Returns the number of microseconds spent on-cpu.
144*a325d9c4SApple OSS Distributions */
145*a325d9c4SApple OSS Distributions static uint64_t grab_and_fault_pages(test_globals_t* globals);
146*a325d9c4SApple OSS Distributions
147*a325d9c4SApple OSS Distributions static bool worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals);
148*a325d9c4SApple OSS Distributions static void worker_thread_iteration_complete(test_globals_t *globals);
149*a325d9c4SApple OSS Distributions
150*a325d9c4SApple OSS Distributions static void parse_arguments(int argc, char **argv, test_args_t *args);
151*a325d9c4SApple OSS Distributions /*
152*a325d9c4SApple OSS Distributions * Sets up the test globals and spawns the background threads to do the faults.
153*a325d9c4SApple OSS Distributions * Returns an array of size `num_threads`
154*a325d9c4SApple OSS Distributions * Containing the thread ids of the forked threads.
155*a325d9c4SApple OSS Distributions */
156*a325d9c4SApple OSS Distributions static pthread_t* setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose);
157*a325d9c4SApple OSS Distributions static test_globals_t *allocate_test_globals(void);
158*a325d9c4SApple OSS Distributions /* Initializes variables in the globals array. */
159*a325d9c4SApple OSS Distributions static void init_globals(test_globals_t *globals, const test_args_t *args);
160*a325d9c4SApple OSS Distributions static inline _Atomic size_t *next_fault_buffer_index_ptr(test_globals_t *globals);
161*a325d9c4SApple OSS Distributions /*
162*a325d9c4SApple OSS Distributions * Called on the main thread.
163*a325d9c4SApple OSS Distributions * Waits for the background threads to be ready, sets up the memory objects,
164*a325d9c4SApple OSS Distributions * and then starts a faulting iteration.
165*a325d9c4SApple OSS Distributions * Returns the start (wall) time.
166*a325d9c4SApple OSS Distributions */
167*a325d9c4SApple OSS Distributions static uint64_t start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose);
168*a325d9c4SApple OSS Distributions /*
169*a325d9c4SApple OSS Distributions * Called on the main thread.
170*a325d9c4SApple OSS Distributions * Waits for the background threads to complete the iteration and cleans up.
171*a325d9c4SApple OSS Distributions * Returns the total amount of time spent faulting pages in nanoseconds by all threads thus far.
172*a325d9c4SApple OSS Distributions */
173*a325d9c4SApple OSS Distributions static uint64_t finish_iteration(test_globals_t *globals, uint64_t start_time);
174*a325d9c4SApple OSS Distributions /*
175*a325d9c4SApple OSS Distributions * Called on the main thread.
176*a325d9c4SApple OSS Distributions * Maps buffers and places them in the work queue.
177*a325d9c4SApple OSS Distributions */
178*a325d9c4SApple OSS Distributions static void setup_memory(test_globals_t* globals, test_variant_t variant);
179*a325d9c4SApple OSS Distributions /*
180*a325d9c4SApple OSS Distributions * Dump test results as a csv to stdout.
181*a325d9c4SApple OSS Distributions * Use fault_throughput.lua to convert to perfdata.
182*a325d9c4SApple OSS Distributions */
183*a325d9c4SApple OSS Distributions static void output_results(const test_globals_t *globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds);
184*a325d9c4SApple OSS Distributions static void cleanup_test(test_globals_t *globals);
185*a325d9c4SApple OSS Distributions /*
186*a325d9c4SApple OSS Distributions * Join the background threads and return the total microseconds
187*a325d9c4SApple OSS Distributions * of cpu time spent faulting across all of the threads.
188*a325d9c4SApple OSS Distributions * Takes ownership of the threads array and frees it.
189*a325d9c4SApple OSS Distributions */
190*a325d9c4SApple OSS Distributions static uint64_t join_background_threads(test_globals_t *globals, pthread_t *threads);
191*a325d9c4SApple OSS Distributions static void unmap_fault_buffers(test_globals_t *globals);
192*a325d9c4SApple OSS Distributions /*
193*a325d9c4SApple OSS Distributions * Get the stride between each vm object in the fault buffer array.
194*a325d9c4SApple OSS Distributions */
195*a325d9c4SApple OSS Distributions static size_t fault_buffer_stride(const test_globals_t *globals);
196*a325d9c4SApple OSS Distributions
197*a325d9c4SApple OSS Distributions int
main(int argc,char ** argv)198*a325d9c4SApple OSS Distributions main(int argc, char **argv)
199*a325d9c4SApple OSS Distributions {
200*a325d9c4SApple OSS Distributions /* How much memory should the test consume (per-core on the system)? */
201*a325d9c4SApple OSS Distributions #if (TARGET_OS_OSX || TARGET_OS_SIMULATOR)
202*a325d9c4SApple OSS Distributions static const size_t memory_per_core = kVmObjectSize;
203*a325d9c4SApple OSS Distributions #else
204*a325d9c4SApple OSS Distributions static const size_t memory_per_core = 25 * (1UL << 20);
205*a325d9c4SApple OSS Distributions #endif /* (TARGET_OS_OSX || TARGET_OS_SIMULATOR) */
206*a325d9c4SApple OSS Distributions const size_t kMemSize = memory_per_core * (size_t) get_ncpu();
207*a325d9c4SApple OSS Distributions test_globals_t *globals = allocate_test_globals();
208*a325d9c4SApple OSS Distributions /* Total wall-time spent faulting in pages. */
209*a325d9c4SApple OSS Distributions uint64_t wall_time_elapsed_ns = 0;
210*a325d9c4SApple OSS Distributions /* Total cpu-time spent faulting in pages */
211*a325d9c4SApple OSS Distributions uint64_t cpu_time_faulting_us = 0;
212*a325d9c4SApple OSS Distributions uint64_t start_time_ns;
213*a325d9c4SApple OSS Distributions test_args_t args;
214*a325d9c4SApple OSS Distributions parse_arguments(argc, argv, &args);
215*a325d9c4SApple OSS Distributions pthread_t* threads = setup_test(globals, &args, kMemSize, args.verbose);
216*a325d9c4SApple OSS Distributions
217*a325d9c4SApple OSS Distributions /* Keep doing more iterations until we've hit our (wall) time budget */
218*a325d9c4SApple OSS Distributions while (wall_time_elapsed_ns < args.duration_seconds * kNumNanosecondsInSecond) {
219*a325d9c4SApple OSS Distributions benchmark_log(args.verbose, "----Starting Iteration %lu-----\n", globals->tg_current_iteration + 1);
220*a325d9c4SApple OSS Distributions start_time_ns = start_iteration(globals, args.variant, args.verbose);
221*a325d9c4SApple OSS Distributions wall_time_elapsed_ns += finish_iteration(globals, start_time_ns);
222*a325d9c4SApple OSS Distributions benchmark_log(args.verbose, "----Completed Iteration %lu----\n", globals->tg_current_iteration);
223*a325d9c4SApple OSS Distributions }
224*a325d9c4SApple OSS Distributions
225*a325d9c4SApple OSS Distributions benchmark_log(args.verbose, "Hit time budget\nJoining worker threads\n");
226*a325d9c4SApple OSS Distributions cpu_time_faulting_us = join_background_threads(globals, threads);
227*a325d9c4SApple OSS Distributions benchmark_log(args.verbose, "----End Test Output----\n");
228*a325d9c4SApple OSS Distributions output_results(globals, (double) wall_time_elapsed_ns / kNumNanosecondsInSecond,
229*a325d9c4SApple OSS Distributions (double)cpu_time_faulting_us / kNumMicrosecondsInSecond);
230*a325d9c4SApple OSS Distributions cleanup_test(globals);
231*a325d9c4SApple OSS Distributions
232*a325d9c4SApple OSS Distributions return 0;
233*a325d9c4SApple OSS Distributions }
234*a325d9c4SApple OSS Distributions
235*a325d9c4SApple OSS Distributions
236*a325d9c4SApple OSS Distributions /* The main loop for the worker threads. */
237*a325d9c4SApple OSS Distributions static void*
faulting_thread(void * arg)238*a325d9c4SApple OSS Distributions faulting_thread(void* arg)
239*a325d9c4SApple OSS Distributions {
240*a325d9c4SApple OSS Distributions test_globals_t* globals = ((faulting_thread_args_t *)arg)->test_globals;
241*a325d9c4SApple OSS Distributions uint64_t on_cpu_time_faulting = 0;
242*a325d9c4SApple OSS Distributions size_t current_iteration = 1;
243*a325d9c4SApple OSS Distributions
244*a325d9c4SApple OSS Distributions if (globals->pin_threads) {
245*a325d9c4SApple OSS Distributions uint32_t cpu_id = ((faulting_thread_args_t *)arg)->cpu_id;
246*a325d9c4SApple OSS Distributions int err = sysctlbyname("kern.sched_thread_bind_cpu", NULL, 0, &cpu_id, sizeof(cpu_id));
247*a325d9c4SApple OSS Distributions assert(err == 0);
248*a325d9c4SApple OSS Distributions }
249*a325d9c4SApple OSS Distributions
250*a325d9c4SApple OSS Distributions while (true) {
251*a325d9c4SApple OSS Distributions bool should_continue = worker_thread_iteration_setup(current_iteration, globals);
252*a325d9c4SApple OSS Distributions if (!should_continue) {
253*a325d9c4SApple OSS Distributions break;
254*a325d9c4SApple OSS Distributions }
255*a325d9c4SApple OSS Distributions on_cpu_time_faulting += grab_and_fault_pages(globals);
256*a325d9c4SApple OSS Distributions worker_thread_iteration_complete(globals);
257*a325d9c4SApple OSS Distributions current_iteration++;
258*a325d9c4SApple OSS Distributions }
259*a325d9c4SApple OSS Distributions return (void*)on_cpu_time_faulting;
260*a325d9c4SApple OSS Distributions }
261*a325d9c4SApple OSS Distributions
262*a325d9c4SApple OSS Distributions /*
263*a325d9c4SApple OSS Distributions * Called on the worker threads before each iteration to synchronize this
264*a325d9c4SApple OSS Distributions * iteration start with the other threads.
265*a325d9c4SApple OSS Distributions * Returns true if the iteration should continue, and false if the test is over.
266*a325d9c4SApple OSS Distributions */
267*a325d9c4SApple OSS Distributions static bool
worker_thread_iteration_setup(size_t current_iteration,test_globals_t * globals)268*a325d9c4SApple OSS Distributions worker_thread_iteration_setup(size_t current_iteration, test_globals_t *globals)
269*a325d9c4SApple OSS Distributions {
270*a325d9c4SApple OSS Distributions bool should_continue = false;
271*a325d9c4SApple OSS Distributions int ret = 0;
272*a325d9c4SApple OSS Distributions // Gate on the other threads being ready to start
273*a325d9c4SApple OSS Distributions ret = pthread_mutex_lock(&globals->tg_lock);
274*a325d9c4SApple OSS Distributions assert(ret == 0);
275*a325d9c4SApple OSS Distributions globals->tg_running_count++;
276*a325d9c4SApple OSS Distributions if (globals->tg_running_count == globals->tg_num_threads) {
277*a325d9c4SApple OSS Distributions // All the worker threads are running.
278*a325d9c4SApple OSS Distributions // Wake up the main thread so that it can ungate the test.
279*a325d9c4SApple OSS Distributions ret = pthread_cond_broadcast(&globals->tg_cv);
280*a325d9c4SApple OSS Distributions assert(ret == 0);
281*a325d9c4SApple OSS Distributions }
282*a325d9c4SApple OSS Distributions /*
283*a325d9c4SApple OSS Distributions * The main thread will start this iteration by incrementing
284*a325d9c4SApple OSS Distributions * tg_current_iteration. Block until that happens.
285*a325d9c4SApple OSS Distributions * See start_iteration for the wakeup code.
286*a325d9c4SApple OSS Distributions */
287*a325d9c4SApple OSS Distributions while (!globals->tg_done && globals->tg_current_iteration != current_iteration) {
288*a325d9c4SApple OSS Distributions ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
289*a325d9c4SApple OSS Distributions assert(ret == 0);
290*a325d9c4SApple OSS Distributions }
291*a325d9c4SApple OSS Distributions should_continue = !globals->tg_done;
292*a325d9c4SApple OSS Distributions ret = pthread_mutex_unlock(&globals->tg_lock);
293*a325d9c4SApple OSS Distributions assert(ret == 0);
294*a325d9c4SApple OSS Distributions return should_continue;
295*a325d9c4SApple OSS Distributions }
296*a325d9c4SApple OSS Distributions
297*a325d9c4SApple OSS Distributions /*
298*a325d9c4SApple OSS Distributions * Called on the worker threads before each iteration finishes to synchronize
299*a325d9c4SApple OSS Distributions * with the other threads.
300*a325d9c4SApple OSS Distributions */
301*a325d9c4SApple OSS Distributions static void
worker_thread_iteration_complete(test_globals_t * globals)302*a325d9c4SApple OSS Distributions worker_thread_iteration_complete(test_globals_t *globals)
303*a325d9c4SApple OSS Distributions {
304*a325d9c4SApple OSS Distributions int ret;
305*a325d9c4SApple OSS Distributions // Mark ourselves as done and wait for the other threads to finish
306*a325d9c4SApple OSS Distributions ret = pthread_mutex_lock(&globals->tg_lock);
307*a325d9c4SApple OSS Distributions assert(ret == 0);
308*a325d9c4SApple OSS Distributions globals->tg_running_count--;
309*a325d9c4SApple OSS Distributions if (globals->tg_running_count == 0) {
310*a325d9c4SApple OSS Distributions // We're the last one to finish. Mark this iteration as completed and wake everyone up.
311*a325d9c4SApple OSS Distributions globals->tg_iterations_completed++;
312*a325d9c4SApple OSS Distributions ret = pthread_cond_broadcast(&globals->tg_cv);
313*a325d9c4SApple OSS Distributions assert(ret == 0);
314*a325d9c4SApple OSS Distributions } else {
315*a325d9c4SApple OSS Distributions // Others are running. Wait for them to finish.
316*a325d9c4SApple OSS Distributions while (globals->tg_iterations_completed != globals->tg_current_iteration) {
317*a325d9c4SApple OSS Distributions ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
318*a325d9c4SApple OSS Distributions assert(ret == 0);
319*a325d9c4SApple OSS Distributions }
320*a325d9c4SApple OSS Distributions }
321*a325d9c4SApple OSS Distributions ret = pthread_mutex_unlock(&globals->tg_lock);
322*a325d9c4SApple OSS Distributions assert(ret == 0);
323*a325d9c4SApple OSS Distributions }
324*a325d9c4SApple OSS Distributions
325*a325d9c4SApple OSS Distributions static void
fault_pages(fault_buffer_t * buffer,size_t stride)326*a325d9c4SApple OSS Distributions fault_pages(fault_buffer_t *buffer, size_t stride)
327*a325d9c4SApple OSS Distributions {
328*a325d9c4SApple OSS Distributions volatile unsigned char val;
329*a325d9c4SApple OSS Distributions for (unsigned char* ptr = buffer->fb_start; ptr < buffer->fb_start + buffer->fb_size; ptr += stride) {
330*a325d9c4SApple OSS Distributions val = *ptr;
331*a325d9c4SApple OSS Distributions }
332*a325d9c4SApple OSS Distributions }
333*a325d9c4SApple OSS Distributions
334*a325d9c4SApple OSS Distributions static fault_buffer_t *
get_fault_buffer(test_globals_t * globals)335*a325d9c4SApple OSS Distributions get_fault_buffer(test_globals_t* globals)
336*a325d9c4SApple OSS Distributions {
337*a325d9c4SApple OSS Distributions size_t index = atomic_fetch_add_explicit(next_fault_buffer_index_ptr(globals), 1UL, memory_order_acq_rel);
338*a325d9c4SApple OSS Distributions if (index < globals->tg_fault_buffer_arr_length) {
339*a325d9c4SApple OSS Distributions return &globals->tg_fault_buffer_arr[index];
340*a325d9c4SApple OSS Distributions }
341*a325d9c4SApple OSS Distributions return NULL;
342*a325d9c4SApple OSS Distributions }
343*a325d9c4SApple OSS Distributions
344*a325d9c4SApple OSS Distributions static uint64_t
grab_and_fault_pages(test_globals_t * globals)345*a325d9c4SApple OSS Distributions grab_and_fault_pages(test_globals_t* globals)
346*a325d9c4SApple OSS Distributions {
347*a325d9c4SApple OSS Distributions struct timespec start_time, end_time;
348*a325d9c4SApple OSS Distributions uint64_t nanoseconds_faulting_on_cpu = 0;
349*a325d9c4SApple OSS Distributions int ret;
350*a325d9c4SApple OSS Distributions size_t stride = fault_buffer_stride(globals) * kPageSize;
351*a325d9c4SApple OSS Distributions while (true) {
352*a325d9c4SApple OSS Distributions fault_buffer_t *object = get_fault_buffer(globals);
353*a325d9c4SApple OSS Distributions if (object == NULL) {
354*a325d9c4SApple OSS Distributions break;
355*a325d9c4SApple OSS Distributions }
356*a325d9c4SApple OSS Distributions ret = clock_gettime(kThreadCPUTimeClock, &start_time);
357*a325d9c4SApple OSS Distributions assert(ret == 0);
358*a325d9c4SApple OSS Distributions
359*a325d9c4SApple OSS Distributions fault_pages(object, stride);
360*a325d9c4SApple OSS Distributions
361*a325d9c4SApple OSS Distributions ret = clock_gettime(kThreadCPUTimeClock, &end_time);
362*a325d9c4SApple OSS Distributions assert(ret == 0);
363*a325d9c4SApple OSS Distributions nanoseconds_faulting_on_cpu += (unsigned long) timespec_difference_us(&end_time, &start_time);
364*a325d9c4SApple OSS Distributions }
365*a325d9c4SApple OSS Distributions return nanoseconds_faulting_on_cpu;
366*a325d9c4SApple OSS Distributions }
367*a325d9c4SApple OSS Distributions
368*a325d9c4SApple OSS Distributions static uint64_t
start_iteration(test_globals_t * globals,test_variant_t variant,bool verbose)369*a325d9c4SApple OSS Distributions start_iteration(test_globals_t* globals, test_variant_t variant, bool verbose)
370*a325d9c4SApple OSS Distributions {
371*a325d9c4SApple OSS Distributions int ret;
372*a325d9c4SApple OSS Distributions uint64_t start_time;
373*a325d9c4SApple OSS Distributions ret = pthread_mutex_lock(&globals->tg_lock);
374*a325d9c4SApple OSS Distributions assert(ret == 0);
375*a325d9c4SApple OSS Distributions benchmark_log(verbose, "Waiting for workers to catch up before starting next iteration.\n");
376*a325d9c4SApple OSS Distributions /* Wait until all the threads are ready to go to the next iteration */
377*a325d9c4SApple OSS Distributions while (globals->tg_running_count != globals->tg_num_threads) {
378*a325d9c4SApple OSS Distributions ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
379*a325d9c4SApple OSS Distributions }
380*a325d9c4SApple OSS Distributions benchmark_log(verbose, "Workers are all caught up\n");
381*a325d9c4SApple OSS Distributions setup_memory(globals, variant);
382*a325d9c4SApple OSS Distributions benchmark_log(verbose, "Initialized data structures for iteration. Waking workers.\n");
383*a325d9c4SApple OSS Distributions /* Grab a timestamp, tick the current iteration, and wake up the worker threads */
384*a325d9c4SApple OSS Distributions start_time = current_timestamp_ns();
385*a325d9c4SApple OSS Distributions globals->tg_current_iteration++;
386*a325d9c4SApple OSS Distributions ret = pthread_mutex_unlock(&globals->tg_lock);
387*a325d9c4SApple OSS Distributions assert(ret == 0);
388*a325d9c4SApple OSS Distributions ret = pthread_cond_broadcast(&globals->tg_cv);
389*a325d9c4SApple OSS Distributions assert(ret == 0);
390*a325d9c4SApple OSS Distributions return start_time;
391*a325d9c4SApple OSS Distributions }
392*a325d9c4SApple OSS Distributions
393*a325d9c4SApple OSS Distributions static uint64_t
finish_iteration(test_globals_t * globals,uint64_t start_time)394*a325d9c4SApple OSS Distributions finish_iteration(test_globals_t* globals, uint64_t start_time)
395*a325d9c4SApple OSS Distributions {
396*a325d9c4SApple OSS Distributions int ret;
397*a325d9c4SApple OSS Distributions uint64_t end_time;
398*a325d9c4SApple OSS Distributions ret = pthread_mutex_lock(&globals->tg_lock);
399*a325d9c4SApple OSS Distributions assert(ret == 0);
400*a325d9c4SApple OSS Distributions while (globals->tg_iterations_completed != globals->tg_current_iteration) {
401*a325d9c4SApple OSS Distributions ret = pthread_cond_wait(&globals->tg_cv, &globals->tg_lock);
402*a325d9c4SApple OSS Distributions }
403*a325d9c4SApple OSS Distributions end_time = current_timestamp_ns();
404*a325d9c4SApple OSS Distributions ret = pthread_mutex_unlock(&globals->tg_lock);
405*a325d9c4SApple OSS Distributions unmap_fault_buffers(globals);
406*a325d9c4SApple OSS Distributions assert(ret == 0);
407*a325d9c4SApple OSS Distributions return end_time - start_time;
408*a325d9c4SApple OSS Distributions }
409*a325d9c4SApple OSS Distributions
410*a325d9c4SApple OSS Distributions static void
setup_memory(test_globals_t * globals,test_variant_t variant)411*a325d9c4SApple OSS Distributions setup_memory(test_globals_t* globals, test_variant_t variant)
412*a325d9c4SApple OSS Distributions {
413*a325d9c4SApple OSS Distributions size_t stride = fault_buffer_stride(globals);
414*a325d9c4SApple OSS Distributions for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
415*a325d9c4SApple OSS Distributions fault_buffer_t *object = &globals->tg_fault_buffer_arr[i];
416*a325d9c4SApple OSS Distributions object->fb_start = mmap_buffer(kVmObjectSize);
417*a325d9c4SApple OSS Distributions object->fb_size = kVmObjectSize;
418*a325d9c4SApple OSS Distributions if (variant == VARIANT_SHARE_VM_OBJECTS) {
419*a325d9c4SApple OSS Distributions /*
420*a325d9c4SApple OSS Distributions * Insert another buffer into the work queue for each thread.
421*a325d9c4SApple OSS Distributions * Each buffer starts 1 page past where the previous buffer started into the vm object.
422*a325d9c4SApple OSS Distributions * Since each thread strides by the number of threads * the page size they won't fault in the same pages.
423*a325d9c4SApple OSS Distributions */
424*a325d9c4SApple OSS Distributions for (size_t j = 1; j < globals->tg_num_threads; j++) {
425*a325d9c4SApple OSS Distributions size_t offset = kPageSize * j;
426*a325d9c4SApple OSS Distributions fault_buffer_t *offset_object = &globals->tg_fault_buffer_arr[i + j];
427*a325d9c4SApple OSS Distributions offset_object->fb_start = object->fb_start + offset;
428*a325d9c4SApple OSS Distributions offset_object->fb_size = object->fb_size - offset;
429*a325d9c4SApple OSS Distributions }
430*a325d9c4SApple OSS Distributions } else if (variant != VARIANT_SEPARATE_VM_OBJECTS) {
431*a325d9c4SApple OSS Distributions fprintf(stderr, "Unknown test variant.\n");
432*a325d9c4SApple OSS Distributions exit(2);
433*a325d9c4SApple OSS Distributions }
434*a325d9c4SApple OSS Distributions }
435*a325d9c4SApple OSS Distributions atomic_store_explicit(next_fault_buffer_index_ptr(globals), 0, memory_order_release);
436*a325d9c4SApple OSS Distributions }
437*a325d9c4SApple OSS Distributions
438*a325d9c4SApple OSS Distributions static void
unmap_fault_buffers(test_globals_t * globals)439*a325d9c4SApple OSS Distributions unmap_fault_buffers(test_globals_t* globals)
440*a325d9c4SApple OSS Distributions {
441*a325d9c4SApple OSS Distributions size_t stride = fault_buffer_stride(globals);
442*a325d9c4SApple OSS Distributions for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
443*a325d9c4SApple OSS Distributions fault_buffer_t *buffer = &globals->tg_fault_buffer_arr[i];
444*a325d9c4SApple OSS Distributions int res = munmap(buffer->fb_start, buffer->fb_size);
445*a325d9c4SApple OSS Distributions assert(res == 0);
446*a325d9c4SApple OSS Distributions }
447*a325d9c4SApple OSS Distributions }
448*a325d9c4SApple OSS Distributions
/*
 * Allocate a zero-filled test_globals_t with extra trailing space.
 *
 * On first use this also fills in kCacheLineSize and kPageSize from the
 * hw.cachelinesize and vm.pagesize sysctls. The allocation holds the struct,
 * then kCacheLineSize bytes, then one _Atomic size_t (the slot that
 * next_fault_buffer_index_ptr() computes the address of).
 */
static test_globals_t *
allocate_test_globals()
{
	int rc;

	if (kCacheLineSize == 0) {
		size_t len = sizeof(kCacheLineSize);
		rc = sysctlbyname("hw.cachelinesize", &kCacheLineSize, &len, NULL, 0);
		assert(rc == 0);
		assert(kCacheLineSize > 0);
	}
	if (kPageSize == 0) {
		size_t len = sizeof(kPageSize);
		rc = sysctlbyname("vm.pagesize", &kPageSize, &len, NULL, 0);
		assert(rc == 0);
		assert(kPageSize > 0);
	}

	const size_t alloc_size = sizeof(test_globals_t) + kCacheLineSize + sizeof(_Atomic size_t);
	/* calloc hands back zeroed memory, so no separate memset is needed. */
	test_globals_t *result = calloc(1, alloc_size);
	assert(result != NULL);
	return result;
}
472*a325d9c4SApple OSS Distributions
473*a325d9c4SApple OSS Distributions static void
init_globals(test_globals_t * globals,const test_args_t * args)474*a325d9c4SApple OSS Distributions init_globals(test_globals_t *globals, const test_args_t *args)
475*a325d9c4SApple OSS Distributions {
476*a325d9c4SApple OSS Distributions pthread_mutexattr_t mutex_attrs;
477*a325d9c4SApple OSS Distributions pthread_condattr_t cond_attrs;
478*a325d9c4SApple OSS Distributions int ret;
479*a325d9c4SApple OSS Distributions memset(globals, 0, sizeof(test_globals_t));
480*a325d9c4SApple OSS Distributions
481*a325d9c4SApple OSS Distributions ret = pthread_mutexattr_init(&mutex_attrs);
482*a325d9c4SApple OSS Distributions assert(ret == 0);
483*a325d9c4SApple OSS Distributions ret = pthread_mutex_init(&globals->tg_lock, &mutex_attrs);
484*a325d9c4SApple OSS Distributions assert(ret == 0);
485*a325d9c4SApple OSS Distributions ret = pthread_condattr_init(&cond_attrs);
486*a325d9c4SApple OSS Distributions assert(ret == 0);
487*a325d9c4SApple OSS Distributions ret = pthread_cond_init(&globals->tg_cv, &cond_attrs);
488*a325d9c4SApple OSS Distributions assert(ret == 0);
489*a325d9c4SApple OSS Distributions ret = pthread_mutexattr_destroy(&mutex_attrs);
490*a325d9c4SApple OSS Distributions assert(ret == 0);
491*a325d9c4SApple OSS Distributions ret = pthread_condattr_destroy(&cond_attrs);
492*a325d9c4SApple OSS Distributions assert(ret == 0);
493*a325d9c4SApple OSS Distributions
494*a325d9c4SApple OSS Distributions globals->tg_num_threads = args->n_threads;
495*a325d9c4SApple OSS Distributions globals->tg_variant = args->variant;
496*a325d9c4SApple OSS Distributions globals->pin_threads = args->pin_threads;
497*a325d9c4SApple OSS Distributions }
498*a325d9c4SApple OSS Distributions
499*a325d9c4SApple OSS Distributions static void
init_fault_buffer_arr(test_globals_t * globals,const test_args_t * args,size_t memory_size)500*a325d9c4SApple OSS Distributions init_fault_buffer_arr(test_globals_t *globals, const test_args_t *args, size_t memory_size)
501*a325d9c4SApple OSS Distributions {
502*a325d9c4SApple OSS Distributions if (args->variant == VARIANT_SEPARATE_VM_OBJECTS) {
503*a325d9c4SApple OSS Distributions // This variant creates separate vm objects up to memory size bytes total
504*a325d9c4SApple OSS Distributions globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize;
505*a325d9c4SApple OSS Distributions } else if (args->variant == VARIANT_SHARE_VM_OBJECTS) {
506*a325d9c4SApple OSS Distributions // This variant creates separate vm objects up to memory size bytes total
507*a325d9c4SApple OSS Distributions // And places a pointer into each vm object for each thread.
508*a325d9c4SApple OSS Distributions globals->tg_fault_buffer_arr_length = memory_size / kVmObjectSize * globals->tg_num_threads;
509*a325d9c4SApple OSS Distributions } else {
510*a325d9c4SApple OSS Distributions fprintf(stderr, "Unsupported test variant.\n");
511*a325d9c4SApple OSS Distributions exit(2);
512*a325d9c4SApple OSS Distributions }
513*a325d9c4SApple OSS Distributions // It doesn't make sense to have more threads than elements in the work queue.
514*a325d9c4SApple OSS Distributions // NB: Since we scale memory_size by ncpus, this can only happen if the user
515*a325d9c4SApple OSS Distributions // tries to run the benchmark with many more threads than cores.
516*a325d9c4SApple OSS Distributions assert(globals->tg_fault_buffer_arr_length >= globals->tg_num_threads);
517*a325d9c4SApple OSS Distributions globals->tg_fault_buffer_arr = calloc(sizeof(fault_buffer_t), globals->tg_fault_buffer_arr_length);
518*a325d9c4SApple OSS Distributions assert(globals->tg_fault_buffer_arr);
519*a325d9c4SApple OSS Distributions }
520*a325d9c4SApple OSS Distributions
521*a325d9c4SApple OSS Distributions static pthread_t *
spawn_worker_threads(test_globals_t * globals,unsigned int num_threads,unsigned int first_cpu)522*a325d9c4SApple OSS Distributions spawn_worker_threads(test_globals_t *globals, unsigned int num_threads, unsigned int first_cpu)
523*a325d9c4SApple OSS Distributions {
524*a325d9c4SApple OSS Distributions int ret;
525*a325d9c4SApple OSS Distributions pthread_attr_t pthread_attrs;
526*a325d9c4SApple OSS Distributions globals->tg_num_threads = num_threads;
527*a325d9c4SApple OSS Distributions pthread_t* threads = malloc(sizeof(pthread_t) * num_threads);
528*a325d9c4SApple OSS Distributions faulting_thread_args = malloc(sizeof(faulting_thread_args_t) * num_threads);
529*a325d9c4SApple OSS Distributions assert(threads);
530*a325d9c4SApple OSS Distributions ret = pthread_attr_init(&pthread_attrs);
531*a325d9c4SApple OSS Distributions assert(ret == 0);
532*a325d9c4SApple OSS Distributions // Spawn the background threads
533*a325d9c4SApple OSS Distributions for (unsigned int i = 0; i < num_threads; i++) {
534*a325d9c4SApple OSS Distributions if (globals->pin_threads) {
535*a325d9c4SApple OSS Distributions faulting_thread_args[i].cpu_id = (i + first_cpu) % get_ncpu();
536*a325d9c4SApple OSS Distributions }
537*a325d9c4SApple OSS Distributions faulting_thread_args[i].test_globals = globals;
538*a325d9c4SApple OSS Distributions ret = pthread_create(threads + i, &pthread_attrs, faulting_thread, &faulting_thread_args[i]);
539*a325d9c4SApple OSS Distributions assert(ret == 0);
540*a325d9c4SApple OSS Distributions }
541*a325d9c4SApple OSS Distributions ret = pthread_attr_destroy(&pthread_attrs);
542*a325d9c4SApple OSS Distributions assert(ret == 0);
543*a325d9c4SApple OSS Distributions return threads;
544*a325d9c4SApple OSS Distributions }
545*a325d9c4SApple OSS Distributions
546*a325d9c4SApple OSS Distributions static pthread_t*
setup_test(test_globals_t * globals,const test_args_t * args,size_t memory_size,bool verbose)547*a325d9c4SApple OSS Distributions setup_test(test_globals_t *globals, const test_args_t *args, size_t memory_size, bool verbose)
548*a325d9c4SApple OSS Distributions {
549*a325d9c4SApple OSS Distributions init_globals(globals, args);
550*a325d9c4SApple OSS Distributions init_fault_buffer_arr(globals, args, memory_size);
551*a325d9c4SApple OSS Distributions benchmark_log(verbose, "Initialized global data structures.\n");
552*a325d9c4SApple OSS Distributions pthread_t *workers = spawn_worker_threads(globals, args->n_threads, args->first_cpu);
553*a325d9c4SApple OSS Distributions benchmark_log(verbose, "Spawned workers.\n");
554*a325d9c4SApple OSS Distributions return workers;
555*a325d9c4SApple OSS Distributions }
556*a325d9c4SApple OSS Distributions
557*a325d9c4SApple OSS Distributions static uint64_t
join_background_threads(test_globals_t * globals,pthread_t * threads)558*a325d9c4SApple OSS Distributions join_background_threads(test_globals_t *globals, pthread_t *threads)
559*a325d9c4SApple OSS Distributions {
560*a325d9c4SApple OSS Distributions // Set the done flag so that the background threads exit
561*a325d9c4SApple OSS Distributions int ret;
562*a325d9c4SApple OSS Distributions uint64_t total_cputime_spent_faulting = 0;
563*a325d9c4SApple OSS Distributions ret = pthread_mutex_lock(&globals->tg_lock);
564*a325d9c4SApple OSS Distributions assert(ret == 0);
565*a325d9c4SApple OSS Distributions globals->tg_done = true;
566*a325d9c4SApple OSS Distributions ret = pthread_cond_broadcast(&globals->tg_cv);
567*a325d9c4SApple OSS Distributions assert(ret == 0);
568*a325d9c4SApple OSS Distributions ret = pthread_mutex_unlock(&globals->tg_lock);
569*a325d9c4SApple OSS Distributions assert(ret == 0);
570*a325d9c4SApple OSS Distributions
571*a325d9c4SApple OSS Distributions // Join the background threads
572*a325d9c4SApple OSS Distributions for (unsigned int i = 0; i < globals->tg_num_threads; i++) {
573*a325d9c4SApple OSS Distributions uint64_t cputime_spent_faulting = 0;
574*a325d9c4SApple OSS Distributions ret = pthread_join(threads[i], (void **)&cputime_spent_faulting);
575*a325d9c4SApple OSS Distributions assert(ret == 0);
576*a325d9c4SApple OSS Distributions total_cputime_spent_faulting += cputime_spent_faulting;
577*a325d9c4SApple OSS Distributions }
578*a325d9c4SApple OSS Distributions free(threads);
579*a325d9c4SApple OSS Distributions free(faulting_thread_args);
580*a325d9c4SApple OSS Distributions return total_cputime_spent_faulting;
581*a325d9c4SApple OSS Distributions }
582*a325d9c4SApple OSS Distributions
583*a325d9c4SApple OSS Distributions static void
cleanup_test(test_globals_t * globals)584*a325d9c4SApple OSS Distributions cleanup_test(test_globals_t* globals)
585*a325d9c4SApple OSS Distributions {
586*a325d9c4SApple OSS Distributions int ret;
587*a325d9c4SApple OSS Distributions ret = pthread_mutex_destroy(&globals->tg_lock);
588*a325d9c4SApple OSS Distributions assert(ret == 0);
589*a325d9c4SApple OSS Distributions ret = pthread_cond_destroy(&globals->tg_cv);
590*a325d9c4SApple OSS Distributions assert(ret == 0);
591*a325d9c4SApple OSS Distributions free(globals->tg_fault_buffer_arr);
592*a325d9c4SApple OSS Distributions free(globals);
593*a325d9c4SApple OSS Distributions }
594*a325d9c4SApple OSS Distributions
/*
 * Print the benchmark throughput as a small CSV block on stdout.
 *
 * globals:                  supplies the work queue and the number of
 *                           completed iterations.
 * walltime_elapsed_seconds: wall-clock duration of the run.
 * cputime_elapsed_seconds:  CPU time consumed by the run.
 *
 * The page count is the sum of fb_size / page size over the first entry of
 * each stride in the work queue, multiplied by tg_iterations_completed.
 */
static void
output_results(const test_globals_t* globals, double walltime_elapsed_seconds, double cputime_elapsed_seconds)
{
	// Start from zero so that no stale high bytes survive if the sysctl
	// reports a value narrower than size_t.
	size_t pgsize = 0;
	size_t sysctl_size = sizeof(pgsize);
	int ret = sysctlbyname("vm.pagesize", &pgsize, &sysctl_size, NULL, 0);
	assert(ret == 0);
	// pgsize is used as a divisor below.
	assert(pgsize > 0);
	size_t num_pages = 0;
	double walltime_throughput, cputime_throughput;
	size_t stride = fault_buffer_stride(globals);
	for (size_t i = 0; i < globals->tg_fault_buffer_arr_length; i += stride) {
		num_pages += globals->tg_fault_buffer_arr[i].fb_size / pgsize;
	}
	num_pages *= globals->tg_iterations_completed;
	walltime_throughput = (double)num_pages / walltime_elapsed_seconds;
	cputime_throughput = (double)num_pages / cputime_elapsed_seconds;
	printf("-----Results-----\n");
	printf("Throughput (pages / wall second), Throughput (pages / CPU second)\n");
	printf("%f,%f\n", walltime_throughput, cputime_throughput);
}
615*a325d9c4SApple OSS Distributions
616*a325d9c4SApple OSS Distributions static void
print_help(char ** argv)617*a325d9c4SApple OSS Distributions print_help(char** argv)
618*a325d9c4SApple OSS Distributions {
619*a325d9c4SApple OSS Distributions fprintf(stderr, "%s: <test-variant> [-v] duration num_threads\n", argv[0]);
620*a325d9c4SApple OSS Distributions fprintf(stderr, "\ntest variants:\n");
621*a325d9c4SApple OSS Distributions fprintf(stderr, " %s Fault in different vm objects in each thread.\n", kSeparateObjectsArgument);
622*a325d9c4SApple OSS Distributions fprintf(stderr, " %s Share vm objects across faulting threads.\n", kShareObjectsArgument);
623*a325d9c4SApple OSS Distributions }
624*a325d9c4SApple OSS Distributions
625*a325d9c4SApple OSS Distributions static void
parse_arguments(int argc,char ** argv,test_args_t * args)626*a325d9c4SApple OSS Distributions parse_arguments(int argc, char** argv, test_args_t *args)
627*a325d9c4SApple OSS Distributions {
628*a325d9c4SApple OSS Distributions int current_argument = 1;
629*a325d9c4SApple OSS Distributions memset(args, 0, sizeof(test_args_t));
630*a325d9c4SApple OSS Distributions if (argc < 4 || argc > 6) {
631*a325d9c4SApple OSS Distributions print_help(argv);
632*a325d9c4SApple OSS Distributions exit(1);
633*a325d9c4SApple OSS Distributions }
634*a325d9c4SApple OSS Distributions if (argv[current_argument][0] == '-') {
635*a325d9c4SApple OSS Distributions if (strcmp(argv[current_argument], "-v") == 0) {
636*a325d9c4SApple OSS Distributions args->verbose = true;
637*a325d9c4SApple OSS Distributions } else {
638*a325d9c4SApple OSS Distributions fprintf(stderr, "Unknown argument %s\n", argv[current_argument]);
639*a325d9c4SApple OSS Distributions print_help(argv);
640*a325d9c4SApple OSS Distributions exit(1);
641*a325d9c4SApple OSS Distributions }
642*a325d9c4SApple OSS Distributions current_argument++;
643*a325d9c4SApple OSS Distributions }
644*a325d9c4SApple OSS Distributions if (strncasecmp(argv[current_argument], kSeparateObjectsArgument, strlen(kSeparateObjectsArgument)) == 0) {
645*a325d9c4SApple OSS Distributions args->variant = VARIANT_SEPARATE_VM_OBJECTS;
646*a325d9c4SApple OSS Distributions } else if (strncasecmp(argv[current_argument], kShareObjectsArgument, strlen(kShareObjectsArgument)) == 0) {
647*a325d9c4SApple OSS Distributions args->variant = VARIANT_SHARE_VM_OBJECTS;
648*a325d9c4SApple OSS Distributions } else {
649*a325d9c4SApple OSS Distributions print_help(argv);
650*a325d9c4SApple OSS Distributions exit(1);
651*a325d9c4SApple OSS Distributions }
652*a325d9c4SApple OSS Distributions current_argument++;
653*a325d9c4SApple OSS Distributions
654*a325d9c4SApple OSS Distributions long duration = strtol(argv[current_argument++], NULL, 10);
655*a325d9c4SApple OSS Distributions if (duration == 0) {
656*a325d9c4SApple OSS Distributions print_help(argv);
657*a325d9c4SApple OSS Distributions exit(1);
658*a325d9c4SApple OSS Distributions }
659*a325d9c4SApple OSS Distributions long num_cores = strtol(argv[current_argument++], NULL, 10);
660*a325d9c4SApple OSS Distributions if (num_cores == 0) {
661*a325d9c4SApple OSS Distributions print_help(argv);
662*a325d9c4SApple OSS Distributions exit(1);
663*a325d9c4SApple OSS Distributions }
664*a325d9c4SApple OSS Distributions if (current_argument < argc) {
665*a325d9c4SApple OSS Distributions long first_cpu = strtol(argv[current_argument++], NULL, 10);
666*a325d9c4SApple OSS Distributions assert(first_cpu >= 0 && first_cpu < get_ncpu());
667*a325d9c4SApple OSS Distributions args->pin_threads = true;
668*a325d9c4SApple OSS Distributions args->first_cpu = (unsigned int) first_cpu;
669*a325d9c4SApple OSS Distributions } else {
670*a325d9c4SApple OSS Distributions args->pin_threads = false;
671*a325d9c4SApple OSS Distributions }
672*a325d9c4SApple OSS Distributions
673*a325d9c4SApple OSS Distributions assert(num_cores > 0 && num_cores <= get_ncpu());
674*a325d9c4SApple OSS Distributions args->n_threads = (unsigned int) num_cores;
675*a325d9c4SApple OSS Distributions args->duration_seconds = (unsigned long) duration;
676*a325d9c4SApple OSS Distributions }
677*a325d9c4SApple OSS Distributions
678*a325d9c4SApple OSS Distributions static inline
679*a325d9c4SApple OSS Distributions _Atomic size_t *
next_fault_buffer_index_ptr(test_globals_t * globals)680*a325d9c4SApple OSS Distributions next_fault_buffer_index_ptr(test_globals_t *globals)
681*a325d9c4SApple OSS Distributions {
682*a325d9c4SApple OSS Distributions return (_Atomic size_t *) (((ptrdiff_t)(globals + 1)) + (int64_t)kCacheLineSize);
683*a325d9c4SApple OSS Distributions }
684*a325d9c4SApple OSS Distributions static size_t
fault_buffer_stride(const test_globals_t * globals)685*a325d9c4SApple OSS Distributions fault_buffer_stride(const test_globals_t *globals)
686*a325d9c4SApple OSS Distributions {
687*a325d9c4SApple OSS Distributions size_t stride;
688*a325d9c4SApple OSS Distributions if (globals->tg_variant == VARIANT_SEPARATE_VM_OBJECTS) {
689*a325d9c4SApple OSS Distributions stride = 1;
690*a325d9c4SApple OSS Distributions } else if (globals->tg_variant == VARIANT_SHARE_VM_OBJECTS) {
691*a325d9c4SApple OSS Distributions stride = globals->tg_num_threads;
692*a325d9c4SApple OSS Distributions } else {
693*a325d9c4SApple OSS Distributions fprintf(stderr, "Unknown variant\n");
694*a325d9c4SApple OSS Distributions exit(-1);
695*a325d9c4SApple OSS Distributions }
696*a325d9c4SApple OSS Distributions return stride;
697*a325d9c4SApple OSS Distributions }
698