#include <mach_ldebug.h>
#include <debug.h>

#include <mach/kern_return.h>
#include <mach/mach_host_server.h>
#include <mach_debug/lockgroup_info.h>

#include <os/atomic.h>

#include <kern/locks.h>
#include <kern/smr_hash.h>
#include <kern/misc_protos.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <libkern/section_keywords.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>
#include <string.h>
#include <vm/vm_kern_xnu.h>

#include <sys/kdebug.h>
#include <sys/errno.h>

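/*
 * In-kernel unit tests for hw ticket locks and SMR hash tables.
 *
 * Each test below is registered with SYSCTL_TEST_REGISTER, which exposes it
 * as a writable sysctl node so it can be kicked off from userspace, e.g.
 * (node name assumed from the registration macro, typically debug.test.*):
 *
 *     sysctl debug.test.smr_hash_basic=1
 *
 * A test reports success by writing 1 to *out and returning 0.
 */
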
#if SCHED_HYGIENE_DEBUG
static uint64_t
sane_us2abs(uint64_t us)
{
	uint64_t t;
	nanoseconds_to_absolutetime(us * NSEC_PER_USEC, &t);
	return t;
}
#endif

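/*
 * Spin (for at most `msec` milliseconds) until the distance between the
 * lock's next-ticket and current-ticket counters equals `delta`, i.e.
 * until `delta` threads are holding or queued on the ticket lock.
 */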
#if !KASAN
static void
hw_lck_ticket_test_wait_for_delta(hw_lck_ticket_t *lck, uint8_t delta, int msec)
{
	hw_lck_ticket_t tmp;

	delta *= HW_LCK_TICKET_LOCK_INCREMENT;
	for (int i = 0; i < msec * 1000; i++) {
		tmp.lck_value = os_atomic_load(&lck->lck_value, relaxed);
#if CONFIG_PV_TICKET
		const uint8_t cticket = tmp.cticket &
		    ~HW_LCK_TICKET_LOCK_PVWAITFLAG;
#else
		const uint8_t cticket = tmp.cticket;
#endif
		if ((uint8_t)(tmp.nticket - cticket) == delta) {
			return;
		}
		delay(1);
	}
	release_assert(false);
}

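/*
 * Worker thread: once the main test thread is observed holding the lock,
 * queue behind it with hw_lck_ticket_lock_allow_invalid() and expect the
 * acquisition to fail, because the holder invalidates the lock before
 * unlocking.
 */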
__dead2
static void
hw_lck_ticket_allow_invalid_worker(void *arg, wait_result_t __unused wr)
{
	hw_lck_ticket_t *lck = arg;
	hw_lock_status_t rc;

	/* wait until we can observe the test take the lock */
	hw_lck_ticket_test_wait_for_delta(lck, 1, 10);

	rc = hw_lck_ticket_lock_allow_invalid(lck,
	    &hw_lock_test_give_up_policy, NULL);
	release_assert(rc == HW_LOCK_INVALID); // because the other thread invalidated it
	release_assert(preemption_enabled());

	thread_terminate_self();
	__builtin_unreachable();
}
#endif /* !KASAN */

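/*
 * Exercises hw_lck_ticket_lock_allow_invalid() against every interesting
 * state of the target memory: zeroed (never initialized) memory, a valid
 * live lock, a lock invalidated while a waiter is queued, and finally
 * unmapped memory (the backing page is depopulated before the last probe).
 */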
static int
hw_lck_ticket_allow_invalid_test(__unused int64_t in, int64_t *out)
{
	vm_offset_t addr = 0;
	hw_lck_ticket_t *lck;
	kern_return_t kr;
	hw_lock_status_t rc;

	printf("%s: STARTING\n", __func__);

	kr = kmem_alloc(kernel_map, &addr, PAGE_SIZE,
	    KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_DIAG);
	if (kr != KERN_SUCCESS) {
		printf("%s: kmem_alloc failed (%d)\n", __func__, kr);
		return ENOMEM;
	}

	lck = (hw_lck_ticket_t *)addr;
	rc = hw_lck_ticket_lock_allow_invalid(lck,
	    &hw_lock_test_give_up_policy, NULL);
	release_assert(rc == HW_LOCK_INVALID); // because the lock is 0
	release_assert(preemption_enabled());

	hw_lck_ticket_init(lck, NULL);

	release_assert(hw_lck_ticket_lock_try(lck, NULL));
	release_assert(!hw_lck_ticket_lock_try(lck, NULL));
	hw_lck_ticket_unlock(lck);

	rc = hw_lck_ticket_lock_allow_invalid(lck,
	    &hw_lock_test_give_up_policy, NULL);
	release_assert(rc == HW_LOCK_ACQUIRED); // because the lock is initialized
	release_assert(!preemption_enabled());

#if SCHED_HYGIENE_DEBUG
	if (os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed) <
	    sane_us2abs(20 * 1000)) {
		/*
		 * This test currently relies on timeouts that cannot always
		 * be guaranteed (rdar://84691107). Abandon the measurement if
		 * we have a tight timeout.
		 */
		abandon_preemption_disable_measurement();
	}
#endif

	hw_lck_ticket_unlock(lck);
	release_assert(preemption_enabled());

#if !KASAN
	thread_t th;

	kr = kernel_thread_start_priority(hw_lck_ticket_allow_invalid_worker, lck,
	    BASEPRI_KERNEL, &th);
	release_assert(kr == KERN_SUCCESS);
	thread_deallocate(th);

	/* take the lock, then invalidate it while the worker is queued */
	hw_lck_ticket_lock(lck, NULL);

	/* wait for the worker thread to take its reservation */
	hw_lck_ticket_test_wait_for_delta(lck, 2, 20);
	hw_lck_ticket_invalidate(lck);
	hw_lck_ticket_unlock(lck);
	hw_lck_ticket_destroy(lck, NULL);

	hw_lck_ticket_init(lck, NULL);
#endif /* !KASAN */

	kernel_memory_depopulate(addr, PAGE_SIZE, KMA_KOBJECT,
	    VM_KERN_MEMORY_DIAG);

	rc = hw_lck_ticket_lock_allow_invalid(lck,
	    &hw_lock_test_give_up_policy, NULL);
	release_assert(rc == HW_LOCK_INVALID); // because the memory is unmapped

	kmem_free(kernel_map, addr, PAGE_SIZE);

	printf("%s: SUCCESS\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(hw_lck_ticket_allow_invalid, hw_lck_ticket_allow_invalid_test);


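/*
 * Fixtures shared by the SMR hash tests: an element keyed by a scalar
 * `val`, scalar-key traits bound to the system SMR domain, and a mutex
 * used to serialize resize operations. Elements are never freed while the
 * hash is live, so obj_try_get can unconditionally succeed.
 */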
struct smrh_elem {
	struct smrq_slink link;
	uintptr_t val;
};

static bool
smrh_elem_try_get(void *arg __unused)
{
	return true;
}

SMRH_TRAITS_DEFINE_SCALAR(smrh_test_traits, struct smrh_elem, val, link,
    .domain = &smr_system,
    .obj_try_get = smrh_elem_try_get);

LCK_GRP_DECLARE(smrh_test_grp, "smrh_test");
LCK_MTX_DECLARE(smrh_test_lck, &smrh_test_grp);

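/*
 * smr_hash_basic_test: inserts the first half of a set of unique keys,
 * verifies membership (present keys, absent keys, and full iteration),
 * then re-verifies after each step of a grow/grow/shrink/shrink cycle.
 */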
static int
smr_hash_basic_test(__unused int64_t in, int64_t *out)
{
	__auto_type T = &smrh_test_traits;
	const size_t nelems = 64;
	struct smrh_elem e_buf[nelems];
	struct smr_hash h_buf;

	struct smrh_elem *elems = e_buf;
	struct smr_hash *h = &h_buf;

	__auto_type check_content = ^{
		struct smrh_elem *e;
		smrh_key_t key;
		bool seen[nelems];

		memset(seen, 0, sizeof(seen));

		assert3u(smr_hash_serialized_count(h), ==, nelems / 2);

		for (size_t i = 0; i < nelems / 2; i++) {
			key = SMRH_SCALAR_KEY(elems[i].val);
			release_assert(smr_hash_entered_find(h, key, T));

			key = SMRH_SCALAR_KEY(elems[i + nelems / 2].val);
			release_assert(!smr_hash_entered_find(h, key, T));
		}

		smr_hash_foreach(e, h, T) {
			for (size_t i = 0; i < nelems / 2; i++) {
				if (e->val == elems[i].val) {
					release_assert(!seen[i]);
					seen[i] = true;
					break;
				}
			}
		}

		for (size_t i = 0; i < nelems / 2; i++) {
			release_assert(seen[i]);
		}
	};

	printf("%s: STARTING\n", __func__);

	smr_hash_init_empty(h);

	assert3u(smr_hash_serialized_count(h), ==, 0);
	release_assert(!smr_hash_entered_find(h, SMRH_SCALAR_KEY(0ul), T));
	release_assert(!smr_hash_entered_find(h, SMRH_SCALAR_KEY(42ul), T));
	release_assert(!smr_hash_entered_find(h, SMRH_SCALAR_KEY(314ul), T));
	release_assert(smr_hash_is_empty_initialized(h));

	smr_hash_init(h, 4);

	release_assert(!smr_hash_is_empty_initialized(h));

	printf("%s: populating the hash with unique entries\n", __func__);

	uintptr_t base = early_random();
	for (size_t i = 0; i < nelems; i++) {
		elems[i].val = base + (uint16_t)early_random() + 1;
		base = elems[i].val;
	}

	for (size_t i = 0; i < nelems / 2; i++) {
		smr_hash_serialized_insert(h, &elems[i].link, T);
	}
	check_content();

	static const bool progression[4] = {
		true, true, false, false,
	};

	for (size_t step = 0; step < ARRAY_COUNT(progression); step++) {
		if (progression[step]) {
			printf("%s: growing the hash\n", __func__);
			lck_mtx_lock(&smrh_test_lck);
			smr_hash_grow_and_unlock(h, &smrh_test_lck, T);
		} else {
			printf("%s: shrinking the hash\n", __func__);
			lck_mtx_lock(&smrh_test_lck);
			smr_hash_shrink_and_unlock(h, &smrh_test_lck, T);
		}
		check_content();
	}

	printf("%s: destroying the hash\n", __func__);
	smr_hash_destroy(h);

	printf("%s: SUCCESS\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(smr_hash_basic, smr_hash_basic_test);

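/*
 * smr_shash_basic_test: same idea for the scalable hash (smr_shash), with
 * enough elements (8192) that insertions and removals drive several
 * automatic resizes; the last `never` keys are generated but never
 * inserted, so negative lookups stay exercised throughout.
 */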
static int
smr_shash_basic_test(__unused int64_t in, int64_t *out)
{
	__auto_type T = &smrh_test_traits;
	const size_t nelems = 8192;
	const size_t never = 512; /* never inserted elements */
	struct smr_shash h_buf;

	struct smrh_elem *elems;
	struct smr_shash *h = &h_buf;

	elems = kalloc_type(struct smrh_elem, nelems, Z_WAITOK | Z_ZERO);
	if (elems == NULL) {
		return ENOMEM;
	}

	__auto_type check_content = ^(size_t max_inserted){
		smrh_key_t key;
		size_t n = 0;

		assert3u(counter_load(&h->smrsh_count), ==, max_inserted);

		smrht_enter(T);

		for (size_t i = 0; i < nelems; i++, n++) {
			if (n > 0 && n % 32 == 0) {
				/* don't hold the SMR critical section for too long */
				smrht_leave(T);
				smrht_enter(T);
			}
			key = SMRH_SCALAR_KEY(elems[i].val);
			if (i < max_inserted) {
				release_assert(smr_shash_entered_find(h, key, T));
			} else {
				release_assert(!smr_shash_entered_find(h, key, T));
			}
		}

		smrht_leave(T);
	};

	printf("%s: STARTING\n", __func__);

	smr_shash_init(h, SMRSH_COMPACT, 8);

	printf("%s: populating the hash with unique entries\n", __func__);

	uintptr_t base = early_random();
	for (size_t i = 0; i < nelems; i++) {
		elems[i].val = base + (uint32_t)early_random() + 1;
		base = elems[i].val;
	}

	printf("%s: insert into the hash, triggering several resizes\n", __func__);

	for (size_t i = 0; i < nelems - never; i++) {
		smrh_key_t key = SMRH_SCALAR_KEY(elems[i].val);
		struct smrh_elem *dupe;

		if (i > 0 && i % 32 == 0) {
			check_content(i);
		}

		dupe = smr_shash_get_or_insert(h, key, &elems[i].link, T);
		release_assert(dupe == NULL);
	}
	check_content(nelems - never);

	printf("%s: remove from the hash, triggering several resizes\n", __func__);

	for (size_t i = nelems - never; i-- > 0;) {
		smr_shash_remove(h, &elems[i].link, T);

		if (i % 32 == 0) {
			check_content(i);
		}
	}

	printf("%s: destroying the hash\n", __func__);
	smr_shash_destroy(h, T, NULL);

	printf("%s: SUCCESS\n", __func__);

	kfree_type(struct smrh_elem, nelems, elems);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(smr_shash_basic, smr_shash_basic_test);

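/*
 * State shared by the sleepable-SMR stress test below. `driver` is the
 * sysctl thread (worker index 0), which joins the load itself and does the
 * periodic logging; `active` counts workers still running.
 */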
struct smr_ctx {
	thread_t driver;
	smr_t smr;
	uint32_t active;
	uint32_t idx;
	uint64_t deadline;
	uint32_t calls_sent;
	uint32_t calls_done;
	uint32_t syncs_done;
	uint32_t barriers_done;
};

struct smr_call_ctx {
	struct smr_node node;
	struct smr_ctx *ctx;
};

static void
smr_sleepable_stress_cb(smr_node_t node)
{
	struct smr_call_ctx *cctx;

	cctx = __container_of(node, struct smr_call_ctx, node);
	os_atomic_inc(&cctx->ctx->calls_done, relaxed);

	kfree_type(struct smr_call_ctx, cctx);
}

static void
smr_sleepable_stress_make_call(struct smr_ctx *ctx)
{
	struct smr_call_ctx *cctx;

	cctx = kalloc_type(struct smr_call_ctx, Z_WAITOK);
	cctx->ctx = ctx;
	os_atomic_inc(&ctx->calls_sent, relaxed);
	smr_call(ctx->smr, &cctx->node, sizeof(*cctx), smr_sleepable_stress_cb);
}

static void
smr_sleepable_stress_log(struct smr_ctx *ctx, uint64_t n)
{
	printf("%s[%lld]: "
	    "%d/%d calls, %d syncs, %d barriers, "
	    "rd-seq: %ld, wr-seq: %ld\n",
	    __func__, n,
	    ctx->calls_done, ctx->calls_sent,
	    ctx->syncs_done,
	    ctx->barriers_done,
	    ctx->smr->smr_clock.s_rd_seq / SMR_SEQ_INC,
	    ctx->smr->smr_clock.s_wr_seq / SMR_SEQ_INC);
}

static uint32_t
smr_sleepable_stress_idx(struct smr_ctx *ctx, thread_t self)
{
	if (ctx->driver == self) {
		return 0;
	}
	return os_atomic_inc(&ctx->idx, relaxed);
}


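/*
 * Each worker loops until the deadline: it enters a sleepable SMR critical
 * section and randomly sleeps (worker 1 only), yields, or spins on core
 * inside it, then leaves and randomly issues smr_call() (20%),
 * smr_synchronize() (2%, odd-indexed workers only) or smr_barrier() (1%,
 * odd-indexed workers only). Worker 0 (the driver) also logs progress
 * every quarter second.
 */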
static void
smr_sleepable_stress_worker(void *arg, wait_result_t wr __unused)
{
	thread_t self = current_thread();
	struct smr_ctx *ctx = arg;
	smr_t smr = ctx->smr;

	const uint64_t step = NSEC_PER_SEC / 4;
	const uint64_t start = mach_absolute_time();
	const uint32_t idx = smr_sleepable_stress_idx(ctx, self);

	uint64_t now, delta, last = 0;

	printf("%s: thread %p starting\n", __func__, self);

	while ((now = mach_absolute_time()) < ctx->deadline) {
		struct smr_tracker smrt;
		uint64_t what;

		if (idx == 0) {
			absolutetime_to_nanoseconds(now - start, &delta);
			if (delta >= (last + 1) * step) {
				last = delta / step;
				smr_sleepable_stress_log(ctx, last);
			}
		}

		smr_enter_sleepable(smr, &smrt);
		release_assert(smr_entered(smr));

		what = early_random() % 100;
		if (what == 0 && idx == 1) {
			/* 1% of the time, sleep for a long time */
			delay_for_interval(1, NSEC_PER_MSEC);
		} else if (what < 10) {
			/* 9% of the time, just yield */
			thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);
		} else if (what < 30) {
			/* 20% of the time, do some longer work on core */
			uint64_t busy_start = mach_absolute_time();

			do {
				now = mach_absolute_time();
				absolutetime_to_nanoseconds(now - busy_start, &delta);
			} while (delta < (what + 50) * NSEC_PER_USEC);
		}

		release_assert(smr_entered(smr));
		smr_leave_sleepable(smr, &smrt);

		what = early_random() % 100;
		if (what < 20) {
			/* smr_call 20% of the time */
			smr_sleepable_stress_make_call(ctx);
		} else if (what < 22 && (idx & 1)) {
			/* smr_synchronize 2% of the time for half the threads */
			smr_synchronize(smr);
			os_atomic_inc(&ctx->syncs_done, relaxed);
		} else if (what < 23 && (idx & 1)) {
			/* smr_barrier 1% of the time for half the threads */
			smr_barrier(smr);
			os_atomic_inc(&ctx->barriers_done, relaxed);
		}
	}

	printf("%s: thread %p done\n", __func__, self);

	if (idx != 0) {
		if (os_atomic_dec(&ctx->active, relaxed) == 0) {
			thread_wakeup(ctx);
		}

		thread_terminate_self();
		__builtin_unreachable();
	}
}

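/*
 * Driver for the stress test: starts 2x ncpu workers (deliberately
 * overcommitting the system), joins the load itself under a priority
 * floor, waits for all workers to drain, then checks that every
 * smr_call() callback ran.
 */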
static int
smr_sleepable_stress_test(int64_t seconds, int64_t *out)
{
	thread_pri_floor_t token;
	struct smr_ctx ctx = { };
	thread_t th;

	if (seconds < 0 || seconds > 60) {
		return EINVAL;
	}

	printf("%s: STARTING\n", __func__);

	ctx.active = zpercpu_count() * 2; /* overcommit the system on purpose */
	ctx.driver = current_thread();
	ctx.smr = smr_domain_create(SMR_SLEEPABLE, "test (sleepable)");
	assert3p(ctx.smr, !=, NULL);

	clock_interval_to_deadline((uint32_t)seconds, NSEC_PER_SEC, &ctx.deadline);

	/*
	 * The workers will hammer the system pretty hard;
	 * stay above that crowd.
	 */
	token = thread_priority_floor_start();

	for (uint32_t i = 1; i < ctx.active; i++) {
		kernel_thread_start_priority(smr_sleepable_stress_worker,
		    &ctx, BASEPRI_DEFAULT, &th);
		thread_deallocate(th);
	}

	smr_sleepable_stress_worker(&ctx, THREAD_AWAKENED);

	thread_priority_floor_end(&token);

	assert_wait(&ctx, THREAD_UNINT);
	if (os_atomic_dec(&ctx.active, relaxed) == 0) {
		clear_wait(ctx.driver, THREAD_AWAKENED);
	} else {
		thread_block(THREAD_CONTINUE_NULL);
	}

	smr_barrier(ctx.smr); /* to get accurate stats */
	smr_sleepable_stress_log(&ctx, seconds * 4);
	smr_domain_free(ctx.smr);

	assert3u(ctx.calls_done, ==, ctx.calls_sent);

	printf("%s: SUCCESS\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(smr_sleepable_stress, smr_sleepable_stress_test);

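/*
 * Example invocation from userspace (sysctl node name assumed from
 * SYSCTL_TEST_REGISTER; the value written is the run duration in seconds):
 *
 *     sysctl debug.test.smr_sleepable_stress=10
 */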