1 #include <mach_ldebug.h>
2 #include <debug.h>
3
4 #include <mach/kern_return.h>
5 #include <mach/mach_host_server.h>
6 #include <mach_debug/lockgroup_info.h>
7
8 #include <os/atomic.h>
9
10 #include <kern/locks.h>
11 #include <kern/smr_hash.h>
12 #include <kern/misc_protos.h>
13 #include <kern/kalloc.h>
14 #include <kern/thread.h>
15 #include <kern/processor.h>
16 #include <kern/sched_prim.h>
17 #include <kern/debug.h>
18 #include <libkern/section_keywords.h>
19 #include <machine/atomic.h>
20 #include <machine/machine_cpu.h>
21 #include <machine/atomic.h>
22 #include <string.h>
23 #include <kern/kalloc.h>
24
25 #include <sys/kdebug.h>
26 #include <sys/errno.h>
27
28 #if SCHED_HYGIENE_DEBUG
29 static uint64_t
sane_us2abs(uint64_t us)30 sane_us2abs(uint64_t us)
31 {
32 uint64_t t;
33 nanoseconds_to_absolutetime(us * NSEC_PER_USEC, &t);
34 return t;
35 }
36 #endif
37
38 #if !KASAN
39 static void
hw_lck_ticket_test_wait_for_delta(hw_lck_ticket_t * lck,uint8_t delta,int msec)40 hw_lck_ticket_test_wait_for_delta(hw_lck_ticket_t *lck, uint8_t delta, int msec)
41 {
42 hw_lck_ticket_t tmp;
43
44 delta *= HW_LCK_TICKET_LOCK_INCREMENT;
45 for (int i = 0; i < msec * 1000; i++) {
46 tmp.lck_value = os_atomic_load(&lck->lck_value, relaxed);
47 #if CONFIG_PV_TICKET
48 const uint8_t cticket = tmp.cticket &
49 ~HW_LCK_TICKET_LOCK_PVWAITFLAG;
50 #else
51 const uint8_t cticket = tmp.cticket;
52 #endif
53 if ((uint8_t)(tmp.nticket - cticket) == delta) {
54 return;
55 }
56 delay(1);
57 }
58 assert(false);
59 }
60
61 __dead2
62 static void
hw_lck_ticket_allow_invalid_worker(void * arg,wait_result_t __unused wr)63 hw_lck_ticket_allow_invalid_worker(void *arg, wait_result_t __unused wr)
64 {
65 hw_lck_ticket_t *lck = arg;
66 hw_lock_status_t rc;
67
68 /* wait until we can observe the test take the lock */
69 hw_lck_ticket_test_wait_for_delta(lck, 1, 10);
70
71 rc = hw_lck_ticket_lock_allow_invalid(lck,
72 &hw_lock_test_give_up_policy, NULL);
73 assert(rc == HW_LOCK_INVALID); // because the other thread invalidated it
74 assert(preemption_enabled());
75
76 thread_terminate_self();
77 __builtin_unreachable();
78 }
79 #endif /* !KASAN */
80
81 static int
hw_lck_ticket_allow_invalid_test(__unused int64_t in,int64_t * out)82 hw_lck_ticket_allow_invalid_test(__unused int64_t in, int64_t *out)
83 {
84 vm_offset_t addr = 0;
85 hw_lck_ticket_t *lck;
86 kern_return_t kr;
87 hw_lock_status_t rc;
88
89 printf("%s: STARTING\n", __func__);
90
91 kr = kmem_alloc(kernel_map, &addr, PAGE_SIZE,
92 KMA_ZERO | KMA_KOBJECT, VM_KERN_MEMORY_DIAG);
93 if (kr != KERN_SUCCESS) {
94 printf("%s: kma failed (%d)\n", __func__, kr);
95 return ENOMEM;
96 }
97
98 lck = (hw_lck_ticket_t *)addr;
99 rc = hw_lck_ticket_lock_allow_invalid(lck,
100 &hw_lock_test_give_up_policy, NULL);
101 assert(rc == HW_LOCK_INVALID); // because the lock is 0
102 assert(preemption_enabled());
103
104 hw_lck_ticket_init(lck, NULL);
105
106 assert(hw_lck_ticket_lock_try(lck, NULL));
107 assert(!hw_lck_ticket_lock_try(lck, NULL));
108 hw_lck_ticket_unlock(lck);
109
110 rc = hw_lck_ticket_lock_allow_invalid(lck,
111 &hw_lock_test_give_up_policy, NULL);
112 assert(rc == HW_LOCK_ACQUIRED); // because the lock is initialized
113 assert(!preemption_enabled());
114
115 #if SCHED_HYGIENE_DEBUG
116 if (os_atomic_load(&sched_preemption_disable_threshold_mt, relaxed) < sane_us2abs(20 * 1000)) {
117 /*
118 * This test currently relies on timeouts that cannot always
119 * be guaranteed (rdar://84691107). Abandon the measurement if
120 * we have a tight timeout.
121 */
122 abandon_preemption_disable_measurement();
123 }
124 #endif
125
126 hw_lck_ticket_unlock(lck);
127 assert(preemption_enabled());
128
129 #if !KASAN
130 thread_t th;
131
132 kr = kernel_thread_start_priority(hw_lck_ticket_allow_invalid_worker, lck,
133 BASEPRI_KERNEL, &th);
134 assert(kr == KERN_SUCCESS);
135 thread_deallocate(th);
136
137 /* invalidate the lock */
138 hw_lck_ticket_lock(lck, NULL);
139
140 /* wait for the worker thread to take the reservation */
141 hw_lck_ticket_test_wait_for_delta(lck, 2, 20);
142 hw_lck_ticket_invalidate(lck);
143 hw_lck_ticket_unlock(lck);
144 hw_lck_ticket_destroy(lck, NULL);
145
146 hw_lck_ticket_init(lck, NULL);
147 #endif /* !KASAN */
148
149 kernel_memory_depopulate(addr, PAGE_SIZE, KMA_KOBJECT,
150 VM_KERN_MEMORY_DIAG);
151
152 rc = hw_lck_ticket_lock_allow_invalid(lck,
153 &hw_lock_test_give_up_policy, NULL);
154 assert(rc == HW_LOCK_INVALID); // because the memory is unmapped
155
156 kmem_free(kernel_map, addr, PAGE_SIZE);
157
158 printf("%s: SUCCESS\n", __func__);
159
160 *out = 1;
161 return 0;
162 }
163 SYSCTL_TEST_REGISTER(hw_lck_ticket_allow_invalid, hw_lck_ticket_allow_invalid_test);
164
165
166 struct smrh_elem {
167 struct smrq_slink link;
168 uintptr_t val;
169 };
170
171 static bool
smrh_elem_try_get(void * arg __unused)172 smrh_elem_try_get(void *arg __unused)
173 {
174 return true;
175 }
176
177 SMRH_TRAITS_DEFINE_SCALAR(smrh_test_traits, struct smrh_elem, val, link,
178 .domain = &smr_system,
179 .obj_try_get = smrh_elem_try_get);
180
181 LCK_GRP_DECLARE(smrh_test_grp, "foo");
182 LCK_MTX_DECLARE(smrh_test_lck, &smrh_test_grp);
183
184 static int
smr_hash_basic_test(__unused int64_t in,int64_t * out)185 smr_hash_basic_test(__unused int64_t in, int64_t *out)
186 {
187 __auto_type T = &smrh_test_traits;
188 const size_t nelems = 64;
189 struct smrh_elem e_buf[nelems];
190 struct smr_hash h_buf;
191
192 struct smrh_elem *elems = e_buf;
193 struct smr_hash *h = &h_buf;
194
195 __auto_type check_content = ^{
196 struct smrh_elem *e;
197 smrh_key_t key;
198 bool seen[nelems] = { };
199
200 assert3u(h->smrh_count, ==, nelems / 2);
201
202 for (int i = 0; i < nelems / 2; i++) {
203 key = SMRH_SCALAR_KEY(elems[i].val);
204 assert(smr_hash_entered_find(h, key, T));
205
206 key = SMRH_SCALAR_KEY(elems[i + nelems / 2].val);
207 assert(!smr_hash_entered_find(h, key, T));
208 }
209
210 smr_hash_foreach(e, h, T) {
211 for (int i = 0; i < nelems / 2; i++) {
212 if (e->val == elems[i].val) {
213 assert(!seen[i]);
214 seen[i] = true;
215 break;
216 }
217 }
218 }
219
220 for (int i = 0; i < nelems / 2; i++) {
221 assert(seen[i]);
222 }
223 };
224
225 printf("%s: STARTING\n", __func__);
226
227 smr_hash_init(h, 4);
228
229 printf("%s: populating the hash with unique entries\n", __func__);
230
231 uintptr_t base = early_random();
232 for (size_t i = 0; i < nelems; i++) {
233 elems[i].val = base + (uint16_t)early_random() + 1;
234 base = elems[i].val;
235 }
236
237 for (int i = 0; i < nelems / 2; i++) {
238 smr_hash_serialized_insert(h, &elems[i].link, T);
239 }
240 check_content();
241
242 static bool progression[4] = {
243 1, 1, 0, 0,
244 };
245
246 for (int step = 0; step < ARRAY_COUNT(progression); step++) {
247 if (progression[step]) {
248 printf("%s: growing the hash\n", __func__);
249 lck_mtx_lock(&smrh_test_lck);
250 smr_hash_grow_and_unlock(h, &smrh_test_lck, T);
251 } else {
252 printf("%s: shrinking the hash\n", __func__);
253 lck_mtx_lock(&smrh_test_lck);
254 smr_hash_shrink_and_unlock(h, &smrh_test_lck, T);
255 }
256 check_content();
257 }
258
259 printf("%s: destroying the hash\n", __func__);
260 smr_hash_destroy(h);
261
262 printf("%s: SUCCESS\n", __func__);
263
264 *out = 1;
265 return 0;
266 }
267 SYSCTL_TEST_REGISTER(smr_hash_basic, smr_hash_basic_test);
268
269 static int
smr_shash_basic_test(__unused int64_t in,int64_t * out)270 smr_shash_basic_test(__unused int64_t in, int64_t *out)
271 {
272 __auto_type T = &smrh_test_traits;
273 const size_t nelems = 8192;
274 const size_t never = 512; /* never inserted elements */
275 struct smr_shash h_buf;
276
277 struct smrh_elem *elems;
278 struct smr_shash *h = &h_buf;
279
280 elems = kalloc_type(struct smrh_elem, nelems, Z_WAITOK | Z_ZERO);
281 if (elems == 0) {
282 return ENOMEM;
283 }
284
285 __auto_type check_content = ^(size_t max_inserted){
286 smrh_key_t key;
287 size_t n = 0;
288
289 assert3u(counter_load(&h->smrsh_count), ==, max_inserted);
290
291 smrht_enter(T);
292
293 for (size_t i = 0; i < nelems; i++, n++) {
294 if (n > 0 && n % 32 == 0) {
295 smrht_leave(T);
296 smrht_enter(T);
297 }
298 key = SMRH_SCALAR_KEY(elems[i].val);
299 if (i < max_inserted) {
300 assert(smr_shash_entered_find(h, key, T));
301 } else {
302 assert(!smr_shash_entered_find(h, key, T));
303 }
304 }
305
306 smrht_leave(T);
307 };
308
309 printf("%s: STARTING\n", __func__);
310
311 smr_shash_init(h, SMRSH_COMPACT, 8);
312
313 printf("%s: populating the hash with unique entries\n", __func__);
314
315 uintptr_t base = early_random();
316 for (size_t i = 0; i < nelems; i++) {
317 elems[i].val = base + (uint32_t)early_random();
318 base = elems[i].val;
319 }
320
321 printf("%s: insert into the hash, triggering several resizes\n", __func__);
322
323 for (size_t i = 0; i < nelems - never; i++) {
324 smrh_key_t key = SMRH_SCALAR_KEY(elems[i].val);
325 struct smrh_elem *dupe;
326
327 if (i > 0 && i % 32 == 0) {
328 check_content(i);
329 }
330
331 dupe = smr_shash_get_or_insert(h, key, &elems[i].link, T);
332 assert(dupe == NULL);
333 }
334 check_content(nelems - never);
335
336 printf("%s: remove from the hash, triggering several resizes\n", __func__);
337
338 for (size_t i = nelems - never; i-- > 0;) {
339 smr_shash_remove(h, &elems[i].link, T);
340
341 if (i % 32 == 0) {
342 check_content(i);
343 }
344 }
345
346 printf("%s: destroying the hash\n", __func__);
347 smr_shash_destroy(h, T, NULL);
348
349 printf("%s: SUCCESS\n", __func__);
350
351 kfree_type(struct smrh_elem, nelems, elems);
352
353 *out = 1;
354 return 0;
355 }
356 SYSCTL_TEST_REGISTER(smr_shash_basic, smr_shash_basic_test);
357
358 struct smr_ctx {
359 thread_t driver;
360 smr_t smr;
361 uint32_t active;
362 uint32_t idx;
363 uint64_t deadline;
364 uint32_t calls_sent;
365 uint32_t calls_done;
366 uint32_t syncs_done;
367 uint32_t barriers_done;
368 };
369
370 struct smr_call_ctx {
371 struct smr_node node;
372 struct smr_ctx *ctx;
373 };
374
375 static void
smr_sleepable_stress_cb(smr_node_t node)376 smr_sleepable_stress_cb(smr_node_t node)
377 {
378 struct smr_call_ctx *cctx;
379
380 cctx = __container_of(node, struct smr_call_ctx, node);
381 os_atomic_inc(&cctx->ctx->calls_done, relaxed);
382
383 kfree_type(struct smr_call_ctx, cctx);
384 }
385
386 static void
smr_sleepable_stress_make_call(struct smr_ctx * ctx)387 smr_sleepable_stress_make_call(struct smr_ctx *ctx)
388 {
389 struct smr_call_ctx *cctx;
390
391 cctx = kalloc_type(struct smr_call_ctx, Z_WAITOK);
392 cctx->ctx = ctx;
393 os_atomic_inc(&ctx->calls_sent, relaxed);
394 smr_call(ctx->smr, &cctx->node, sizeof(*cctx), smr_sleepable_stress_cb);
395 }
396
397 static void
smr_sleepable_stress_log(struct smr_ctx * ctx,uint64_t n)398 smr_sleepable_stress_log(struct smr_ctx *ctx, uint64_t n)
399 {
400 printf("%s[%lld]: "
401 "%d/%d calls, %d syncs, %d barriers, "
402 "rd-seq: %ld, wr-seq: %ld\n",
403 __func__, n,
404 ctx->calls_done, ctx->calls_sent,
405 ctx->syncs_done,
406 ctx->barriers_done,
407 ctx->smr->smr_clock.s_rd_seq / SMR_SEQ_INC,
408 ctx->smr->smr_clock.s_wr_seq / SMR_SEQ_INC);
409 }
410
411 static uint32_t
smr_sleepable_stress_idx(struct smr_ctx * ctx,thread_t self)412 smr_sleepable_stress_idx(struct smr_ctx *ctx, thread_t self)
413 {
414 if (ctx->driver == self) {
415 return 0;
416 }
417 return os_atomic_inc(&ctx->idx, relaxed);
418 }
419
420
421 static void
smr_sleepable_stress_worker(void * arg,wait_result_t wr __unused)422 smr_sleepable_stress_worker(void *arg, wait_result_t wr __unused)
423 {
424 thread_t self = current_thread();
425 struct smr_ctx *ctx = arg;
426 smr_t smr = ctx->smr;
427
428 const uint64_t step = NSEC_PER_SEC / 4;
429 const uint64_t start = mach_absolute_time();
430 const uint32_t idx = smr_sleepable_stress_idx(ctx, self);
431
432 uint64_t now, delta, last = 0;
433
434 printf("%s: thread %p starting\n", __func__, self);
435
436 while ((now = mach_absolute_time()) < ctx->deadline) {
437 struct smr_tracker smrt;
438 uint64_t what;
439
440 if (idx == 0) {
441 absolutetime_to_nanoseconds(now - start, &delta);
442 if (delta >= (last + 1) * step) {
443 last = delta / step;
444 smr_sleepable_stress_log(ctx, last);
445 }
446 }
447
448 smr_enter_sleepable(smr, &smrt);
449 assert(smr_entered(smr));
450
451 what = early_random() % 100;
452 if (what == 0 && idx == 1) {
453 /* 1% of the time, sleep for a long time */
454 delay_for_interval(1, NSEC_PER_MSEC);
455 } else if (what < 10) {
456 /* 9% of the time, just yield */
457 thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);
458 } else if (what < 30) {
459 /* 20% of the time, do some longer work on core */
460 uint64_t busy_start = mach_absolute_time();
461
462 do {
463 now = mach_absolute_time();
464 absolutetime_to_nanoseconds(now - busy_start, &delta);
465 } while (delta < (what + 50) * NSEC_PER_USEC);
466 }
467
468 assert(smr_entered(smr));
469 smr_leave_sleepable(smr, &smrt);
470
471 what = early_random() % 100;
472 if (what < 20) {
473 /* smr_call 20% of the time */
474 smr_sleepable_stress_make_call(ctx);
475 } else if (what < 22 && (idx & 1)) {
476 /* smr_synchronize 2% of the time for half the threads */
477 smr_synchronize(smr);
478 os_atomic_inc(&ctx->syncs_done, relaxed);
479 } else if (what < 23 && (idx & 1)) {
480 /* smr_barrier 1% of the time for half the threads */
481 smr_barrier(smr);
482 os_atomic_inc(&ctx->barriers_done, relaxed);
483 }
484 }
485
486 printf("%s: thread %p done\n", __func__, self);
487
488 if (idx != 0) {
489 if (os_atomic_dec(&ctx->active, relaxed) == 0) {
490 thread_wakeup(ctx);
491 }
492
493 thread_terminate_self();
494 __builtin_unreachable();
495 }
496 }
497
498 static int
smr_sleepable_stress_test(int64_t seconds,int64_t * out)499 smr_sleepable_stress_test(int64_t seconds, int64_t *out)
500 {
501 thread_pri_floor_t token;
502 struct smr_ctx ctx = { };
503 thread_t th;
504
505 if (seconds > 60) {
506 return EINVAL;
507 }
508
509 printf("%s: STARTING\n", __func__);
510
511 ctx.active = zpercpu_count() * 2; /* overcommit the system on purpose */
512 ctx.driver = current_thread();
513 ctx.smr = smr_domain_create(SMR_SLEEPABLE, "test (sleepable)");
514 assert3p(ctx.smr, !=, NULL);
515
516 clock_interval_to_deadline((uint32_t)seconds, NSEC_PER_SEC, &ctx.deadline);
517
518 /*
519 * We will relatively massively hammer the system,
520 * stay above that crowd.
521 */
522 token = thread_priority_floor_start();
523
524 for (uint32_t i = 1; i < ctx.active; i++) {
525 kernel_thread_start_priority(smr_sleepable_stress_worker,
526 &ctx, BASEPRI_DEFAULT, &th);
527 thread_deallocate(th);
528 }
529
530 smr_sleepable_stress_worker(&ctx, THREAD_AWAKENED);
531
532 thread_priority_floor_end(&token);
533
534 assert_wait(&ctx, THREAD_UNINT);
535 if (os_atomic_dec(&ctx.active, relaxed) == 0) {
536 clear_wait(ctx.driver, THREAD_AWAKENED);
537 } else {
538 thread_block(THREAD_CONTINUE_NULL);
539 }
540
541 smr_barrier(ctx.smr); /* to get accurate stats */
542 smr_sleepable_stress_log(&ctx, seconds * 4);
543 smr_domain_free(ctx.smr);
544
545 assert3u(ctx.calls_done, ==, ctx.calls_sent);
546
547 printf("%s: SUCCESS\n", __func__);
548
549 *out = 1;
550 return 0;
551 }
552 SYSCTL_TEST_REGISTER(smr_sleepable_stress, smr_sleepable_stress_test);
553