/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/cpu_data.h>
#include <kern/hazard.h>
#include <kern/mpsc_queue.h>
#include <kern/percpu.h>
#include <kern/startup.h>
#include <kern/zalloc.h>
#include <sys/queue.h>

#pragma mark - Hazard types and globals

typedef struct hazard_record {
	void                   *hr_val;
	union {
		void          (*hr_dtor)(void *);
		vm_size_t       hr_size;
	};
} *hazard_record_t;

typedef struct hazard_bucket {
	union {
		struct mpsc_queue_chain     hb_mplink;
		STAILQ_ENTRY(hazard_bucket) hb_stqlink;
	};
	uint32_t                hb_count;
	uint32_t                hb_size;
	struct hazard_record    hb_recs[];
} *hazard_bucket_t;

struct hazard_guard_array {
	hazard_bucket_t         hga_bucket;
#if DEBUG || DEVELOPMENT
	unsigned long           hga_mask;
#endif
	struct hazard_guard     hga_array[HAZARD_GUARD_SLOTS];
};

STAILQ_HEAD(hazard_bucket_list, hazard_bucket);


/*! per-cpu state for hazard pointers. */
static struct hazard_guard_array PERCPU_DATA(hazard_guards_array);

/*! the minimum number of items cached in per-cpu buckets */
static TUNABLE(uint32_t, hazard_bucket_count_min, "hazard_bucket_count_min", 8);

/*! the amount of memory pending retirement that causes a forceful flush */
#if XNU_TARGET_OS_OSX
#define HAZARD_RETIRE_THRESHOLD_DEFAULT     (256 << 10)
#else
#define HAZARD_RETIRE_THRESHOLD_DEFAULT     (64 << 10)
#endif
static TUNABLE(vm_size_t, hazard_retire_threshold, "hazard_retire_threshold",
    HAZARD_RETIRE_THRESHOLD_DEFAULT);
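
/*
 * Illustrative only: both knobs above are TUNABLEs, so they can be
 * overridden with boot-args of the same name, for example:
 *
 *	hazard_bucket_count_min=16 hazard_retire_threshold=131072
 *
 * The values shown are examples, not recommendations.
 */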

/*! the number of items cached in per-cpu buckets */
static SECURITY_READ_ONLY_LATE(uint32_t) hazard_bucket_count;

/*! the queue of elements that couldn't be freed immediately */
static struct hazard_bucket_list hazard_buckets_pending =
    STAILQ_HEAD_INITIALIZER(hazard_buckets_pending);

/*! the atomic queue handling deferred deallocations */
static struct mpsc_daemon_queue hazard_deallocate_queue;


#pragma mark - Hazard guards

/*
 * Memory barriers:
 *
 * 1. setting a guard cannot be reordered with subsequent accesses it protects.
 *
 *     ──[ load value ][ set guard ](1)[ reload value ][═════ use value ...
 *                           ^               │
 *                           ╰───────────────╯
 *
 *
 * 2. clearing a guard cannot be reordered with prior accesses it protects.
 *
 *     ... use value ════](2)[ clear guard ]──
 *
 *
 * 3. hazard_retire() needs to ensure that the update to the protected field
 *    is visible to any thread consulting the list of retired pointers.
 *    Note that this fence can be amortized per batch of retired pointers.
 *
 *     ──[ update value ](3)[ retire ]──
 *
 *
 * 4. hazard_scan_and_reclaim() needs to make sure that gathering
 *    retired pointers and scanning guards are fully ordered.
 *
 *    ──[ gather retired pointers ](4)[ guard scan ][ GC ]──
 *
 *
 * With this, `reload value` can't possibly be a pointer to a freed value:
 * - setting the guard "happens before" reloading the value (through (1))
 * - updating a guard value "happens before" freeing it (through (3, 4))
 *
 * Of course, (2) ensures that when the scan loads NULL, then there's no longer
 * any hazardous access in flight and reclamation is safe.
 */
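
/*
 * Illustrative sketch (not part of this file's API surface): a typical
 * reader-side use of the guard API, mirroring what hazard_basic_test()
 * below does.  `struct foo`, `foo_ptr` and use_foo() are made-up names
 * for the example.
 *
 *	static HAZARD_POINTER(struct foo *) foo_ptr;
 *
 *	hazard_guard_t g = hazard_guard_get(0);
 *	struct foo *f = hazard_guard_acquire(g, &foo_ptr);  // fence (1)
 *	if (f != NULL) {
 *		use_foo(f);     // safe: can't be reclaimed while guarded
 *	}
 *	hazard_guard_put(g);    // fence (2)
 */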

__attribute__((always_inline))
hazard_guard_array_t
__hazard_guard_get(size_t slot, size_t count __assert_only)
{
	struct hazard_guard_array *hga;

	disable_preemption();
	hga = PERCPU_GET(hazard_guards_array);
#if DEBUG || DEVELOPMENT
	unsigned long mask = ((1ul << count) - 1) << slot;
	assertf((hga->hga_mask & mask) == 0, "slot %d in use",
	    __builtin_ctzl(hga->hga_mask & mask));
	hga->hga_mask |= mask;
#endif /* DEBUG || DEVELOPMENT */
	return hga->hga_array + slot;
}

static inline void
__hazard_guard_put(hazard_guard_t guard, size_t count __assert_only)
{
#if DEBUG || DEVELOPMENT
	struct hazard_guard_array *hga = PERCPU_GET(hazard_guards_array);
	size_t slot = guard - hga->hga_array;
	unsigned long mask = ((1ul << count) - 1) << slot;

	assertf(slot < HAZARD_GUARD_SLOTS, "invalid guard %p", guard);
	assertf((hga->hga_mask & mask) == mask, "slot %d free",
	    __builtin_ctzl(~hga->hga_mask & mask));
	hga->hga_mask &= ~mask;
#else
	(void)guard;
#endif /* DEBUG || DEVELOPMENT */
	enable_preemption();
}

__attribute__((always_inline))
void
hazard_guard_put(hazard_guard_t guard) /* fence (2) */
{
	os_atomic_store(&guard->hg_val, NULL, release);
	__hazard_guard_put(guard, 1);
}

__attribute__((always_inline))
void
hazard_guard_put_n(hazard_guard_t guard, size_t n) /* fence (2) */
{
	os_atomic_thread_fence(release);
	__builtin_bzero(guard, n * sizeof(guard->hg_val));
	__hazard_guard_put(guard, n);
}

__attribute__((always_inline))
void
hazard_guard_dismiss(hazard_guard_t guard)
{
	os_atomic_store(&guard->hg_val, NULL, relaxed);
	__hazard_guard_put(guard, 1);
}

__attribute__((always_inline))
void
hazard_guard_dismiss_n(hazard_guard_t guard, size_t n)
{
	__builtin_bzero(guard, n * sizeof(guard->hg_val));
	__hazard_guard_put(guard, n);
}

__attribute__((always_inline))
void
hazard_guard_set(hazard_guard_t guard, void *value) /* fence (1) */
{
#if __x86_64__ || __i386__
	os_atomic_xchg(&guard->hg_val, value, seq_cst);
#else /* c11 */
	os_atomic_store(&guard->hg_val, value, relaxed);
	os_atomic_thread_fence(seq_cst);
#endif
}

__attribute__((always_inline))
void
hazard_guard_replace(hazard_guard_t guard, void *value) /* fence (2) and (1) */
{
#if __x86_64__ || __i386__
	os_atomic_xchg(&guard->hg_val, value, seq_cst);
#else /* c11 */
	os_atomic_store(&guard->hg_val, value, release);
	os_atomic_thread_fence(seq_cst);
#endif
}

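/*
 * Illustrative sketch (an assumption, not necessarily how hazard.h
 * implements hazard_guard_acquire()): hazard_guard_set() and
 * hazard_guard_replace() are the building blocks of an acquire loop
 * that publishes the guard and then re-validates the source pointer,
 * relying on fence (1) above.  `source` is a made-up hazard-protected
 * pointer for the example.
 *
 *	void *val = os_atomic_load(source, relaxed);
 *	for (;;) {
 *		hazard_guard_set(guard, val);   // guard visible, fence (1)
 *		void *reloaded = os_atomic_load(source, relaxed);
 *		if (reloaded == val) {
 *			break;  // value re-observed after the guard was published
 *		}
 *		val = reloaded; // raced with an update: protect the new value, retry
 *	}
 */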

#pragma mark - Hazard GC

static inline size_t
hazard_bucket_size(void)
{
	return sizeof(struct hazard_bucket) +
	       hazard_bucket_count * sizeof(struct hazard_record);
}

static hazard_bucket_t
hazard_bucket_alloc(zalloc_flags_t flags)
{
	return kalloc_flags(hazard_bucket_size(), Z_ZERO | flags);
}

static void
hazard_bucket_free(hazard_bucket_t bucket)
{
	kfree(bucket, hazard_bucket_size());
}

void
hazard_retire(void *value, vm_size_t size, void (*destructor)(void *))
{
	struct hazard_guard_array *hga;
	hazard_bucket_t bucket, free_bucket = NULL;

	/* the retired pointer must be aligned */
	assert(((vm_address_t)value % sizeof(vm_offset_t)) == 0);

	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		/*
		 * The system is still single threaded and this module
		 * is still not fully initialized.
		 */
		destructor(value);
		return;
	}

again:
	disable_preemption();
	hga = PERCPU_GET(hazard_guards_array);
	bucket = hga->hga_bucket;
	if (bucket == NULL) {
		if (free_bucket) {
			bucket = free_bucket;
			free_bucket = NULL;
		} else if ((bucket = hazard_bucket_alloc(Z_NOWAIT)) == NULL) {
			enable_preemption();
			free_bucket = hazard_bucket_alloc(Z_WAITOK | Z_NOFAIL);
			goto again;
		}
		hga->hga_bucket = bucket;
	}

	bucket->hb_recs[bucket->hb_count].hr_val = value;
	bucket->hb_recs[bucket->hb_count].hr_dtor = destructor;

	if (os_add_overflow(bucket->hb_size, size, &bucket->hb_size)) {
		bucket->hb_size = UINT32_MAX;
	}

	if (++bucket->hb_count == hazard_bucket_count ||
	    bucket->hb_size >= hazard_retire_threshold) {
		/*
		 * It is ok for this allocation to fail: when it fails,
		 * hga_bucket is set to NULL, and the zone will be primed
		 * which makes it more likely that the next attempt at
		 * allocating will work immediately
		 */
		hga->hga_bucket = hazard_bucket_alloc(Z_NOWAIT);

		mpsc_daemon_enqueue(&hazard_deallocate_queue,
		    &bucket->hb_mplink, MPSC_QUEUE_NONE); /* fence (3) */
	}
	enable_preemption();

	if (__improbable(free_bucket)) {
		hazard_bucket_free(free_bucket);
	}
}
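
/*
 * Illustrative sketch (hypothetical names: `foo_ptr`, `new_foo`,
 * foo_free_cb()): the writer side first unpublishes the object so no
 * new reader can find it, then hands it to hazard_retire() instead of
 * freeing it directly, as hazard_basic_test() below does with
 * hazard_test_value_retire_cb().  The writer is assumed to be
 * serialized, e.g. by a lock.
 *
 *	struct foo *old = ...;                            // currently published value
 *	hazard_ptr_serialized_store(&foo_ptr, new_foo);   // unpublish `old`
 *	hazard_retire(old, sizeof(*old), foo_free_cb);    // reclaimed once no guard protects it
 */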

/*!
 * @struct hazard_bucket_filter_state
 *
 * @brief
 * Data structure used to maintain the state during a hazard reclaim phase.
 *
 * @field hbfs_partial
 * Bucket used to keep records that can't be freed yet.
 *
 * @field hbfs_partial_pos
 * How many pointers are saved in @c hbfs_partial.
 *
 * The @c hbfs_partial->hb_count field cannot be used because the bucket
 * currently being "filtered" might be @c hbfs_partial itself.
 *
 * @field hbfs_array
 * The array of pointers that were scanned as being active
 * and cannot be safely reclaimed yet.
 *
 * @field hbfs_array_len
 * How many entries @c hbfs_array is holding.
 */
struct hazard_bucket_filter_state {
	hazard_bucket_t     hbfs_partial;
	uint32_t            hbfs_partial_pos;
	uint32_t            hbfs_array_len;
	const void        **hbfs_array;
};

extern void
qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *));

/*
 * Sorts pointers in descending order, which is the order
 * hazard_bsearch() below expects.
 */
static int
hazard_compare(const void *a, const void *b)
{
	void * const *x = a;
	void * const *y = b;

	if (*x == *y) {
		return 0;
	}
	return *x < *y ? 1 : -1;
}

static bool
hazard_bsearch(const void *p, const void **array, size_t l, size_t r)
{
	while (l < r) {
		size_t i = (l + r) / 2;
		if (array[i] == p) {
			return true;
		}
		if (array[i] < p) {
			r = i;
		} else {
			l = i + 1;
		}
	}

	return false;
}

/*!
 * @function hazard_filter_bucket
 *
 * @brief
 * Filter bucket records to only keep unreclaimable ones.
 *
 * @discussion
 * Pointers that can't be reclaimed yet are stored into @c st->hbfs_partial,
 * which eventually ends up on the @c hazard_buckets_pending queue
 * until the next scan/filter pass.
 */
static void
hazard_filter_bucket(struct hazard_bucket_filter_state *st,
    hazard_bucket_t bucket)
{
	hazard_bucket_t partial = st->hbfs_partial;
	uint32_t partial_pos = st->hbfs_partial_pos;

	for (uint32_t i = 0, r_end = bucket->hb_count; i < r_end; i++) {
		struct hazard_record hr = bucket->hb_recs[i];

		if (!hazard_bsearch(hr.hr_val, st->hbfs_array, 0, st->hbfs_array_len)) {
			hr.hr_dtor(hr.hr_val);
			continue;
		}

		partial->hb_recs[partial_pos] = hr;
		if (++partial_pos == hazard_bucket_count) {
			/* we do not keep track of per record size */
			partial->hb_size  = 0;
			partial->hb_count = partial_pos;
			STAILQ_INSERT_TAIL(&hazard_buckets_pending, partial, hb_stqlink);
			st->hbfs_partial = partial = bucket;
			partial_pos = 0;
		}
	}

	if (bucket != partial) {
		hazard_bucket_free(bucket);
	}
	st->hbfs_partial_pos = partial_pos;
}

static void
hazard_filter_finish(struct hazard_bucket_filter_state *st)
{
	if (st->hbfs_partial_pos == 0) {
		hazard_bucket_free(st->hbfs_partial);
	} else {
		hazard_bucket_t bucket = st->hbfs_partial;

		bucket->hb_count = st->hbfs_partial_pos;
		STAILQ_INSERT_TAIL(&hazard_buckets_pending, bucket, hb_stqlink);
		bzero(bucket->hb_recs + bucket->hb_count,
		    sizeof(bucket->hb_recs[0]) *
		    (hazard_bucket_count - bucket->hb_count));
	}
}

/*!
 * @function hazard_scan_and_reclaim()
 *
 * @brief
 * Perform the reclamation phase of hazard pointers.
 *
 * @discussion
 * Buckets are enqueued onto the global @c hazard_buckets_pending list
 * by @c hazard_deallocate_queue_invoke().
 *
 * Then this function is called to filter this list.
 * Records that are not safe to reclaim stay on the list,
 * and will be filtered again the next time around.
 */
static void
hazard_scan_and_reclaim(void)
{
	__attribute__((uninitialized))
	const void *protected_array[MAX_CPUS * HAZARD_GUARD_SLOTS];

	struct hazard_bucket_list head = STAILQ_HEAD_INITIALIZER(head);
	struct hazard_bucket_filter_state st = {
		.hbfs_array = protected_array,
		.hbfs_partial = STAILQ_FIRST(&hazard_buckets_pending),
	};
	hazard_bucket_t bucket;
	const void *p;

	/*
	 * The mpsc daemon is called with a shallow stack depth,
	 * so we really should be able to have up to 1k worth of pointers
	 * on our stack.
	 *
	 * When this becomes no longer true, we will keep a reasonably sized
	 * stack buffer and will allocate if it overflows. Chances are that
	 * even on a very wide machine, there aren't enough live hazard
	 * pointers anyway.
	 */
	static_assert(sizeof(protected_array) <= sizeof(void *) * 1024,
	    "our stack usage is ok");

	STAILQ_CONCAT(&head, &hazard_buckets_pending);

	percpu_foreach(hga, hazard_guards_array) {
		for (size_t i = 0; i < HAZARD_GUARD_SLOTS; i++) {
			p = os_atomic_load(&hga->hga_array[i].hg_val, relaxed);
			if (p) {
				st.hbfs_array[st.hbfs_array_len++] = p;
			}
		}
	}

	qsort(st.hbfs_array, st.hbfs_array_len, sizeof(void *), hazard_compare);

	while ((bucket = STAILQ_FIRST(&head))) {
		STAILQ_REMOVE_HEAD(&head, hb_stqlink);
		hazard_filter_bucket(&st, bucket);
	}

	hazard_filter_finish(&st);
}

static void
hazard_deallocate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq)
{
	assert(dq == &hazard_deallocate_queue);

	/*
	 * Because we need to issue a fence before scanning for active
	 * pointers, we accumulate pending buckets in a first pass.
	 *
	 * The MPSC system then calls us with the MPSC_QUEUE_BATCH_END marker
	 * to mark the end of a batch. Realistically batches are extremely
	 * unlikely to be longer than NCPU.
	 *
	 * We enqueue all buckets onto a global list (hazard_buckets_pending)
	 * which is then filtered/trimmed by hazard_scan_and_reclaim().
	 */

	if (e != MPSC_QUEUE_BATCH_END) {
		hazard_bucket_t bucket;

		bucket = mpsc_queue_element(e, struct hazard_bucket, hb_mplink);
		STAILQ_INSERT_TAIL(&hazard_buckets_pending, bucket, hb_stqlink);
		return;
	}

	if (!STAILQ_EMPTY(&hazard_buckets_pending)) {
		os_atomic_thread_fence(seq_cst); /* fence (4) */

		hazard_scan_and_reclaim();
	}
}


#pragma mark - module initialization

void
hazard_register_mpsc_queue(void)
{
	thread_deallocate_daemon_register_queue(&hazard_deallocate_queue,
	    hazard_deallocate_queue_invoke);
	hazard_deallocate_queue.mpd_options |= MPSC_QUEUE_OPTION_BATCH;
}

static void
hazard_startup(void)
{
	hazard_bucket_count = zpercpu_count() * HAZARD_GUARD_SLOTS / 2;
	if (hazard_bucket_count < hazard_bucket_count_min) {
		hazard_bucket_count = hazard_bucket_count_min;
	}
}
STARTUP(PERCPU, STARTUP_RANK_LAST, hazard_startup);
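
/*
 * Worked example (the HAZARD_GUARD_SLOTS value is assumed here purely
 * for illustration): with 8 CPUs and HAZARD_GUARD_SLOTS == 4,
 * hazard_startup() picks 8 * 4 / 2 = 16 records per bucket; with 2 CPUs
 * the result (4) falls below hazard_bucket_count_min (default 8) and is
 * clamped up to it.
 */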

#pragma mark - tests
#if DEBUG || DEVELOPMENT
#include <sys/errno.h>

struct hazard_test_value {
	os_refcnt_t htv_ref;
	int         htv_step;
	bool        htv_reclaim_ok;
	void       *htv_reclaimed;
};

static _Atomic uint32_t hazard_test_outstanding;

static struct hazard_test_value *
hazard_test_value_alloc(int count, int step, bool ok)
{
	struct hazard_test_value *val;

	val = kalloc_data(sizeof(struct hazard_test_value),
	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init_count(&val->htv_ref, NULL, count);
	val->htv_reclaim_ok = ok;
	val->htv_step = step;
	os_atomic_inc(&hazard_test_outstanding, relaxed);
	return val;
}

static void
hazard_test_value_release(struct hazard_test_value *val)
{
	if (os_ref_release(&val->htv_ref) == 0) {
		os_atomic_dec(&hazard_test_outstanding, relaxed);
		kfree_data(val, sizeof(*val));
	}
}

static void
hazard_test_value_retire_cb(void *ptr)
{
	struct hazard_test_value *val = ptr;

	if (!val->htv_reclaim_ok) {
		panic("%p: step %d should not be reclaimed", val, val->htv_step);
	}
	os_atomic_store(&val->htv_reclaimed, (void *)1, seq_cst);
	hazard_test_value_release(val);
}

static int
hazard_basic_test(__unused int64_t in, int64_t *out)
{
	static HAZARD_POINTER(struct hazard_test_value *) pointer;

	uint32_t start, end, count;
	hazard_guard_t guard;

	if (hazard_bucket_count < 2 || hazard_bucket_count > 32) {
		printf("%s: skipping test because hazard_bucket_count is %d\n",
		    __func__, hazard_bucket_count);
		return ENOTSUP;
	}

	printf("%s: using some guards\n", __func__);
	{
		struct hazard_test_value *val, *tmp;

		val = hazard_test_value_alloc(1, 0, false);
		hazard_ptr_serialized_store(&pointer, val);

		for (int i = 0; i < 10; i++) {
			guard = hazard_guard_get(0);
			assert(guard != NULL);

			tmp = hazard_guard_acquire(guard, &pointer);
			assert(tmp == val);

			hazard_guard_put(guard);

			delay_for_interval(1, NSEC_PER_MSEC);
		}

		hazard_ptr_clear(&pointer);
		hazard_test_value_release(val);
	}
	printf("%s: done\n", __func__);

	count = hazard_bucket_count * MAX_CPUS * 2 + 3;
	printf("%s: retiring %d values in a loop\n", __func__, count);
	{
		struct hazard_test_value *val;

		start = os_atomic_load(&hazard_test_outstanding, relaxed);
		printf("%s: starting (%d outstanding)\n", __func__, start);

		for (int i = 0; i < count; i++) {
			val = hazard_test_value_alloc(1, 1000 + i, true);
			hazard_retire(val, sizeof(*val), hazard_test_value_retire_cb);
		}

		delay_for_interval(10, NSEC_PER_MSEC);

		end = os_atomic_load(&hazard_test_outstanding, relaxed);
		printf("%s: ending (%d outstanding)\n", __func__, end);

		assert(end <= start || end - start < hazard_bucket_count * MAX_CPUS);
	}
	printf("%s: done\n", __func__);

	printf("%s: cheating and checking scan works\n", __func__);
	if (zpercpu_count() > 1 && processor_avail_count > 1) {
		struct hazard_test_value *v1, *v2, *tmp;
		hazard_bucket_t bucket;

		v1 = hazard_test_value_alloc(2, 10000, false);
		v2 = hazard_test_value_alloc(2, 10001, true);
		hazard_ptr_serialized_store(&pointer, v1);

		/* create a fake bucket to simulate a retire in flight */
		bucket = hazard_bucket_alloc(Z_WAITOK);
		bucket->hb_count = 2;
		bucket->hb_recs[0].hr_val = v1;
		bucket->hb_recs[0].hr_dtor = &hazard_test_value_retire_cb;
		bucket->hb_recs[1].hr_val = v2;
		bucket->hb_recs[1].hr_dtor = &hazard_test_value_retire_cb;

		guard = hazard_guard_get(0);
		tmp = hazard_guard_acquire(guard, &pointer);
		assert(v1 == tmp);

		/* simulate an enqueue */
		mpsc_daemon_enqueue(&hazard_deallocate_queue,
		    &bucket->hb_mplink, MPSC_QUEUE_NONE);

		/*
		 * wait until we can observe v2 being freed,
		 * it will panic if this doesn't happen quickly enough
		 */
		hw_wait_while_equals(&v2->htv_reclaimed, NULL);

		/* allow v1 to be reclaimed now */
		os_atomic_store(&v1->htv_reclaim_ok, true, seq_cst);

		hazard_guard_put(guard);

		printf("%s: observed %p die and %p stay\n", __func__, v2, v1);

		/* create a fake bucket again to force a flush */
		bucket = hazard_bucket_alloc(Z_WAITOK);
		bucket->hb_count = 1;
		bucket->hb_recs[0].hr_val = v2;
		bucket->hb_recs[0].hr_dtor = &hazard_test_value_retire_cb;

		/* simulate an enqueue */
		mpsc_daemon_enqueue(&hazard_deallocate_queue,
		    &bucket->hb_mplink, MPSC_QUEUE_DISABLE_PREEMPTION);

		/*
		 * wait until we can observe v1 being freed,
		 * now that there's no guard preventing it from disappearing
		 */
		hw_wait_while_equals(&v1->htv_reclaimed, NULL);

		hazard_test_value_release(v1);
		printf("%s: observed %p die too\n", __func__, v1);
	}
	printf("%s: done\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(hazard_basic, hazard_basic_test);

#endif /* DEBUG || DEVELOPMENT */