/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/cpu_data.h>
#include <kern/hazard.h>
#include <kern/mpsc_queue.h>
#include <kern/percpu.h>
#include <kern/startup.h>
#include <kern/zalloc.h>
#include <sys/queue.h>

#pragma mark - Hazard types and globals

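/*! a retired pointer and the destructor used to reclaim it. */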
typedef struct hazard_record {
	void                   *hr_val;
	union {
		void          (*hr_dtor)(void *);
		vm_size_t       hr_size;
	};
} *hazard_record_t;

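/*!
 * a batch of retired records, linked either on the MPSC deallocation
 * queue (hb_mplink) or on the pending list (hb_stqlink), never both.
 */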
typedef struct hazard_bucket {
	union {
		struct mpsc_queue_chain     hb_mplink;
		STAILQ_ENTRY(hazard_bucket) hb_stqlink;
	};
	uint32_t                hb_count;
	uint32_t                hb_size;
	struct hazard_record    hb_recs[];
} *hazard_bucket_t;

struct hazard_guard_array {
	hazard_bucket_t         hga_bucket;
#if DEBUG || DEVELOPMENT
	unsigned long           hga_mask;
#endif
	struct hazard_guard     hga_array[HAZARD_GUARD_SLOTS];
};

STAILQ_HEAD(hazard_bucket_list, hazard_bucket);


/*! per-cpu state for hazard pointers. */
static struct hazard_guard_array PERCPU_DATA(hazard_guards_array);

/*! the minimum number of items cached in per-cpu buckets */
static TUNABLE(uint32_t, hazard_bucket_count_min, "hazard_bucket_count_min", 8);

/*! the amount of memory pending retirement that causes a forceful flush */
#if XNU_TARGET_OS_OSX
#define HAZARD_RETIRE_THRESHOLD_DEFAULT     (256 << 10)
#else
#define HAZARD_RETIRE_THRESHOLD_DEFAULT     (64 << 10)
#endif
static TUNABLE(vm_size_t, hazard_retire_threshold, "hazard_retire_threshold",
    HAZARD_RETIRE_THRESHOLD_DEFAULT);

/*! the number of items cached in per-cpu buckets */
static SECURITY_READ_ONLY_LATE(uint32_t) hazard_bucket_count;

/*! the queue of elements that couldn't be freed immediately */
static struct hazard_bucket_list hazard_buckets_pending =
    STAILQ_HEAD_INITIALIZER(hazard_buckets_pending);

/*! the atomic queue handling deferred deallocations */
static struct mpsc_daemon_queue hazard_deallocate_queue;


#pragma mark - Hazard guards

/*
 * Memory barriers:
 *
 * 1. setting a guard cannot be reordered with subsequent accesses it protects.
 *
 *     ──[ load value ][ set guard ](1)[ reload value ][═════ use value ...
 *                           ^               │
 *                           ╰───────────────╯
 *
 *
 * 2. clearing a guard cannot be reordered with prior accesses it protects.
 *
 *     ... use value ════](2)[ clear guard ]──
 *
 *
 * 3. hazard_retire() needs to ensure that the update to the protected field
 *    is visible to any thread consulting the list of retired pointers.
 *    Note that this fence can be amortized per batch of retired pointers.
 *
 *     ──[ update value ](3)[ retire ]──
 *
 *
 * 4. hazard_scan_and_reclaim() needs to make sure that gathering
 *    retired pointers and scanning guards are fully ordered.
 *
 *    ──[ gather retired pointers ](4)[ guard scan ][ GC ]──
 *
 *
 * With this, `reload value` can't possibly be a pointer to a freed value:
 * - setting the guard "happens before" reloading the value (through (1))
 * - updating a guard value "happens before" freeing it (through (3, 4))
 *
 * Of course, (2) ensures that when the scan loads NULL, then there's no longer
 * any hazardous access in flight and reclamation is safe.
 */
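
/*
 * Illustrative reader-side sketch (not a verbatim client; assumes a
 * hazard-protected pointer `ptr` declared with HAZARD_POINTER(), and
 * uses the hazard_guard_get()/hazard_guard_acquire() accessors that
 * the tests below exercise):
 *
 *	hazard_guard_t guard = hazard_guard_get(0);
 *	obj = hazard_guard_acquire(guard, &ptr);   // publish guard, fence (1)
 *	// ... obj cannot be reclaimed while the guard holds it ...
 *	hazard_guard_put(guard);                   // clear guard, fence (2)
 */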

__attribute__((always_inline))
hazard_guard_array_t
__hazard_guard_get(size_t slot, size_t count __assert_only)
{
	struct hazard_guard_array *hga;

	disable_preemption();
	hga = PERCPU_GET(hazard_guards_array);
#if DEBUG || DEVELOPMENT
	unsigned long mask = ((1ul << count) - 1) << slot;
	assertf((hga->hga_mask & mask) == 0, "slot %d in use",
	    __builtin_ctzl(hga->hga_mask & mask));
	hga->hga_mask |= mask;
#endif /* DEBUG || DEVELOPMENT */
	return hga->hga_array + slot;
}

static inline void
__hazard_guard_put(hazard_guard_t guard, size_t count __assert_only)
{
#if DEBUG || DEVELOPMENT
	struct hazard_guard_array *hga = PERCPU_GET(hazard_guards_array);
	size_t slot = guard - hga->hga_array;
	unsigned long mask = ((1ul << count) - 1) << slot;

	assertf(slot < HAZARD_GUARD_SLOTS, "invalid guard %p", guard);
	assertf((hga->hga_mask & mask) == mask, "slot %d free",
	    __builtin_ctzl(~hga->hga_mask & mask));
	hga->hga_mask &= ~mask;
#else
	(void)guard;
#endif /* DEBUG || DEVELOPMENT */
	enable_preemption();
}

__attribute__((always_inline))
void
hazard_guard_put(hazard_guard_t guard) /* fence (2) */
{
	os_atomic_store(&guard->hg_val, NULL, release);
	__hazard_guard_put(guard, 1);
}

__attribute__((always_inline))
void
hazard_guard_put_n(hazard_guard_t guard, size_t n) /* fence (2) */
{
	os_atomic_thread_fence(release);
	__builtin_bzero(guard, n * sizeof(guard->hg_val));
	__hazard_guard_put(guard, n);
}

__attribute__((always_inline))
void
hazard_guard_dismiss(hazard_guard_t guard)
{
	os_atomic_store(&guard->hg_val, NULL, relaxed);
	__hazard_guard_put(guard, 1);
}

__attribute__((always_inline))
void
hazard_guard_dismiss_n(hazard_guard_t guard, size_t n)
{
	__builtin_bzero(guard, n * sizeof(guard->hg_val));
	__hazard_guard_put(guard, n);
}

__attribute__((always_inline))
void
hazard_guard_set(hazard_guard_t guard, void *value) /* fence (1) */
{
#if __x86_64__ || __i386__
	os_atomic_xchg(&guard->hg_val, value, seq_cst);
#else /* c11 */
	os_atomic_store(&guard->hg_val, value, relaxed);
	os_atomic_thread_fence(seq_cst);
#endif
}

__attribute__((always_inline))
void
hazard_guard_replace(hazard_guard_t guard, void *value) /* fence (2) and (1) */
{
#if __x86_64__ || __i386__
	os_atomic_xchg(&guard->hg_val, value, seq_cst);
#else /* c11 */
	os_atomic_store(&guard->hg_val, value, release);
	os_atomic_thread_fence(seq_cst);
#endif
}
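
/*
 * A sketch contrasting the two (illustrative only; `node` and its
 * `next` field are hypothetical): hazard_guard_set() publishes a first
 * value into an empty slot, while hazard_guard_replace() trades
 * protection from one value to the next, e.g. hand-over-hand:
 *
 *	hazard_guard_set(guard, node);             // fence (1)
 *	// ... validate node ...
 *	hazard_guard_replace(guard, node->next);   // fence (2) then (1)
 */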


#pragma mark - Hazard GC

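/*!
 * Buckets are sized by hazard_startup(): a struct hazard_bucket header
 * followed by hazard_bucket_count records in the hb_recs[] trailing array.
 */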
static hazard_bucket_t
hazard_bucket_alloc(zalloc_flags_t flags)
{
	return kalloc_type(struct hazard_bucket, struct hazard_record,
	           hazard_bucket_count, Z_ZERO | flags);
}

static void
hazard_bucket_free(hazard_bucket_t bucket)
{
	return kfree_type(struct hazard_bucket, struct hazard_record,
	           hazard_bucket_count, bucket);
}

void
hazard_retire(void *value, vm_size_t size, void (*destructor)(void *))
{
	struct hazard_guard_array *hga;
	hazard_bucket_t bucket, free_bucket = NULL;

	/* the retired pointer must be aligned */
	assert(((vm_address_t)value % sizeof(vm_offset_t)) == 0);

	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		/*
		 * The system is still single threaded and this module
		 * is not fully initialized yet.
		 */
		destructor(value);
		return;
	}

again:
	disable_preemption();
	hga = PERCPU_GET(hazard_guards_array);
	bucket = hga->hga_bucket;
	if (bucket == NULL) {
		if (free_bucket) {
			bucket = free_bucket;
			free_bucket = NULL;
		} else if ((bucket = hazard_bucket_alloc(Z_NOWAIT)) == NULL) {
			enable_preemption();
			free_bucket = hazard_bucket_alloc(Z_WAITOK | Z_NOFAIL);
			goto again;
		}
		hga->hga_bucket = bucket;
	}

	bucket->hb_recs[bucket->hb_count].hr_val = value;
	bucket->hb_recs[bucket->hb_count].hr_dtor = destructor;

	if (os_add_overflow(bucket->hb_size, size, &bucket->hb_size)) {
		bucket->hb_size = UINT32_MAX;
	}

	if (++bucket->hb_count == hazard_bucket_count ||
	    bucket->hb_size >= hazard_retire_threshold) {
		/*
		 * It is ok for this allocation to fail: when it fails,
		 * hga_bucket is set to NULL, and the zone will be primed,
		 * which makes it more likely that the next attempt at
		 * allocating will work immediately.
		 */
		hga->hga_bucket = hazard_bucket_alloc(Z_NOWAIT);

		mpsc_daemon_enqueue(&hazard_deallocate_queue,
		    &bucket->hb_mplink, MPSC_QUEUE_NONE); /* fence (3) */
	}
	enable_preemption();

	if (__improbable(free_bucket)) {
		hazard_bucket_free(free_bucket);
	}
}
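
/*
 * Illustrative writer-side sketch (hypothetical `ptr`, `old` and
 * `value_dtor`; hazard_ptr_serialized_store() is the publication
 * helper the tests below use):
 *
 *	hazard_ptr_serialized_store(&ptr, new_value);
 *	hazard_retire(old, sizeof(*old), value_dtor);  // fence (3), per batch
 *
 * The destructor only runs once no guard can still be observed
 * protecting `old`.
 */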

/*!
 * @struct hazard_bucket_filter_state
 *
 * @brief
 * Data structure used to maintain state during a hazard reclaim phase.
 *
 * @field hbfs_partial
 * Bucket used to keep records that can't be freed yet.
 *
 * @field hbfs_partial_pos
 * How many pointers are saved in @c hbfs_partial.
 *
 * The @c hbfs_partial->hb_count field cannot be used, as the bucket
 * being "filtered" could be the same one.
 *
 * @field hbfs_array
 * The array of pointers that were scanned as being active
 * and cannot be safely reclaimed yet.
 *
 * @field hbfs_array_len
 * How many entries @c hbfs_array is holding.
 */
struct hazard_bucket_filter_state {
	hazard_bucket_t     hbfs_partial;
	uint32_t            hbfs_partial_pos;
	uint32_t            hbfs_array_len;
	const void        **hbfs_array;
};

extern void
qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *));

static int
hazard_compare(const void *a, const void *b)
{
	void * const *x = a;
	void * const *y = b;

	if (*x == *y) {
		return 0;
	}
	return *x < *y ? 1 : -1;
}
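
/*
 * Note that hazard_compare() sorts in descending order (it returns 1
 * when the first value is the smaller one); hazard_bsearch() below
 * narrows its search range with the same convention.
 */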

static bool
hazard_bsearch(const void *p, const void **array, size_t l, size_t r)
{
	while (l < r) {
		size_t i = (l + r) / 2;
		if (array[i] == p) {
			return true;
		}
		if (array[i] < p) {
			r = i;
		} else {
			l = i + 1;
		}
	}

	return false;
}

/*!
 * @function hazard_filter_bucket
 *
 * @brief
 * Filter bucket records to only keep unreclaimable ones.
 *
 * @discussion
 * Pointers that can't be reclaimed are stored into @c st->hbfs_partial,
 * which eventually lands on the @c hazard_buckets_pending queue
 * until the next scan/filter occurs.
 */
static void
hazard_filter_bucket(struct hazard_bucket_filter_state *st,
    hazard_bucket_t bucket)
{
	hazard_bucket_t partial = st->hbfs_partial;
	uint32_t partial_pos = st->hbfs_partial_pos;

	for (uint32_t i = 0, r_end = bucket->hb_count; i < r_end; i++) {
		struct hazard_record hr = bucket->hb_recs[i];

		if (!hazard_bsearch(hr.hr_val, st->hbfs_array, 0, st->hbfs_array_len)) {
			hr.hr_dtor(hr.hr_val);
			continue;
		}

		partial->hb_recs[partial_pos] = hr;
		if (++partial_pos == hazard_bucket_count) {
			/* we do not keep track of per-record sizes */
			partial->hb_size  = 0;
			partial->hb_count = partial_pos;
			STAILQ_INSERT_TAIL(&hazard_buckets_pending, partial, hb_stqlink);
			st->hbfs_partial = partial = bucket;
			partial_pos = 0;
		}
	}

	if (bucket != partial) {
		hazard_bucket_free(bucket);
	}
	st->hbfs_partial_pos = partial_pos;
}

static void
hazard_filter_finish(struct hazard_bucket_filter_state *st)
{
	if (st->hbfs_partial_pos == 0) {
		hazard_bucket_free(st->hbfs_partial);
	} else {
		hazard_bucket_t bucket = st->hbfs_partial;

		bucket->hb_count = st->hbfs_partial_pos;
		STAILQ_INSERT_TAIL(&hazard_buckets_pending, bucket, hb_stqlink);
		bzero(bucket->hb_recs + bucket->hb_count,
		    sizeof(bucket->hb_recs[0]) *
		    (hazard_bucket_count - bucket->hb_count));
	}
}

/*!
 * @function hazard_scan_and_reclaim()
 *
 * @brief
 * Perform the reclamation phase of hazard pointers.
 *
 * @discussion
 * Buckets are enqueued onto the global @c hazard_buckets_pending list
 * by @c hazard_deallocate_queue_invoke().
 *
 * This function is then called to filter that list.
 * Records that are not safe to reclaim stay on the list,
 * and will be filtered again the next time around.
 */
static void
hazard_scan_and_reclaim(void)
{
	__attribute__((uninitialized))
	const void *protected_array[MAX_CPUS * HAZARD_GUARD_SLOTS];

	struct hazard_bucket_list head = STAILQ_HEAD_INITIALIZER(head);
	struct hazard_bucket_filter_state st = {
		.hbfs_array = protected_array,
		.hbfs_partial = STAILQ_FIRST(&hazard_buckets_pending),
	};
	hazard_bucket_t bucket;
	const void *p;

	/*
	 * The mpsc daemon is called with a shallow stack depth,
	 * so we really should be able to have up to 1k worth of pointers
	 * on our stack.
	 *
	 * When this is no longer true, we will keep a reasonably sized
	 * stack buffer and allocate when it overflows. Chances are that
	 * even on a very wide machine, there aren't enough live hazard
	 * pointers anyway.
	 */
	static_assert(sizeof(protected_array) <= sizeof(void *) * 1024,
	    "our stack usage is ok");

	STAILQ_CONCAT(&head, &hazard_buckets_pending);

	percpu_foreach(hga, hazard_guards_array) {
		for (size_t i = 0; i < HAZARD_GUARD_SLOTS; i++) {
			p = os_atomic_load(&hga->hga_array[i].hg_val, relaxed);
			if (p) {
				st.hbfs_array[st.hbfs_array_len++] = p;
			}
		}
	}

	qsort(st.hbfs_array, st.hbfs_array_len, sizeof(void *), hazard_compare);

	while ((bucket = STAILQ_FIRST(&head))) {
		STAILQ_REMOVE_HEAD(&head, hb_stqlink);
		hazard_filter_bucket(&st, bucket);
	}

	hazard_filter_finish(&st);
}

static void
hazard_deallocate_queue_invoke(mpsc_queue_chain_t e,
    __assert_only mpsc_daemon_queue_t dq)
{
	assert(dq == &hazard_deallocate_queue);

	/*
	 * Because we need to issue a fence before scanning for active
	 * pointers, we accumulate pending buckets in a first pass;
	 * the MPSC system then calls us with the MPSC_QUEUE_BATCH_END
	 * marker to mark the end of a batch. Realistically batches are
	 * extremely unlikely to be longer than NCPU.
	 *
	 * We enqueue all buckets onto a global list (hazard_buckets_pending)
	 * which is then filtered/trimmed by hazard_scan_and_reclaim().
	 */

	if (e != MPSC_QUEUE_BATCH_END) {
		hazard_bucket_t bucket;

		bucket = mpsc_queue_element(e, struct hazard_bucket, hb_mplink);
		STAILQ_INSERT_TAIL(&hazard_buckets_pending, bucket, hb_stqlink);
		return;
	}

	if (!STAILQ_EMPTY(&hazard_buckets_pending)) {
		os_atomic_thread_fence(seq_cst); /* fence (4) */

		hazard_scan_and_reclaim();
	}
}


#pragma mark - module initialization

void
hazard_register_mpsc_queue(void)
{
	thread_deallocate_daemon_register_queue(&hazard_deallocate_queue,
	    hazard_deallocate_queue_invoke);
	hazard_deallocate_queue.mpd_options |= MPSC_QUEUE_OPTION_BATCH;
}

static void
hazard_startup(void)
{
	hazard_bucket_count = zpercpu_count() * HAZARD_GUARD_SLOTS / 2;
	if (hazard_bucket_count < hazard_bucket_count_min) {
		hazard_bucket_count = hazard_bucket_count_min;
	}
}
STARTUP(PERCPU, STARTUP_RANK_LAST, hazard_startup);
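
/*
 * For illustration (machine-dependent): with 8 CPUs and, hypothetically,
 * HAZARD_GUARD_SLOTS == 4, hazard_startup() picks 8 * 4 / 2 == 16 records
 * per bucket, above the default hazard_bucket_count_min of 8.
 */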

#pragma mark - tests
#if DEBUG || DEVELOPMENT
#include <sys/errno.h>

struct hazard_test_value {
	os_refcnt_t htv_ref;
	int         htv_step;
	bool        htv_reclaim_ok;
	void       *htv_reclaimed;
};

static _Atomic uint32_t hazard_test_outstanding;

static struct hazard_test_value *
hazard_test_value_alloc(int count, int step, bool ok)
{
	struct hazard_test_value *val;

	val = kalloc_data(sizeof(struct hazard_test_value),
	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init_count(&val->htv_ref, NULL, count);
	val->htv_reclaim_ok = ok;
	val->htv_step = step;
	os_atomic_inc(&hazard_test_outstanding, relaxed);
	return val;
}

static void
hazard_test_value_release(struct hazard_test_value *val)
{
	if (os_ref_release(&val->htv_ref) == 0) {
		os_atomic_dec(&hazard_test_outstanding, relaxed);
		kfree_data(val, sizeof(*val));
	}
}

static void
hazard_test_value_retire_cb(void *ptr)
{
	struct hazard_test_value *val = ptr;

	if (!val->htv_reclaim_ok) {
		panic("%p: step %d should not be reclaimed", val, val->htv_step);
	}
	os_atomic_store(&val->htv_reclaimed, (void *)1, seq_cst);
	hazard_test_value_release(val);
}

static int
hazard_basic_test(__unused int64_t in, int64_t *out)
{
	static HAZARD_POINTER(struct hazard_test_value *) pointer;

	uint32_t start, end, count;
	hazard_guard_t guard;

	if (hazard_bucket_count < 2 || hazard_bucket_count > 32) {
		printf("%s: skipping test because hazard_bucket_count is %d\n",
		    __func__, hazard_bucket_count);
		*out = 1;
		return 0;
	}

	printf("%s: using some guards\n", __func__);
	{
		struct hazard_test_value *val, *tmp;

		val = hazard_test_value_alloc(1, 0, false);
		hazard_ptr_serialized_store(&pointer, val);

		for (int i = 0; i < 10; i++) {
			guard = hazard_guard_get(0);
			assert(guard != NULL);

			tmp = hazard_guard_acquire(guard, &pointer);
			assert(tmp == val);

			hazard_guard_put(guard);

			delay_for_interval(1, NSEC_PER_MSEC);
		}

		hazard_ptr_clear(&pointer);
		hazard_test_value_release(val);
	}
	printf("%s: done\n", __func__);

	count = hazard_bucket_count * MAX_CPUS * 2 + 3;
	printf("%s: retiring %d values in a loop\n", __func__, count);
	{
		struct hazard_test_value *val;

		start = os_atomic_load(&hazard_test_outstanding, relaxed);
		printf("%s: starting (%d outstanding)\n", __func__, start);

		for (uint32_t i = 0; i < count; i++) {
			val = hazard_test_value_alloc(1, 1000 + i, true);
			hazard_retire(val, sizeof(*val), hazard_test_value_retire_cb);
		}

		delay_for_interval(10, NSEC_PER_MSEC);

		end = os_atomic_load(&hazard_test_outstanding, relaxed);
		printf("%s: ending (%d outstanding)\n", __func__, end);

		assert(end <= start || end - start < hazard_bucket_count * MAX_CPUS);
	}
	printf("%s: done\n", __func__);

	printf("%s: cheating and checking scan works\n", __func__);
	if (zpercpu_count() > 1 && processor_avail_count > 1) {
		struct hazard_test_value *v1, *v2, *tmp;
		hazard_bucket_t bucket;

		v1 = hazard_test_value_alloc(2, 10000, false);
		v2 = hazard_test_value_alloc(2, 10001, true);
		hazard_ptr_serialized_store(&pointer, v1);

		/* create a fake bucket to simulate a retire in flight */
		bucket = hazard_bucket_alloc(Z_WAITOK);
		bucket->hb_count = 2;
		bucket->hb_recs[0].hr_val = v1;
		bucket->hb_recs[0].hr_dtor = &hazard_test_value_retire_cb;
		bucket->hb_recs[1].hr_val = v2;
		bucket->hb_recs[1].hr_dtor = &hazard_test_value_retire_cb;

		guard = hazard_guard_get(0);
		tmp = hazard_guard_acquire(guard, &pointer);
		assert(v1 == tmp);

		/* simulate an enqueue */
		mpsc_daemon_enqueue(&hazard_deallocate_queue,
		    &bucket->hb_mplink, MPSC_QUEUE_NONE);

		/*
		 * wait until we can observe v2 being freed;
		 * the system will panic if it doesn't happen quickly enough
		 */
		hw_wait_while_equals_long(&v2->htv_reclaimed, NULL);

		/* allow v1 to be reclaimed now */
		os_atomic_store(&v1->htv_reclaim_ok, true, seq_cst);

		hazard_guard_put(guard);

		printf("%s: observed %p die and %p stay\n", __func__, v2, v1);

		/* make a fake bucket again to force a flush */
		bucket = hazard_bucket_alloc(Z_WAITOK);
		bucket->hb_count = 1;
		bucket->hb_recs[0].hr_val = v2;
		bucket->hb_recs[0].hr_dtor = &hazard_test_value_retire_cb;

		/* simulate an enqueue */
		mpsc_daemon_enqueue(&hazard_deallocate_queue,
		    &bucket->hb_mplink, MPSC_QUEUE_DISABLE_PREEMPTION);

		/*
		 * wait until we can observe v1 being freed,
		 * now that there's no guard preventing it from disappearing
		 */
		hw_wait_while_equals_long(&v1->htv_reclaimed, NULL);

		hazard_test_value_release(v1);
		printf("%s: observed %p die too\n", __func__, v1);
	}
	printf("%s: done\n", __func__);

	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(hazard_basic, hazard_basic_test);

#endif /* DEBUG || DEVELOPMENT */