1 #if KERNEL
2 #include <kern/assert.h>
3 #include <kern/debug.h>
4 #include <pexpert/pexpert.h>
5 #include <kern/btlog.h>
6 #include <kern/backtrace.h>
7 #include <kern/zalloc.h>
8 #include <kern/sched_prim.h>
9 #include <libkern/libkern.h>
10 #endif
11 #include <os/overflow.h>
12 #include <os/atomic_private.h>
13
14 #include "refcnt.h"
15
/* Ceiling for a reference count; values at/above this trip the overflow panics. */
#define OS_REFCNT_MAX_COUNT ((os_ref_count_t)0x0FFFFFFFUL)

#if OS_REFCNT_DEBUG
/* Root of the refgrp hierarchy: groups without an explicit parent are
 * re-parented onto "all" by ref_attach_to_group(). */
extern struct os_refgrp global_ref_group;
os_refgrp_decl(, global_ref_group, "all", NULL);

/* Global switch for refcount statistics; turned on by the "rlog" boot-arg
 * in os_ref_log_init(). */
extern bool ref_debug_enable;
bool ref_debug_enable = false;

/* True when debug accounting should run for 'grp': either the global switch
 * is on, or the group opted in via OS_REFGRP_F_ALWAYS_ENABLED. */
#define REFLOG_GRP_DEBUG_ENABLED(grp) \
	__improbable(grp != NULL && (ref_debug_enable || \
	(grp->grp_flags & OS_REFGRP_F_ALWAYS_ENABLED) != 0))

/* Capacity (records) of each per-group backtrace log. */
static const size_t ref_log_nrecords = 1000000;

/* Operation tags stored in the btlog for each retain/release. */
__enum_closed_decl(reflog_op_t, uint8_t, {
	REFLOG_RETAIN = 1,
	REFLOG_RELEASE = 2
});

/* With debug builds the refgrp parameters are real; otherwise they vanish. */
# define __debug_only
# define __os_refgrp_arg(arg) , arg
#else
# define __debug_only __unused
# define __os_refgrp_arg(arg)
#endif /* OS_REFCNT_DEBUG */
42
/*
 * Reports a release that unexpectedly dropped the final reference
 * (caller asserted the object would stay live).  Never returns.
 */
void
os_ref_panic_live(void *rc)
{
	panic("os_refcnt: unexpected release of final reference (rc=%p)", rc);
	__builtin_unreachable();
}
/*
 * Reports a release that was expected to be the final one but found
 * other references still outstanding.  Never returns.
 */
void
os_ref_panic_last(void *rc)
{
	panic("os_refcnt: expected release of final reference but rc %p!=0\n", rc);
	__builtin_unreachable();
}
55
/* Reports a reference count that went below its required floor (over-release). */
__abortlike
static void
os_ref_panic_underflow(void *rc)
{
	panic("os_refcnt: underflow (rc=%p)", rc);
	__builtin_unreachable();
}
63
/* Reports a reference count that reached OS_REFCNT_MAX_COUNT (likely a leak). */
__abortlike
static void
os_ref_panic_overflow(os_ref_atomic_t *rc)
{
	panic("os_refcnt: overflow (rc=%p, count=%u, max=%u)", rc, os_atomic_load(rc, relaxed), OS_REFCNT_MAX_COUNT);
	__builtin_unreachable();
}
71
/*
 * Diagnoses a failed retain: re-reads the counter to distinguish overflow
 * from a retain of an already-released object (resurrection).
 */
__abortlike
static void
os_ref_panic_retain(os_ref_atomic_t *rc)
{
	if (os_atomic_load(rc, relaxed) >= OS_REFCNT_MAX_COUNT) {
		os_ref_panic_overflow(rc);
	} else {
		panic("os_refcnt: attempted resurrection (rc=%p)", rc);
	}
}
82
83 static inline void
os_ref_check_underflow(void * rc,os_ref_count_t count,os_ref_count_t n)84 os_ref_check_underflow(void *rc, os_ref_count_t count, os_ref_count_t n)
85 {
86 if (__improbable(count < n)) {
87 os_ref_panic_underflow(rc);
88 }
89 }
90
91 static inline void
os_ref_check_overflow(os_ref_atomic_t * rc,os_ref_count_t count)92 os_ref_check_overflow(os_ref_atomic_t *rc, os_ref_count_t count)
93 {
94 if (__improbable(count >= OS_REFCNT_MAX_COUNT)) {
95 os_ref_panic_overflow(rc);
96 }
97 }
98
99 static inline void
os_ref_check_retain(os_ref_atomic_t * rc,os_ref_count_t count,os_ref_count_t n)100 os_ref_check_retain(os_ref_atomic_t *rc, os_ref_count_t count, os_ref_count_t n)
101 {
102 if (__improbable(count < n || count >= OS_REFCNT_MAX_COUNT)) {
103 os_ref_panic_retain(rc);
104 }
105 }
106
107 #if OS_REFCNT_DEBUG
108 #if KERNEL
109 __attribute__((cold, noinline))
110 static void
ref_log_op(struct os_refgrp * grp,void * elem,reflog_op_t op)111 ref_log_op(struct os_refgrp *grp, void *elem, reflog_op_t op)
112 {
113 if (grp == NULL) {
114 return;
115 }
116
117 if (grp->grp_log == NULL) {
118 ref_log_op(grp->grp_parent, elem, op);
119 return;
120 }
121
122 btlog_record((btlog_t)grp->grp_log, elem, op,
123 btref_get(__builtin_frame_address(0), BTREF_GET_NOWAIT));
124 }
125
126 __attribute__((cold, noinline))
127 static void
ref_log_drop(struct os_refgrp * grp,void * elem)128 ref_log_drop(struct os_refgrp *grp, void *elem)
129 {
130 if (!REFLOG_GRP_DEBUG_ENABLED(grp)) {
131 return;
132 }
133
134 if (grp->grp_log == NULL) {
135 ref_log_drop(grp->grp_parent, elem);
136 return;
137 }
138
139 btlog_erase(grp->grp_log, elem);
140 }
141
142 __attribute__((cold, noinline))
143 void
os_ref_log_init(struct os_refgrp * grp)144 os_ref_log_init(struct os_refgrp *grp)
145 {
146 if (grp->grp_log != NULL) {
147 return;
148 }
149
150 char grpbuf[128];
151 char *refgrp = grpbuf;
152 if (!PE_parse_boot_argn("rlog", refgrp, sizeof(grpbuf))) {
153 return;
154 }
155
156 /*
157 * Enable refcount statistics if the rlog boot-arg is present,
158 * even when no specific group is logged.
159 */
160 ref_debug_enable = true;
161
162 const char *g;
163 while ((g = strsep(&refgrp, ",")) != NULL) {
164 if (strcmp(g, grp->grp_name) == 0) {
165 /* enable logging on this refgrp */
166 grp->grp_log = btlog_create(BTLOG_HASH,
167 ref_log_nrecords, 0);
168 return;
169 }
170 }
171 }
172
173
174 __attribute__((cold, noinline))
175 void
os_ref_log_fini(struct os_refgrp * grp)176 os_ref_log_fini(struct os_refgrp *grp)
177 {
178 if (grp->grp_log == NULL) {
179 return;
180 }
181
182 btlog_destroy(grp->grp_log);
183 grp->grp_log = NULL;
184 }
185
#else

/*
 * Non-kernel build: the logging hooks compile to empty inline stubs unless
 * the platform already supplied macro replacements for them.
 */
#ifndef os_ref_log_fini
inline void
os_ref_log_fini(struct os_refgrp *grp __unused)
{
}
#endif

#ifndef os_ref_log_init
inline void
os_ref_log_init(struct os_refgrp *grp __unused)
{
}
#endif
#ifndef ref_log_op
static inline void
ref_log_op(struct os_refgrp *grp __unused, void *rc __unused, reflog_op_t op __unused)
{
}
#endif
#ifndef ref_log_drop
static inline void
ref_log_drop(struct os_refgrp *grp __unused, void *rc __unused)
{
}
#endif

#endif /* KERNEL */
215
216 /*
217 * attach a new refcnt to a group
218 */
219 __attribute__((cold, noinline))
220 static void
ref_attach_to_group(os_ref_atomic_t * rc,struct os_refgrp * grp,os_ref_count_t init_count)221 ref_attach_to_group(os_ref_atomic_t *rc, struct os_refgrp *grp, os_ref_count_t init_count)
222 {
223 if (grp == NULL) {
224 return;
225 }
226
227 if (atomic_fetch_add_explicit(&grp->grp_children, 1, memory_order_relaxed) == 0) {
228 /* First reference count object in this group. Check if we should enable
229 * refcount logging. */
230 os_ref_log_init(grp);
231 }
232
233 atomic_fetch_add_explicit(&grp->grp_count, init_count, memory_order_relaxed);
234 atomic_fetch_add_explicit(&grp->grp_retain_total, init_count, memory_order_relaxed);
235
236 if (grp == &global_ref_group) {
237 return;
238 }
239
240 if (grp->grp_parent == NULL) {
241 grp->grp_parent = &global_ref_group;
242 }
243
244 ref_attach_to_group(rc, grp->grp_parent, init_count);
245 }
246
247 static void
ref_retain_group(struct os_refgrp * grp)248 ref_retain_group(struct os_refgrp *grp)
249 {
250 if (grp) {
251 atomic_fetch_add_explicit(&grp->grp_count, 1, memory_order_relaxed);
252 atomic_fetch_add_explicit(&grp->grp_retain_total, 1, memory_order_relaxed);
253 ref_retain_group(grp->grp_parent);
254 }
255 }
256
257 __attribute__((cold, noinline))
258 static void
ref_release_group(struct os_refgrp * grp)259 ref_release_group(struct os_refgrp *grp)
260 {
261 if (grp) {
262 atomic_fetch_sub_explicit(&grp->grp_count, 1, memory_order_relaxed);
263 atomic_fetch_add_explicit(&grp->grp_release_total, 1, memory_order_relaxed);
264
265 ref_release_group(grp->grp_parent);
266 }
267 }
268
269 __attribute__((cold, noinline))
270 static void
ref_drop_group(struct os_refgrp * grp)271 ref_drop_group(struct os_refgrp *grp)
272 {
273 if (grp) {
274 atomic_fetch_sub_explicit(&grp->grp_children, 1, memory_order_relaxed);
275 ref_drop_group(grp->grp_parent);
276 }
277 }
278
279 __attribute__((cold, noinline))
280 static void
ref_init_debug(void * rc,struct os_refgrp * grp,os_ref_count_t count)281 ref_init_debug(void *rc, struct os_refgrp *grp, os_ref_count_t count)
282 {
283 ref_attach_to_group(rc, grp, count);
284
285 for (os_ref_count_t i = 0; i < count; i++) {
286 ref_log_op(grp, rc, REFLOG_RETAIN);
287 }
288 }
289
/*
 * Debug side of a retain: bump group statistics, then append a RETAIN
 * record to the nearest btlog.
 */
__attribute__((cold, noinline))
static void
ref_retain_debug(void *rc, struct os_refgrp * __debug_only grp)
{
	ref_retain_group(grp);
	ref_log_op(grp, rc, REFLOG_RETAIN);
}
297
/*
 * Debug side of a release: log first, then update group statistics
 * (the mirror order of ref_retain_debug).
 */
__attribute__((cold, noinline))
static void
ref_release_debug(void *rc, struct os_refgrp * __debug_only grp)
{
	ref_log_op(grp, rc, REFLOG_RELEASE);
	ref_release_group(grp);
}
305
/*
 * Debug bookkeeping when the count reaches zero: erase the element's log
 * records and detach it from its group.  Returns 0, the final count.
 */
__attribute__((cold, noinline))
static os_ref_count_t
ref_drop_debug(void *rc, struct os_refgrp * __debug_only grp)
{
	ref_log_drop(grp, rc);
	ref_drop_group(grp);
	return 0;
}
314 #endif
315
/*
 * Initialize a refcount to 'count' (must be >= 1; panics otherwise).
 * With debug enabled for 'grp', also attaches it to the group hierarchy
 * and logs the initial references.
 */
void
os_ref_init_count_internal(os_ref_atomic_t *rc, struct os_refgrp * __debug_only grp, os_ref_count_t count)
{
	os_ref_check_underflow(rc, count, 1);
	atomic_init(rc, count);

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_init_debug(rc, grp, count);
	}
#endif
}
328
/*
 * Common retain path: relaxed increment, then validate that the
 * pre-increment value was at least the floor 'f' and below the ceiling.
 */
static inline void
__os_ref_retain(os_ref_atomic_t *rc, os_ref_count_t f,
    struct os_refgrp * __debug_only grp)
{
	os_ref_count_t old = atomic_fetch_add_explicit(rc, 1, memory_order_relaxed);
	os_ref_check_retain(rc, old, f);

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif
}
342
/* Retain with the default floor of 1 (object must already be referenced). */
void
os_ref_retain_internal(os_ref_atomic_t *rc, struct os_refgrp *grp)
{
	__os_ref_retain(rc, 1, grp);
}
348
/* Retain that panics if the count was below the caller-supplied floor 'f'. */
void
os_ref_retain_floor_internal(os_ref_atomic_t *rc, os_ref_count_t f,
    struct os_refgrp *grp)
{
	__os_ref_retain(rc, f, grp);
}
355
/*
 * Attempt a retain, returning false when the current count is below the
 * floor 'f' (e.g. 0 for an object already being destroyed) instead of
 * panicking.  Overflow is still checked on the pre-increment value.
 */
static inline bool
__os_ref_retain_try(os_ref_atomic_t *rc, os_ref_count_t f,
    struct os_refgrp * __debug_only grp)
{
	os_ref_count_t cur, next;

	os_atomic_rmw_loop(rc, cur, next, relaxed, {
		if (__improbable(cur < f)) {
			os_atomic_rmw_loop_give_up(return false);
		}

		next = cur + 1;
	});

	os_ref_check_overflow(rc, cur);

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif

	return true;
}
380
/* Try-retain with the default floor of 1; false if the count was 0. */
bool
os_ref_retain_try_internal(os_ref_atomic_t *rc, struct os_refgrp *grp)
{
	return __os_ref_retain_try(rc, 1, grp);
}
386
/* Try-retain with a caller-supplied floor 'f'; false if below the floor. */
bool
os_ref_retain_floor_try_internal(os_ref_atomic_t *rc, os_ref_count_t f,
    struct os_refgrp *grp)
{
	return __os_ref_retain_try(rc, f, grp);
}
393
/*
 * Common release path shared by all flavors.  Subtracts 'n' with
 * 'release_order'; when fewer than 'n' remains afterwards (i.e. no whole
 * reference is left -- for masked counts only the low flag bits remain),
 * performs a load with 'dealloc_order' so the caller may deallocate safely.
 * Returns the remaining value.
 */
__attribute__((always_inline))
static inline os_ref_count_t
_os_ref_release_inline(os_ref_atomic_t *rc, os_ref_count_t n,
    struct os_refgrp * __debug_only grp,
    memory_order release_order, memory_order dealloc_order)
{
	os_ref_count_t val;

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		/*
		 * Care not to use 'rc' after the decrement because it might be deallocated
		 * under us.
		 */
		ref_release_debug(rc, grp);
	}
#endif

	val = atomic_fetch_sub_explicit(rc, n, release_order);
	os_ref_check_underflow(rc, val, n);
	val -= n;
	if (__improbable(val < n)) {
		/* last reference: synchronize before the caller tears down */
		atomic_load_explicit(rc, dealloc_order);
	}

#if OS_REFCNT_DEBUG
	/*
	 * The only way to safely access the ref count or group after
	 * decrementing the count is when the count is zero (as the caller won't
	 * see the zero until the function returns).
	 */
	if (val == 0 && REFLOG_GRP_DEBUG_ENABLED(grp)) {
		return ref_drop_debug(rc, grp);
	}
#endif

	return val;
}
432
#if OS_REFCNT_DEBUG
/* Out-of-line release of 'n' references; used as the debug slow path. */
__attribute__((noinline))
static os_ref_count_t
os_ref_release_n_internal(os_ref_atomic_t *rc, os_ref_count_t n,
    struct os_refgrp * __debug_only grp,
    memory_order release_order, memory_order dealloc_order)
{
	// Legacy exported interface with bad codegen due to the barriers
	// not being immediate
	//
	// Also serves as the debug function
	return _os_ref_release_inline(rc, n, grp, release_order, dealloc_order);
}
#endif
447
/* Out-of-line single release; used as the debug slow path for n == 1. */
__attribute__((noinline))
os_ref_count_t
os_ref_release_internal(os_ref_atomic_t *rc, struct os_refgrp * __debug_only grp,
    memory_order release_order, memory_order dealloc_order)
{
	// Legacy exported interface with bad codegen due to the barriers
	// not being immediate
	//
	// Also serves as the debug function
	return _os_ref_release_inline(rc, 1, grp, release_order, dealloc_order);
}
459
/*
 * Release with release/acquire ordering (the safe default for objects
 * deallocated on last release).  Debug-enabled groups take the out-of-line
 * path; otherwise the inline fast path with immediate barriers is used.
 */
os_ref_count_t
os_ref_release_barrier_internal(os_ref_atomic_t *rc,
    struct os_refgrp * __debug_only grp)
{
#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		return os_ref_release_internal(rc, grp,
		           memory_order_release, memory_order_acquire);
	}
#endif
	return _os_ref_release_inline(rc, 1, NULL,
	           memory_order_release, memory_order_acquire);
}
473
/*
 * Release with relaxed ordering, for counts whose teardown is synchronized
 * by other means.
 */
os_ref_count_t
os_ref_release_relaxed_internal(os_ref_atomic_t *rc,
    struct os_refgrp * __debug_only grp)
{
#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		return os_ref_release_internal(rc, grp,
		           memory_order_relaxed, memory_order_relaxed);
	}
#endif
	return _os_ref_release_inline(rc, 1, NULL,
	           memory_order_relaxed, memory_order_relaxed);
}
487
488 static inline void
__os_ref_retain_locked(os_ref_atomic_t * rc,os_ref_count_t f,struct os_refgrp * __debug_only grp)489 __os_ref_retain_locked(os_ref_atomic_t *rc, os_ref_count_t f,
490 struct os_refgrp * __debug_only grp)
491 {
492 os_ref_count_t val = os_ref_get_count_internal(rc);
493 os_ref_check_retain(rc, val, f);
494 atomic_store_explicit(rc, ++val, memory_order_relaxed);
495
496 #if OS_REFCNT_DEBUG
497 if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
498 ref_retain_debug(rc, grp);
499 }
500 #endif
501 }
502
/* Lock-held retain with the default floor of 1. */
void
os_ref_retain_locked_internal(os_ref_atomic_t *rc, struct os_refgrp *grp)
{
	__os_ref_retain_locked(rc, 1, grp);
}
508
/* Lock-held retain with a caller-supplied floor 'f'. */
void
os_ref_retain_floor_locked_internal(os_ref_atomic_t *rc, os_ref_count_t f,
    struct os_refgrp *grp)
{
	__os_ref_retain_locked(rc, f, grp);
}
515
516 os_ref_count_t
os_ref_release_locked_internal(os_ref_atomic_t * rc,struct os_refgrp * __debug_only grp)517 os_ref_release_locked_internal(os_ref_atomic_t *rc, struct os_refgrp * __debug_only grp)
518 {
519 #if OS_REFCNT_DEBUG
520 if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
521 ref_release_debug(rc, grp);
522 }
523 #endif
524
525 os_ref_count_t val = os_ref_get_count_internal(rc);
526 os_ref_check_underflow(rc, val, 1);
527 atomic_store_explicit(rc, --val, memory_order_relaxed);
528
529 #if OS_REFCNT_DEBUG
530 if (val == 0 && REFLOG_GRP_DEBUG_ENABLED(grp)) {
531 return ref_drop_debug(rc, grp);
532 }
533 #endif
534
535 return val;
536 }
537
538 /*
539 * Bitwise API
540 */
541
#undef os_ref_init_count_mask
/*
 * Initialize a masked refcount: the low 'b' bits carry 'init_bits' of
 * caller flag state, the high bits carry the count.  One whole reference
 * is therefore 1 << b.
 */
void
os_ref_init_count_mask(os_ref_atomic_t *rc, uint32_t b,
    struct os_refgrp *__debug_only grp,
    os_ref_count_t init_count, uint32_t init_bits)
{
	assert(init_bits < (1U << b));
	atomic_init(rc, (init_count << b) | init_bits);
	os_ref_check_underflow(rc, (init_count << b), 1u << b);

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_init_debug(rc, grp, init_count);
	}
#endif
}
558
/*
 * Masked retain fast path: add 'n' (one whole reference scaled by the bit
 * shift) with ordering 'mo'; the pre-add value must hold at least one
 * whole reference and stay below the ceiling.
 */
__attribute__((always_inline))
static inline void
os_ref_retain_mask_inline(os_ref_atomic_t *rc, uint32_t n,
    struct os_refgrp *__debug_only grp, memory_order mo)
{
	os_ref_count_t old = atomic_fetch_add_explicit(rc, n, mo);
	os_ref_check_retain(rc, old, n);

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif
}
573
/* Masked retain with relaxed ordering. */
void
os_ref_retain_mask_internal(os_ref_atomic_t *rc, uint32_t n,
    struct os_refgrp *__debug_only grp)
{
	os_ref_retain_mask_inline(rc, n, grp, memory_order_relaxed);
}
580
/* Masked retain with acquire ordering. */
void
os_ref_retain_acquire_mask_internal(os_ref_atomic_t *rc, uint32_t n,
    struct os_refgrp *__debug_only grp)
{
	os_ref_retain_mask_inline(rc, n, grp, memory_order_acquire);
}
587
/*
 * Masked release with release/acquire ordering.  Returns the remaining raw
 * value (count plus flag bits).
 */
uint32_t
os_ref_release_barrier_mask_internal(os_ref_atomic_t *rc, uint32_t n,
    struct os_refgrp *__debug_only grp)
{
#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		return os_ref_release_n_internal(rc, n, grp,
		           memory_order_release, memory_order_acquire);
	}
#endif

	return _os_ref_release_inline(rc, n, NULL,
	           memory_order_release, memory_order_acquire);
}
602
/* Masked release with relaxed ordering.  Returns the remaining raw value. */
uint32_t
os_ref_release_relaxed_mask_internal(os_ref_atomic_t *rc, uint32_t n,
    struct os_refgrp *__debug_only grp)
{
#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		return os_ref_release_n_internal(rc, n, grp,
		           memory_order_relaxed, memory_order_relaxed);
	}
#endif

	return _os_ref_release_inline(rc, n, NULL,
	           memory_order_relaxed, memory_order_relaxed);
}
617
/*
 * Masked try-retain: fails (returns 0) when fewer than one whole reference
 * remains or any bit in 'reject_mask' is set; otherwise returns the new
 * raw value.
 */
uint32_t
os_ref_retain_try_mask_internal(os_ref_atomic_t *rc, uint32_t n,
    uint32_t reject_mask, struct os_refgrp *__debug_only grp)
{
	os_ref_count_t cur, next;

	os_atomic_rmw_loop(rc, cur, next, relaxed, {
		if (__improbable(cur < n || (cur & reject_mask))) {
			os_atomic_rmw_loop_give_up(return 0);
		}
		next = cur + n;
	});

	os_ref_check_overflow(rc, cur);

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif

	return next;
}
641
/*
 * Masked try-retain with acquire ordering; boolean result instead of the
 * new value.  Fails on a dying count or any bit in 'reject_mask'.
 */
bool
os_ref_retain_try_acquire_mask_internal(os_ref_atomic_t *rc, uint32_t n,
    uint32_t reject_mask, struct os_refgrp *__debug_only grp)
{
	os_ref_count_t cur, next;

	os_atomic_rmw_loop(rc, cur, next, acquire, {
		if (__improbable(cur < n || (cur & reject_mask))) {
			os_atomic_rmw_loop_give_up(return false);
		}
		next = cur + n;
	});

	os_ref_check_overflow(rc, cur);

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif

	return true;
}
665
#pragma mark os_pcpu

/*
 * Per-CPU refcount encoding (one uint64_t slot per CPU):
 *   bit 0 (LIVE)   - set on every slot while the count is distributed;
 *                    cleared when os_pcpu_ref_kill collapses onto slot 0.
 *   bit 1 (WAITER) - set on slot 0 while a thread blocks in
 *                    os_pcpu_ref_wait_for_death.
 *   bits 2+        - reference count, in units of OS_PCPU_REF_INC.
 */
#define OS_PCPU_REF_LIVE 1ull
#define OS_PCPU_REF_WAITER 2ull
#define OS_PCPU_REF_INC 4ull

/* Internal view of the zpercpu allocation backing an os_pcpu_ref_t. */
typedef uint64_t _Atomic *__zpercpu __os_pcpu_ref_t;
673
/* Convert the opaque handle back to its per-CPU pointer representation. */
static inline __os_pcpu_ref_t
os_pcpu_get(os_pcpu_ref_t ref)
{
	return (__os_pcpu_ref_t)ref;
}
679
/* Scale a reference count into the slot encoding (count lives above the flag bits). */
static inline uint64_t
os_pcpu_count_to_value(os_ref_count_t cnt)
{
	return cnt * OS_PCPU_REF_INC;
}
685
/* Extract the reference count from a slot value (drops the flag bits). */
static inline os_ref_count_t
os_pcpu_value_to_count(uint64_t v)
{
	return (os_ref_count_t)(v / OS_PCPU_REF_INC);
}
691
/*
 * Diagnose an invalid os_pcpu_ref_destroy: 'n' is the leftover slot state,
 * reported as still-live, has-waiter, or non-zero count.
 */
__abortlike
static void
__os_pcpu_ref_destroy_panic(os_pcpu_ref_t *ref, uint64_t n)
{
	if (n & OS_PCPU_REF_LIVE) {
		panic("os_pcpu_ref: destroying live refcount %p at %p",
		    os_pcpu_get(*ref), ref);
	}
	if (n & OS_PCPU_REF_WAITER) {
		panic("os_pcpu_ref: destroying refcount %p with a waiter at %p",
		    os_pcpu_get(*ref), ref);
	}
	panic("os_pcpu_ref: destroying non-zero refcount %p at %p",
	    os_pcpu_get(*ref), ref);
}
707
/* Per-CPU count exceeded OS_REFCNT_MAX_COUNT. */
__abortlike
static void
__os_pcpu_ref_overflow_panic(__os_pcpu_ref_t rc)
{
	panic("os_pcpu_ref: overflow (rc=%p)", rc);
}
714
/*
 * Diagnose a failed per-CPU retain: v == 0 means a retain of a dead count
 * (resurrection); otherwise the count overflowed.
 */
__abortlike
static void
__os_pcpu_ref_retain_panic(__os_pcpu_ref_t rc, uint64_t v)
{
	if (v == 0) {
		panic("os_pcpu_ref: attempted resurrection (rc=%p)", rc);
	} else {
		__os_pcpu_ref_overflow_panic(rc);
	}
}
725
/* release_live dropped what turned out to be the final reference. */
__abortlike
static void
__os_pcpu_ref_release_live_panic(__os_pcpu_ref_t rc)
{
	panic("os_pcpu_ref: unexpected release of final reference (rc=%p)", rc);
}
732
/* A release underflowed the collapsed count. */
__abortlike
static void
__os_pcpu_ref_release_panic(__os_pcpu_ref_t rc)
{
	panic("os_pcpu_ref: over-release (rc=%p)", rc);
}
739
/* os_pcpu_ref_kill found a slot already dead (kill called twice). */
__abortlike
static void
__os_pcpu_ref_kill_panic(__os_pcpu_ref_t rc)
{
	panic("os_pcpu_ref: double-kill (rc=%p)", rc);
}
746
/*
 * Diagnose an invalid wait_for_death: either a second waiter piled on, or
 * the count was never killed (still LIVE).
 */
__abortlike
static void
__os_pcpu_ref_invalid_wait_panic(__os_pcpu_ref_t rc, uint64_t ov)
{
	if (ov & OS_PCPU_REF_WAITER) {
		panic("os_pcpu_ref: double-wait (rc=%p)", rc);
	} else {
		panic("os_pcpu_ref: wait while still live (rc=%p)", rc);
	}
}
757
/*
 * Allocate and initialize a per-CPU refcount with one initial reference.
 * Every slot starts LIVE; the initial reference is parked on CPU 0.
 * (The parenthesized name suppresses expansion of a same-named
 * function-like macro.)
 */
void
(os_pcpu_ref_init)(os_pcpu_ref_t * ref, struct os_refgrp *__debug_only grp)
{
	__os_pcpu_ref_t rc;

	rc = zalloc_percpu(percpu_u64_zone, Z_WAITOK | Z_NOFAIL);
	zpercpu_foreach_cpu(cpu) {
		os_atomic_init(zpercpu_get_cpu(rc, cpu),
		    OS_PCPU_REF_LIVE + (cpu ? 0 : OS_PCPU_REF_INC));
	}

	*ref = (os_pcpu_ref_t)rc;
#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif
}
776
/*
 * Tear down a per-CPU refcount.  A still-live count is killed first
 * (dropping its final reference); all slots must then be zero -- any
 * leftover LIVE/WAITER/count state panics with a specific message.
 */
void
(os_pcpu_ref_destroy)(os_pcpu_ref_t * ref, struct os_refgrp *__debug_only grp)
{
	__os_pcpu_ref_t rc = os_pcpu_get(*ref);
	uint64_t n = 0;

	n = os_atomic_load_wide(zpercpu_get_cpu(rc, 0), relaxed);
	if (n & OS_PCPU_REF_LIVE) {
		n = os_pcpu_ref_kill(*ref, grp);
	} else {
		/* already dead: every remote slot must have drained to 0 */
		for (int cpu = zpercpu_count(); cpu-- > 1;) {
			n |= os_atomic_load_wide(zpercpu_get_cpu(rc, cpu), relaxed);
		}
	}
	if (n) {
		__os_pcpu_ref_destroy_panic(ref, n);
	}

	*ref = 0;
	zfree_percpu(percpu_u64_zone, rc);
}
798
799 os_ref_count_t
os_pcpu_ref_count(os_pcpu_ref_t ref)800 os_pcpu_ref_count(os_pcpu_ref_t ref)
801 {
802 uint64_t v;
803
804 v = os_atomic_load_wide(zpercpu_get_cpu(os_pcpu_get(ref), 0), relaxed);
805 if (v & OS_PCPU_REF_LIVE) {
806 return OS_REFCNT_MAX_COUNT;
807 }
808 return os_pcpu_value_to_count(v);
809 }
810
/*
 * Fast path: apply +/- one reference to the current CPU's slot.
 * Returns the slot's pre-update value.  If the slot is no longer LIVE
 * (count was killed), nothing is changed and *cpup is set to -1;
 * otherwise *cpup records which slot was modified so a slow path can
 * undo/redirect it.  Retains are relaxed; releases use release order.
 * NOTE(review): cpu_number() may go stale across the atomic op if the
 * thread migrates, but the update still lands on some valid slot and
 * only the per-slot sum matters -- confirm against callers' assumptions.
 */
static inline uint64_t
__os_pcpu_ref_delta(__os_pcpu_ref_t rc, int delta, int *cpup)
{
	_Atomic uint64_t *rcp;
	uint64_t v;
	int cpu;

	cpu = cpu_number();
	rcp = zpercpu_get_cpu(rc, cpu);
	v = os_atomic_load_wide(rcp, relaxed);
	if (__improbable((v & OS_PCPU_REF_LIVE) == 0)) {
		/* killed: let the slow path operate on slot 0 */
		*cpup = -1;
		return v;
	}

	*cpup = cpu;
	if (delta > 0) {
		return os_atomic_add_orig(rcp, OS_PCPU_REF_INC, relaxed);
	} else {
		return os_atomic_sub_orig(rcp, OS_PCPU_REF_INC, release);
	}
}
833
/*
 * Retain slow path, taken when the fast path saw a dead (killed) slot.
 * Undoes any increment applied to a remote slot, redirects it onto slot 0
 * (where the collapsed count lives), and validates the result.
 */
__attribute__((noinline))
static void
__os_pcpu_ref_retain_slow(__os_pcpu_ref_t rc, int cpu, uint64_t v)
{
	if (cpu > 0) {
		/* undo the increment that landed on a killed remote slot */
		os_atomic_sub(zpercpu_get_cpu(rc, cpu),
		    OS_PCPU_REF_INC, relaxed);
	}

	if (cpu != 0) {
		v = os_atomic_add_orig(zpercpu_get_cpu(rc, 0),
		    OS_PCPU_REF_INC, relaxed);
		if (v & OS_PCPU_REF_LIVE) {
			/* we're doing this concurrently to an os_pcpu_ref_kill */
			return;
		}
	}

	/* dead count: retaining from 0 or past the ceiling is fatal */
	if (v == 0 || v >= os_pcpu_count_to_value(OS_REFCNT_MAX_COUNT)) {
		__os_pcpu_ref_retain_panic(rc, v);
	}
}
856
/*
 * Retain: relaxed per-CPU increment on the fast path, falling back to the
 * slot-0 slow path once the count has been killed.
 */
void
(os_pcpu_ref_retain)(os_pcpu_ref_t ref, struct os_refgrp * __debug_only grp)
{
	__os_pcpu_ref_t rc = os_pcpu_get(ref);
	uint64_t v;
	int cpu;

	v = __os_pcpu_ref_delta(rc, +1, &cpu);
	if (__improbable((v & OS_PCPU_REF_LIVE) == 0)) {
		__os_pcpu_ref_retain_slow(rc, cpu, v);
	}

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif
}
875
/*
 * Try-retain: succeeds only while the current CPU's slot is still LIVE
 * (count not killed); returns false once teardown has begun.
 */
bool
(os_pcpu_ref_retain_try)(os_pcpu_ref_t ref, struct os_refgrp *__debug_only grp)
{
	__os_pcpu_ref_t rc = os_pcpu_get(ref);
	_Atomic uint64_t *rcp = zpercpu_get(rc);
	uint64_t ov, nv;

	os_atomic_rmw_loop(rcp, ov, nv, relaxed, {
		if ((ov & OS_PCPU_REF_LIVE) == 0) {
			os_atomic_rmw_loop_give_up(return false);
		}
		nv = ov + OS_PCPU_REF_INC;
	});

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_retain_debug(rc, grp);
	}
#endif
	return true;
}
897
/*
 * release_live slow path (mirror of __os_pcpu_ref_retain_slow): undo a
 * decrement applied to a killed remote slot, redirect it onto slot 0, and
 * panic if fewer than two references existed (the release was final).
 */
__attribute__((noinline))
static void
__os_pcpu_ref_release_live_slow(__os_pcpu_ref_t rc, int cpu, uint64_t v)
{
	if (cpu > 0) {
		/* undo the decrement that landed on a killed remote slot */
		os_atomic_add(zpercpu_get_cpu(rc, cpu),
		    OS_PCPU_REF_INC, relaxed);
	}
	if (cpu != 0) {
		v = os_atomic_sub_orig(zpercpu_get_cpu(rc, 0),
		    OS_PCPU_REF_INC, release);
		if (v & OS_PCPU_REF_LIVE) {
			/* we're doing this concurrently to an os_pcpu_ref_kill */
			return;
		}
	}

	if (v < os_pcpu_count_to_value(2)) {
		__os_pcpu_ref_release_live_panic(rc);
	}
}
919
/*
 * Release a reference the caller asserts is not the last one; panics if it
 * turns out to be final.
 */
void
(os_pcpu_ref_release_live)(os_pcpu_ref_t ref, struct os_refgrp *__debug_only grp)
{
	__os_pcpu_ref_t rc = os_pcpu_get(ref);
	uint64_t v;
	int cpu;

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		/*
		 * Care not to use 'rc' after the decrement because it might be deallocated
		 * under us.
		 */
		ref_release_debug(rc, grp);
	}
#endif

	v = __os_pcpu_ref_delta(rc, -1, &cpu);

	if (__improbable((v & OS_PCPU_REF_LIVE) == 0)) {
		__os_pcpu_ref_release_live_slow(rc, cpu, v);
	}
}
943
/*
 * Release slow path after kill has collapsed the count onto slot 0:
 * redirect the decrement onto slot 0, detect underflow, and if the count
 * reached zero wake any thread parked in os_pcpu_ref_wait_for_death.
 * Returns the remaining count (OS_REFCNT_MAX_COUNT while racing a kill).
 */
__attribute__((noinline))
static os_ref_count_t
__os_pcpu_ref_release_slow(
	__os_pcpu_ref_t        rc,
	int                    cpu,
	uint64_t               v
	__os_refgrp_arg(struct os_refgrp *grp))
{
	uint64_t _Atomic *rc0 = zpercpu_get_cpu(rc, 0);

	if (cpu > 0) {
		/* undo the decrement that landed on a killed remote slot */
		os_atomic_add(zpercpu_get_cpu(rc, cpu),
		    OS_PCPU_REF_INC, relaxed);
	}
	if (cpu != 0) {
		v = os_atomic_sub_orig(rc0, OS_PCPU_REF_INC, release);
		if (v & OS_PCPU_REF_LIVE) {
			/* we're doing this concurrently to an os_pcpu_ref_kill */
			return OS_REFCNT_MAX_COUNT;
		}
	}

	/* 'v' is the pre-decrement value; underflow means over-release */
	if (os_sub_overflow(v, OS_PCPU_REF_INC, &v)) {
		__os_pcpu_ref_release_panic(rc);
	}

	os_atomic_thread_fence(acquire);
	if (v == OS_PCPU_REF_WAITER) {
		/* count hit zero with a waiter parked: clear the bit and wake it */
		os_atomic_andnot(rc0, OS_PCPU_REF_WAITER, release);
		thread_wakeup(rc);
		v = 0;
	}
#if OS_REFCNT_DEBUG
	if (v == 0 && REFLOG_GRP_DEBUG_ENABLED(grp)) {
		return ref_drop_debug(rc, grp);
	}
#endif
	return os_pcpu_value_to_count(v);
}
983
/*
 * Release a reference.  While the count is live (distributed per-CPU) the
 * exact remainder is unknowable, so OS_REFCNT_MAX_COUNT is returned; after
 * a kill, the slow path returns the true remaining count (0 = may free).
 */
os_ref_count_t
(os_pcpu_ref_release)(os_pcpu_ref_t ref, struct os_refgrp *__debug_only grp)
{
	__os_pcpu_ref_t rc = os_pcpu_get(ref);
	uint64_t v;
	int cpu;

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_release_debug(rc, grp);
	}
#endif

	v = __os_pcpu_ref_delta(rc, -1, &cpu);
	if (__improbable((v & OS_PCPU_REF_LIVE) == 0)) {
		return __os_pcpu_ref_release_slow(rc, cpu, v __os_refgrp_arg(grp));
	}

	return OS_REFCNT_MAX_COUNT;
}
1004
/*
 * Kill a live per-CPU refcount: drain every remote slot, collapse the
 * distributed count onto slot 0 (clearing all LIVE bits), and drop the
 * reference being killed.  Subsequent retains/releases take the slot-0
 * slow paths.  Returns the remaining count (0 = caller may destroy).
 */
os_ref_count_t
(os_pcpu_ref_kill)(os_pcpu_ref_t ref, struct os_refgrp *__debug_only grp)
{
	__os_pcpu_ref_t rc = os_pcpu_get(ref);
	uint64_t v = 0, t = 0;

#if OS_REFCNT_DEBUG
	if (REFLOG_GRP_DEBUG_ENABLED(grp)) {
		ref_release_debug(rc, grp);
	}
#endif

	/* drain remote slots, accumulating their counts minus each LIVE bit */
	for (int cpu = zpercpu_count(); cpu-- > 1;) {
		v = os_atomic_xchg(zpercpu_get_cpu(rc, cpu), 0, relaxed);
		if ((v & OS_PCPU_REF_LIVE) == 0) {
			__os_pcpu_ref_kill_panic(rc);
		}
		t += v - OS_PCPU_REF_LIVE;
	}
	/* also clear slot 0's LIVE bit and drop the killed reference itself */
	t -= OS_PCPU_REF_LIVE + OS_PCPU_REF_INC;

	v = os_atomic_add(zpercpu_get_cpu(rc, 0), t, acq_rel);
	if (v & OS_PCPU_REF_LIVE) {
		__os_pcpu_ref_kill_panic(rc);
	}

	if (v >= os_pcpu_count_to_value(OS_REFCNT_MAX_COUNT)) {
		__os_pcpu_ref_overflow_panic(rc);
	}

#if OS_REFCNT_DEBUG
	if (v == 0 && REFLOG_GRP_DEBUG_ENABLED(grp)) {
		return ref_drop_debug(rc, grp);
	}
#endif
	return os_pcpu_value_to_count(v);
}
1042
1043 #if KERNEL
1044
/*
 * Block until a killed per-CPU refcount drains to zero.  At most one
 * waiter is supported, and the count must already have been killed
 * (panics otherwise).  Paired with the wakeup in __os_pcpu_ref_release_slow.
 */
void
os_pcpu_ref_wait_for_death(os_pcpu_ref_t ref)
{
	__os_pcpu_ref_t rc = os_pcpu_get(ref);
	uint64_t _Atomic *rc0 = zpercpu_get_cpu(rc, 0);
	uint64_t ov, nv;

	ov = os_atomic_load(rc0, relaxed);
	if (ov == 0) {
		/* already dead; synchronize with the releasing store */
		os_atomic_thread_fence(acquire);
		return;
	}

	assert_wait(rc, THREAD_UNINT);

	os_atomic_rmw_loop(rc0, ov, nv, relaxed, {
		if (ov & (OS_PCPU_REF_WAITER | OS_PCPU_REF_LIVE)) {
			__os_pcpu_ref_invalid_wait_panic(rc, ov);
		}
		if (ov == 0) {
			/* raced with the final release: nothing to wait for */
			os_atomic_rmw_loop_give_up(break);
		}
		nv = ov | OS_PCPU_REF_WAITER;
	});

	if (ov == 0) {
		os_atomic_thread_fence(acquire);
		clear_wait(current_thread(), THREAD_AWAKENED);
	} else {
		/* woken by __os_pcpu_ref_release_slow when the count hits zero */
		thread_block(THREAD_CONTINUE_NULL);
	}
}
1077
1078 #endif
1079