xref: /xnu-11215.41.3/bsd/skywalk/mem/skmem_cache_var.h (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #ifndef _SKYWALK_MEM_SKMEMCACHEVAR_H
30 #define _SKYWALK_MEM_SKMEMCACHEVAR_H
31 
32 #ifdef BSD_KERNEL_PRIVATE
33 #include <skywalk/core/skywalk_var.h>
34 #include <skywalk/os_channel_private.h>
35 #include <kern/cpu_number.h>
36 #include <machine/machine_routines.h>
37 
38 /*
39  * Buffer control.
40  */
struct skmem_bufctl {
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void                    *__sized_by(bc_lim) bc_addr;       /* buffer obj address */
	void                    *bc_addrm;      /* mirrored buffer obj addr */
	struct skmem_slab       *bc_slab;       /* controlling slab */
	uint32_t                bc_lim;         /* buffer obj limit (bounds bc_addr) */
	uint32_t                bc_flags;       /* SKMEM_BUFCTL_* flags */
	uint32_t                bc_idx;         /* buffer index within slab */
	/* outstanding use; atomically updated via skmem_bufctl_{use,unuse}() */
	volatile uint32_t       bc_usecnt;
};
51 
52 #define SKMEM_BUFCTL_SHAREOK    0x1             /* supports sharing */
53 
54 #define SKMEM_STACK_DEPTH       16              /* maximum audit stack depth */
55 
56 #define SKMEM_CACHE_ALIGN       8               /* min guaranteed alignment */
57 
58 /*
59  * Alternative buffer control if SKM_MODE_AUDIT is set.
60  */
struct skmem_bufctl_audit {
	/*
	 * The leading fields must match struct skmem_bufctl exactly
	 * (same order and types), since audit-mode bufctls are used
	 * interchangeably through struct skmem_bufctl pointers.
	 */
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void                    *__sized_by(bc_lim) bc_addr;       /* buffer address */
	void                    *bc_addrm;      /* mirrored buffer address */
	struct skmem_slab       *bc_slab;       /* controlling slab */
	uint32_t                bc_lim;         /* buffer obj limit (bounds bc_addr) */
	uint32_t                bc_flags;       /* SKMEM_BUFCTL_* flags */
	uint32_t                bc_idx;         /* buffer index within slab */
	volatile uint32_t       bc_usecnt;      /* outstanding use */
	/* audit-only trailing fields: record of the last transaction */
	struct thread           *bc_thread;     /* thread doing transaction */
	uint32_t                bc_timestamp;   /* transaction time */
	uint32_t                bc_depth;       /* stack depth captured */
	void                    *bc_stack[SKMEM_STACK_DEPTH]; /* stack */
};
75 
76 /*
77  * Buffer control hash bucket.
78  */
struct skmem_bufctl_bkt {
	/* singly-linked list of bufctls whose buffers are allocated */
	SLIST_HEAD(, skmem_bufctl) bcb_head;    /* bufctl allocated list */
};
82 
83 /*
84  * Slab.
85  */
struct skmem_slab {
	TAILQ_ENTRY(skmem_slab) sl_link;        /* slab freelist linkage */
	struct skmem_cache      *sl_cache;      /* controlling cache */
	void                    *sl_base;       /* base of allocated memory */
	void                    *sl_basem;      /* base of mirrored memory */
	struct sksegment        *sl_seg;        /* backing segment */
	struct sksegment        *sl_segm;       /* backing mirrored segment */
	SLIST_HEAD(, skmem_bufctl) sl_head;     /* bufctl free list */
	uint32_t                sl_refcnt;      /* outstanding allocations */
	uint32_t                sl_chunks;      /* # of buffers in slab */
};
97 
/* true if the slab has at least one allocated and one free buffer */
#define SKMEM_SLAB_IS_PARTIAL(sl)       \
	((sl)->sl_refcnt > 0 && (sl)->sl_refcnt < (sl)->sl_chunks)

/* true if buf lies within the slab's memory span (unsigned wrap handles buf < base) */
#define SKMEM_SLAB_MEMBER(sl, buf)      \
	(((size_t)(buf) - (size_t)(sl)->sl_base) < (sl)->sl_cache->skm_slabsize)
103 
104 /*
105  * Magazine type.
106  */
struct skmem_magtype {
	int                     mt_magsize;     /* magazine size (# of objs) */
	int                     mt_align;       /* magazine alignment */
	size_t                  mt_minbuf;      /* all smaller bufs qualify */
	size_t                  mt_maxbuf;      /* no larger bufs qualify */
	struct skmem_cache      *mt_cache;      /* cache the magazines come from */
	char                    mt_cname[64];   /* magazine cache name */
};
115 
116 /*
117  * Magazine.
118  */
struct skmem_mag {
	SLIST_ENTRY(skmem_mag)  mg_link;        /* magazine linkage */
	struct skmem_magtype    *mg_magtype;    /* magazine type */
	size_t                  mg_count;       /* # of mg_round array elements */
	void                    *mg_round[__counted_by(mg_count)];   /* one or more objs */
};

/*
 * Byte size of a magazine holding n rounds; mg_round is a flexible
 * tail, so the size is the offset of its (n+1)-th element.
 */
#define SKMEM_MAG_SIZE(n)       \
	offsetof(struct skmem_mag, mg_round[n])
128 
129 /*
130  * Magazine depot.
131  */
struct skmem_maglist {
	SLIST_HEAD(, skmem_mag) ml_list;        /* magazine list */
	uint32_t                ml_total;       /* number of magazines */
	uint32_t                ml_min;         /* min since last update */
	uint32_t                ml_reaplimit;   /* max reapable magazines */
	uint64_t                ml_alloc;       /* allocations from this list */
};
139 
140 /*
141  * Per-CPU cache structure.
142  */
struct skmem_cpu_cache {
	decl_lck_mtx_data(, cp_lock);           /* protects this CPU's state */
	struct skmem_mag        *cp_loaded;     /* currently filled magazine */
	struct skmem_mag        *cp_ploaded;    /* previously filled magazine */
	uint64_t                cp_alloc;       /* allocations from this cpu */
	uint64_t                cp_free;        /* frees to this cpu */
	int                     cp_rounds;      /* # of objs in filled mag */
	int                     cp_prounds;     /* # of objs in previous mag */
	int                     cp_magsize;     /* # of objs in a full mag */
	/* aligned to the maximum cache line size (per-CPU array element) */
} __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
153 
154 /*
155  * Object's region information.
156  *
157  * This info is provided to skmem_ctor_fn_t() to assist master and
158  * slave objects construction.  It is also provided separately via
159  * skmem_cache_get_obj_info() when called on an object that's been
160  * allocated from skmem_cache.  Information about slave object is
161  * available only at constructor time.
162  */
struct skmem_obj_info {
	void                    *__sized_by(oi_size) oi_addr;       /* object address */
	struct skmem_bufctl     *oi_bc;         /* buffer control (master) */
	uint32_t                oi_size;        /* actual object size (bounds oi_addr) */
	obj_idx_t               oi_idx_reg;     /* object idx within region */
	obj_idx_t               oi_idx_seg;     /* object idx within segment */
} __attribute__((__packed__));              /* packed: no padding between fields */
170 
171 /*
172  * Generic one-way linked list element structure.  This is used to
173  * handle skmem_cache_batch_alloc() requests in order to chain the
174  * allocated objects together before returning them to the caller.
175  * It is also used when freeing a batch of packets by the caller of
176  * skmem_cache_batch_free().  Note that this requires the region's
177  * object to be at least the size of struct skmem_obj, as we store
178  * this information at the beginning of each object in the chain.
179  */
struct skmem_obj {
	/*
	 * Given that we overlay this structure on top of whatever
	 * structure that the object represents, the constructor must
	 * ensure that it reserves at least the size of a pointer
	 * at the top for the linkage.
	 */
	struct skmem_obj        *mo_next;       /* next object in the list */
	/*
	 * The following are used only for raw (unconstructed) objects
	 * coming out of the slab layer during allocations.  They are
	 * not touched otherwise by skmem_cache when the object resides
	 * in the magazine.  By utilizing this space, we avoid having
	 * to allocate temporary storage elsewhere.
	 */
	struct skmem_obj_info   mo_info;        /* object's info */
	struct skmem_obj_info   mo_minfo;       /* mirrored object's info */
};
198 
/* accessors for struct skmem_obj_info fields */
#define SKMEM_OBJ_ADDR(_oi)     (_oi)->oi_addr
#define SKMEM_OBJ_BUFCTL(_oi)   (_oi)->oi_bc
#define SKMEM_OBJ_SIZE(_oi)     (_oi)->oi_size
#define SKMEM_OBJ_IDX_REG(_oi)  (_oi)->oi_idx_reg
#define SKMEM_OBJ_IDX_SEG(_oi)  (_oi)->oi_idx_seg
/* segment the object belongs to (only for master) */
#define SKMEM_OBJ_SEG(_oi)      (_oi)->oi_bc->bc_slab->sl_seg
/* offset of object relative to the object's own region (size * region index) */
#define SKMEM_OBJ_ROFF(_oi)     \
	((mach_vm_offset_t)(SKMEM_OBJ_SIZE(_oi) * SKMEM_OBJ_IDX_REG(_oi)))
209 
/* object constructor: master info, slave info, private arg, skmflag */
typedef int (*skmem_ctor_fn_t)(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
/* object destructor: object address, private arg */
typedef void (*skmem_dtor_fn_t)(void *, void *);
/* cache reclaim callback: private arg */
typedef void (*skmem_reclaim_fn_t)(void *);
/* slab-layer allocate: cache, master info, slave info, skmflag */
typedef int (*skmem_slab_alloc_fn_t)(struct skmem_cache *,
    struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
/* slab-layer free: cache, object address */
typedef void (*skmem_slab_free_fn_t)(struct skmem_cache *, void *);
217 
218 /*
219  * Cache.
220  */
struct skmem_cache {
#if KASAN
	/* NOTE(review): KASAN-only bookkeeping fields — confirm intended use */
	void            *skm_start;
	uint32_t        skm_align[0];
#endif
	/*
	 * Commonly-accessed elements during alloc and free.
	 */
	uint32_t        skm_mode;               /* cache mode flags (SKM_MODE_*) */
	skmem_ctor_fn_t skm_ctor;               /* object constructor */
	skmem_dtor_fn_t skm_dtor;               /* object destructor */
	skmem_reclaim_fn_t skm_reclaim;         /* cache reclaim */
	void            *skm_private;           /* opaque arg to callbacks */

	/*
	 * Depot.
	 */
	decl_lck_mtx_data(, skm_dp_lock);       /* protects depot layer */
	struct skmem_magtype *skm_magtype;      /* magazine type */
	struct skmem_maglist skm_full;          /* full magazines */
	struct skmem_maglist skm_empty;         /* empty magazines */

	/*
	 * Slab.
	 */
	decl_lck_mtx_data(, skm_sl_lock);       /* protects slab layer */
	skmem_slab_alloc_fn_t skm_slab_alloc;   /* slab allocate */
	skmem_slab_free_fn_t skm_slab_free;     /* slab free */
	size_t          skm_chunksize;          /* bufsize + alignment */
	size_t          skm_objsize;            /* actual obj size in slab */
	size_t          skm_slabsize;           /* size of a slab */
	size_t          skm_hash_initial;       /* initial hash table size */
	size_t          skm_hash_limit;         /* hash table size limit */
	size_t          skm_hash_shift;         /* get to interesting bits */
	size_t          skm_hash_mask;          /* hash table mask */
	size_t          skm_hash_size;          /* # of buckets in skm_hash_table */
	struct skmem_bufctl_bkt *__counted_by(skm_hash_size) skm_hash_table; /* alloc'd buffer htable */
	TAILQ_HEAD(, skmem_slab) skm_sl_partial_list; /* partially-allocated */
	TAILQ_HEAD(, skmem_slab) skm_sl_empty_list;   /* fully-allocated */
	struct skmem_region *skm_region;        /* region source for slabs */

	/*
	 * Statistics.
	 */
	uint32_t        skm_cpu_mag_size;       /* current magazine size */
	uint32_t        skm_cpu_mag_resize;     /* # of magazine resizes */
	uint32_t        skm_cpu_mag_purge;      /* # of magazine purges */
	uint32_t        skm_cpu_mag_reap;       /* # of magazine reaps */
	uint64_t        skm_depot_contention;   /* mutex contention count */
	uint64_t        skm_depot_contention_prev; /* previous snapshot */
	uint32_t        skm_depot_full;         /* # of full magazines */
	uint32_t        skm_depot_empty;        /* # of empty magazines */
	uint32_t        skm_depot_ws_zero;      /* # of working set flushes */
	uint32_t        skm_sl_rescale;         /* # of hash table rescales */
	uint32_t        skm_sl_create;          /* slab creates */
	uint32_t        skm_sl_destroy;         /* slab destroys */
	uint32_t        skm_sl_alloc;           /* slab layer allocations */
	uint32_t        skm_sl_free;            /* slab layer frees */
	uint32_t        skm_sl_partial;         /* # of partial slabs */
	uint32_t        skm_sl_empty;           /* # of empty slabs */
	uint64_t        skm_sl_alloc_fail;      /* total failed allocations */
	uint64_t        skm_sl_bufinuse;        /* total unfreed buffers */
	uint64_t        skm_sl_bufmax;          /* max buffers ever */

	/*
	 * Cache properties.
	 */
	TAILQ_ENTRY(skmem_cache) skm_link;      /* cache linkage */
	char            skm_name[64];           /* cache name */
	uuid_t          skm_uuid;               /* cache uuid */
	size_t          skm_bufsize;            /* buffer size */
	size_t          skm_bufalign;           /* buffer alignment */
	size_t          skm_objalign;           /* object alignment */

	/*
	 * CPU layer, aligned at (maximum) cache line boundary.
	 */
	decl_lck_mtx_data(, skm_rs_lock);       /* protects resizing */
	struct thread    *skm_rs_owner;         /* resize owner */
	uint32_t        skm_rs_busy;            /* prevent resizing */
	uint32_t        skm_rs_want;            /* # of threads blocked */
	size_t          skm_cpu_cache_count;    /* # of skm_cpu_cache elements */
	/* per-CPU caches; flexible tail sized via SKMEM_CACHE_SIZE() */
	struct skmem_cpu_cache  skm_cpu_cache[__counted_by(skm_cpu_cache_count)]
	__attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
};
306 
/* byte size of a skmem_cache with n per-CPU cache entries (flexible tail) */
#define SKMEM_CACHE_SIZE(n)     \
	offsetof(struct skmem_cache, skm_cpu_cache[n])

/* per-CPU cache slot of the CPU this code is currently executing on */
#define SKMEM_CPU_CACHE(c)                                      \
	((struct skmem_cpu_cache *)((void *)((char *)(c) +      \
	SKMEM_CACHE_SIZE(cpu_number()))))
313 
314 /* valid values for skm_mode, set only by skmem_cache_create() */
315 #define SKM_MODE_NOMAGAZINES    0x00000001      /* disable magazines layer */
316 #define SKM_MODE_AUDIT          0x00000002      /* audit transactions */
317 #define SKM_MODE_NOREDIRECT     0x00000004      /* unaffected by defunct */
318 #define SKM_MODE_BATCH          0x00000008      /* supports batch alloc/free */
319 #define SKM_MODE_DYNAMIC        0x00000010      /* enable magazine resizing */
320 #define SKM_MODE_CLEARONFREE    0x00000020      /* zero-out upon slab free */
321 #define SKM_MODE_PSEUDO         0x00000040      /* external backing store */
322 #define SKM_MODE_RECLAIM        0x00000080      /* aggressive memory reclaim */
323 
324 #define SKM_MODE_BITS \
325 	"\020\01NOMAGAZINES\02AUDIT\03NOREDIRECT\04BATCH\05DYNAMIC"     \
326 	"\06CLEARONFREE\07PSEUDO\10RECLAIM"
327 
328 /*
329  * Valid flags for sk{mem,region}_alloc().  SKMEM_FAILOK is valid only if
330  * SKMEM_SLEEP is set, i.e. SKMEM_{NOSLEEP,FAILOK} are mutually exclusive.
331  * If set, SKMEM_FAILOK indicates that the segment allocation may fail,
332  * and that the cache layer would handle the retries rather than blocking
333  * inside the region allocator.
334  */
335 #define SKMEM_SLEEP             0x0     /* can block for memory; won't fail */
336 #define SKMEM_NOSLEEP           0x1     /* cannot block for memory; may fail */
337 #define SKMEM_PANIC             0x2     /* panic upon allocation failure */
338 #define SKMEM_FAILOK            0x4     /* can fail for blocking alloc */
339 
340 /* valid flag values for skmem_cache_create() */
341 #define SKMEM_CR_NOMAGAZINES    0x1     /* disable magazines layer */
342 #define SKMEM_CR_BATCH          0x2     /* support batch alloc/free */
343 #define SKMEM_CR_DYNAMIC        0x4     /* enable magazine resizing */
344 #define SKMEM_CR_CLEARONFREE    0x8     /* zero-out upon slab free */
345 #define SKMEM_CR_RECLAIM        0x10    /* aggressive memory reclaim */
346 
347 __BEGIN_DECLS
348 /*
349  * Given a buffer control, add a use count to it.
350  */
351 __attribute__((always_inline))
352 static inline void
skmem_bufctl_use(struct skmem_bufctl * bc)353 skmem_bufctl_use(struct skmem_bufctl *bc)
354 {
355 	uint32_t old, new;
356 
357 	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
358 		new = old + 1;
359 		VERIFY(new != 0);
360 		ASSERT(new == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
361 	});
362 }
363 
364 /*
365  * Given a buffer control, remove a use count from it (returns new value).
366  */
367 __attribute__((always_inline))
368 static inline uint32_t
skmem_bufctl_unuse(struct skmem_bufctl * bc)369 skmem_bufctl_unuse(struct skmem_bufctl *bc)
370 {
371 	uint32_t old, new;
372 
373 	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
374 		new = old - 1;
375 		VERIFY(old != 0);
376 		ASSERT(old == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
377 	});
378 
379 	return new;
380 }
381 
382 extern struct skmem_cache *skmem_slab_cache;    /* cache for skmem_slab */
383 extern struct skmem_cache *skmem_bufctl_cache;  /* cache for skmem_bufctl */
384 extern unsigned int bc_size;                    /* size of bufctl */
385 extern int skmem_slab_alloc_locked(struct skmem_cache *,
386     struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
387 extern void skmem_slab_free_locked(struct skmem_cache *, void *);
388 extern int skmem_slab_alloc_pseudo_locked(struct skmem_cache *,
389     struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
390 extern void skmem_slab_free_pseudo_locked(struct skmem_cache *, void *);
391 extern void skmem_slab_free(struct skmem_cache *, void *);
392 extern void skmem_slab_batch_free(struct skmem_cache *, struct skmem_obj *);
393 extern uint32_t skmem_slab_batch_alloc(struct skmem_cache *, struct skmem_obj **,
394     uint32_t, uint32_t);
395 extern int skmem_slab_alloc(struct skmem_cache *, struct skmem_obj_info *,
396     struct skmem_obj_info *, uint32_t);
397 extern void skmem_audit_bufctl(struct skmem_bufctl *);
/* slab-layer mutex helpers for a skmem_cache */
#define SKM_SLAB_LOCK(_skm)                     \
	lck_mtx_lock(&(_skm)->skm_sl_lock)
#define SKM_SLAB_LOCK_ASSERT_HELD(_skm)         \
	LCK_MTX_ASSERT(&(_skm)->skm_sl_lock, LCK_MTX_ASSERT_OWNED)
#define SKM_SLAB_LOCK_ASSERT_NOTHELD(_skm)      \
	LCK_MTX_ASSERT(&(_skm)->skm_sl_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKM_SLAB_UNLOCK(_skm)                   \
	lck_mtx_unlock(&(_skm)->skm_sl_lock)
/* hash a buffer address into the allocated-buffer table: shift then mask */
#define SKMEM_CACHE_HASH_INDEX(_a, _s, _m)      (((_a) >> (_s)) & (_m))
#define SKMEM_CACHE_HASH(_skm, _buf)                                     \
	(&(_skm)->skm_hash_table[SKMEM_CACHE_HASH_INDEX((uintptr_t)_buf, \
	(_skm)->skm_hash_shift, (_skm)->skm_hash_mask)])
410 
411 extern void skmem_cache_pre_init(void);
412 extern void skmem_cache_init(void);
413 extern void skmem_cache_fini(void);
414 extern struct skmem_cache *skmem_cache_create(const char *, size_t, size_t,
415     skmem_ctor_fn_t, skmem_dtor_fn_t, skmem_reclaim_fn_t, void *,
416     struct skmem_region *, uint32_t);
417 extern void skmem_cache_destroy(struct skmem_cache *);
418 
419 extern uint32_t skmem_cache_batch_alloc(struct skmem_cache *,
420     struct skmem_obj **list, size_t objsize, uint32_t, uint32_t);
421 
422 /*
423  * XXX -fbounds-safety: Sometimes we use skmem_cache_alloc to allocate a struct
424  * with a flexible array (e.g. struct skmem_mag). For those, we can't have the
425  * alloc function return void *__single, because we lose bounds information.
426  */
427 static inline void *__header_indexable
skmem_cache_alloc(struct skmem_cache * skm,uint32_t skmflag)428 skmem_cache_alloc(struct skmem_cache *skm, uint32_t skmflag)
429 {
430 	struct skmem_obj *__single buf;
431 
432 	(void) skmem_cache_batch_alloc(skm, &buf, skm->skm_objsize, 1, skmflag);
433 
434 	/* This is one of the few places where using __unsafe_forge is okay */
435 	return __unsafe_forge_bidi_indexable(void *, buf, buf ? skm->skm_objsize : 0);
436 }
437 
438 extern void skmem_cache_free(struct skmem_cache *, void *);
439 extern void skmem_cache_free_nocache(struct skmem_cache *, void *);
440 extern void skmem_cache_batch_free(struct skmem_cache *, struct skmem_obj *);
441 extern void skmem_cache_batch_free_nocache(struct skmem_cache *, struct skmem_obj *);
442 extern void skmem_cache_reap_now(struct skmem_cache *, boolean_t);
443 extern void skmem_cache_reap(void);
444 extern void skmem_reap_caches(boolean_t);
445 extern void skmem_cache_get_obj_info(struct skmem_cache *, void *,
446     struct skmem_obj_info *, struct skmem_obj_info *);
447 extern uint32_t skmem_cache_magazine_max(uint32_t);
448 extern boolean_t skmem_allow_magazines(void);
449 #if (DEVELOPMENT || DEBUG)
450 extern void skmem_cache_test_start(uint32_t);
451 extern void skmem_cache_test_stop(void);
452 #endif /* (DEVELOPMENT || DEBUG) */
453 __END_DECLS
454 #endif /* BSD_KERNEL_PRIVATE */
455 #endif /* _SKYWALK_MEM_SKMEMCACHEVAR_H */
456