/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _SKYWALK_MEM_SKMEMCACHEVAR_H
#define _SKYWALK_MEM_SKMEMCACHEVAR_H

#ifdef BSD_KERNEL_PRIVATE
#include <skywalk/core/skywalk_var.h>
#include <skywalk/os_channel_private.h>
#include <kern/cpu_number.h>

/*
 * Buffer control.
 */
struct skmem_bufctl {
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void                    *bc_addr;       /* buffer obj address */
	void                    *bc_addrm;      /* mirrored buffer obj addr */
	struct skmem_slab       *bc_slab;       /* controlling slab */
	uint32_t                bc_lim;         /* buffer obj limit */
	uint32_t                bc_flags;       /* SKMEM_BUFCTL_* flags */
	uint32_t                bc_idx;         /* buffer index within slab */
	volatile uint32_t       bc_usecnt;      /* outstanding use */
};

#define SKMEM_BUFCTL_SHAREOK    0x1             /* supports sharing */

#define SKMEM_STACK_DEPTH       16              /* maximum audit stack depth */

#define SKMEM_CACHE_ALIGN       8               /* min guaranteed alignment */

/*
 * Alternative buffer control if SKM_MODE_AUDIT is set.
 */
struct skmem_bufctl_audit {
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void                    *bc_addr;       /* buffer address */
	void                    *bc_addrm;      /* mirrored buffer address */
	struct skmem_slab       *bc_slab;       /* controlling slab */
	uint32_t                bc_flags;       /* SKMEM_BUFCTL_* flags */
	uint32_t                bc_idx;         /* buffer index within slab */
	volatile uint32_t       bc_usecnt;      /* outstanding use */
	struct thread           *bc_thread;     /* thread doing transaction */
	uint32_t                bc_timestamp;   /* transaction time */
	uint32_t                bc_depth;       /* stack depth */
	void                    *bc_stack[SKMEM_STACK_DEPTH]; /* stack */
};

/*
 * Buffer control hash bucket.
 */
struct skmem_bufctl_bkt {
	SLIST_HEAD(, skmem_bufctl) bcb_head;    /* bufctl allocated list */
};

/*
 * Slab.
 */
struct skmem_slab {
	TAILQ_ENTRY(skmem_slab) sl_link;        /* slab freelist linkage */
	struct skmem_cache      *sl_cache;      /* controlling cache */
	void                    *sl_base;       /* base of allocated memory */
	void                    *sl_basem;      /* base of mirrored memory */
	struct sksegment        *sl_seg;        /* backing segment */
	struct sksegment        *sl_segm;       /* backing mirrored segment */
	SLIST_HEAD(, skmem_bufctl) sl_head;     /* bufctl free list */
	uint32_t                sl_refcnt;      /* outstanding allocations */
	uint32_t                sl_chunks;      /* # of buffers in slab */
};

#define SKMEM_SLAB_IS_PARTIAL(sl)       \
	((sl)->sl_refcnt > 0 && (sl)->sl_refcnt < (sl)->sl_chunks)

#define SKMEM_SLAB_MEMBER(sl, buf)      \
	(((size_t)(buf) - (size_t)(sl)->sl_base) < (sl)->sl_cache->skm_slabsize)
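
/*
 * Note: SKMEM_SLAB_MEMBER() relies on unsigned (size_t) arithmetic; a
 * buffer address below sl_base wraps around to a huge offset and fails
 * the comparison against skm_slabsize, so the single compare covers
 * both the lower and upper bound of the slab.
 */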

/*
 * Magazine type.
 */
struct skmem_magtype {
	int                     mt_magsize;     /* magazine size (# of objs) */
	int                     mt_align;       /* magazine alignment */
	size_t                  mt_minbuf;      /* all smaller bufs qualify */
	size_t                  mt_maxbuf;      /* no larger bufs qualify */
	struct skmem_cache      *mt_cache;      /* magazine cache */
	char                    mt_cname[64];   /* magazine cache name */
};

/*
 * Magazine.
 */
struct skmem_mag {
	SLIST_ENTRY(skmem_mag)  mg_link;        /* magazine linkage */
	struct skmem_magtype    *mg_magtype;    /* magazine type */
	void                    *mg_round[1];   /* one or more objs */
};

#define SKMEM_MAG_SIZE(n)       \
	offsetof(struct skmem_mag, mg_round[n])
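
/*
 * SKMEM_MAG_SIZE(n) yields the allocation size of a magazine holding n
 * rounds: the fixed header (mg_link, mg_magtype) plus n object pointers,
 * since mg_round[] is sized as a flexible array of one or more entries.
 */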

/*
 * Magazine depot.
 */
struct skmem_maglist {
	SLIST_HEAD(, skmem_mag) ml_list;        /* magazine list */
	uint32_t                ml_total;       /* number of magazines */
	uint32_t                ml_min;         /* min since last update */
	uint32_t                ml_reaplimit;   /* max reapable magazines */
	uint64_t                ml_alloc;       /* allocations from this list */
};

/*
 * Per-CPU cache structure.
 */
struct skmem_cpu_cache {
	decl_lck_mtx_data(, cp_lock);
	struct skmem_mag        *cp_loaded;     /* currently filled magazine */
	struct skmem_mag        *cp_ploaded;    /* previously filled magazine */
	uint64_t                cp_alloc;       /* allocations from this cpu */
	uint64_t                cp_free;        /* frees to this cpu */
	int                     cp_rounds;      /* # of objs in filled mag */
	int                     cp_prounds;     /* # of objs in previous mag */
	int                     cp_magsize;     /* # of objs in a full mag */
} __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));

/*
 * Object's region information.
 *
 * This info is provided to skmem_ctor_fn_t() to assist master and
 * slave object construction.  It is also provided separately via
 * skmem_cache_get_obj_info() when called on an object that's been
 * allocated from skmem_cache.  Information about the slave object is
 * available only at constructor time.
 */
struct skmem_obj_info {
	void                    *oi_addr;       /* object address */
	struct skmem_bufctl     *oi_bc;         /* buffer control (master) */
	uint32_t                oi_size;        /* actual object size */
	obj_idx_t               oi_idx_reg;     /* object idx within region */
	obj_idx_t               oi_idx_seg;     /* object idx within segment */
} __attribute__((__packed__));

/*
 * Generic one-way linked list element structure.  This is used to
 * handle skmem_cache_batch_alloc() requests in order to chain the
 * allocated objects together before returning them to the caller.
 * It is also used by the caller of skmem_cache_batch_free() when
 * freeing a batch of packets.  Note that this requires the region's
 * object to be at least the size of struct skmem_obj, as we store
 * this information at the beginning of each object in the chain.
 */
struct skmem_obj {
	/*
	 * Given that we overlay this structure on top of whatever
	 * structure the object represents, the constructor must
	 * ensure that it reserves at least the size of a pointer
	 * at the top for the linkage.
	 */
	struct skmem_obj        *mo_next;       /* next object in the list */
	/*
	 * The following are used only for raw (unconstructed) objects
	 * coming out of the slab layer during allocations.  They are
	 * not touched otherwise by skmem_cache when the object resides
	 * in the magazine.  By utilizing this space, we avoid having
	 * to allocate temporary storage elsewhere.
	 */
	struct skmem_obj_info   mo_info;        /* object's info */
	struct skmem_obj_info   mo_minfo;       /* mirrored object's info */
};
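
/*
 * Illustrative sketch, not part of this header: with a cache created
 * using SKMEM_CR_BATCH, allocated objects come back chained through
 * mo_next, and the chain can be handed back to skmem_cache_batch_free().
 * "skm" and "want" are placeholder names, and the third and fourth
 * arguments of skmem_cache_batch_alloc() are assumed to be the requested
 * count and the SKMEM_* flags.
 *
 *	struct skmem_obj *list = NULL;
 *	uint32_t got, n = 0;
 *
 *	got = skmem_cache_batch_alloc(skm, &list, want, SKMEM_NOSLEEP);
 *	for (struct skmem_obj *mo = list; mo != NULL; mo = mo->mo_next)
 *		n++;				// walk the chain; n equals got
 *	skmem_cache_batch_free(skm, list);	// frees every chained object
 */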

#define SKMEM_OBJ_ADDR(_oi)     (_oi)->oi_addr
#define SKMEM_OBJ_BUFCTL(_oi)   (_oi)->oi_bc
#define SKMEM_OBJ_SIZE(_oi)     (_oi)->oi_size
#define SKMEM_OBJ_IDX_REG(_oi)  (_oi)->oi_idx_reg
#define SKMEM_OBJ_IDX_SEG(_oi)  (_oi)->oi_idx_seg
/* segment the object belongs to (only for master) */
#define SKMEM_OBJ_SEG(_oi)      (_oi)->oi_bc->bc_slab->sl_seg
/* offset of object relative to the object's own region */
#define SKMEM_OBJ_ROFF(_oi)     \
	((mach_vm_offset_t)(SKMEM_OBJ_SIZE(_oi) * SKMEM_OBJ_IDX_REG(_oi)))

typedef int (*skmem_ctor_fn_t)(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
typedef void (*skmem_dtor_fn_t)(void *, void *);
typedef void (*skmem_reclaim_fn_t)(void *);
typedef int (*skmem_slab_alloc_fn_t)(struct skmem_cache *,
    struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
typedef void (*skmem_slab_free_fn_t)(struct skmem_cache *, void *);
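
/*
 * A minimal constructor sketch, not part of this header.  Based on the
 * skmem_ctor_fn_t typedef above and the skmem_obj_info comment, the two
 * info arguments are assumed to describe the master and slave objects,
 * the void * argument to carry the skm_private value and the uint32_t
 * the SKMEM_* allocation flags, with zero assumed to mean success;
 * "my_obj_init" is a placeholder.
 *
 *	static int
 *	my_obj_init(struct skmem_obj_info *oi, struct skmem_obj_info *oim,
 *	    void *arg, uint32_t skmflag)
 *	{
 *		void *buf = SKMEM_OBJ_ADDR(oi);
 *		uint32_t size = SKMEM_OBJ_SIZE(oi);
 *
 *		bzero(buf, size);	// construct the object in place
 *		return 0;
 *	}
 */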

/*
 * Cache.
 */
struct skmem_cache {
	/*
	 * Commonly-accessed elements during alloc and free.
	 */
	uint32_t        skm_mode;               /* cache mode flags */
	skmem_ctor_fn_t skm_ctor;               /* object constructor */
	skmem_dtor_fn_t skm_dtor;               /* object destructor */
	skmem_reclaim_fn_t skm_reclaim;         /* cache reclaim */
	void            *skm_private;           /* opaque arg to callbacks */

	/*
	 * Depot.
	 */
	decl_lck_mtx_data(, skm_dp_lock);       /* protects depot layer */
	struct skmem_magtype *skm_magtype;      /* magazine type */
	struct skmem_maglist skm_full;          /* full magazines */
	struct skmem_maglist skm_empty;         /* empty magazines */

	/*
	 * Slab.
	 */
	decl_lck_mtx_data(, skm_sl_lock);       /* protects slab layer */
	skmem_slab_alloc_fn_t skm_slab_alloc;   /* slab allocate */
	skmem_slab_free_fn_t skm_slab_free;     /* slab free */
	size_t          skm_chunksize;          /* bufsize + alignment */
	size_t          skm_objsize;            /* actual obj size in slab */
	size_t          skm_slabsize;           /* size of a slab */
	size_t          skm_hash_initial;       /* initial hash table size */
	size_t          skm_hash_limit;         /* hash table size limit */
	size_t          skm_hash_shift;         /* get to interesting bits */
	size_t          skm_hash_mask;          /* hash table mask */
	struct skmem_bufctl_bkt *skm_hash_table; /* alloc'd buffer htable */
	TAILQ_HEAD(, skmem_slab) skm_sl_partial_list; /* partially-allocated */
	TAILQ_HEAD(, skmem_slab) skm_sl_empty_list;   /* fully-allocated */
	struct skmem_region *skm_region;        /* region source for slabs */

	/*
	 * Statistics.
	 */
	uint32_t        skm_cpu_mag_size;       /* current magazine size */
	uint32_t        skm_cpu_mag_resize;     /* # of magazine resizes */
	uint32_t        skm_cpu_mag_purge;      /* # of magazine purges */
	uint32_t        skm_cpu_mag_reap;       /* # of magazine reaps */
	uint64_t        skm_depot_contention;   /* mutex contention count */
	uint64_t        skm_depot_contention_prev; /* previous snapshot */
	uint32_t        skm_depot_full;         /* # of full magazines */
	uint32_t        skm_depot_empty;        /* # of empty magazines */
	uint32_t        skm_depot_ws_zero;      /* # of working set flushes */
	uint32_t        skm_sl_rescale;         /* # of hash table rescales */
	uint32_t        skm_sl_create;          /* slab creates */
	uint32_t        skm_sl_destroy;         /* slab destroys */
	uint32_t        skm_sl_alloc;           /* slab layer allocations */
	uint32_t        skm_sl_free;            /* slab layer frees */
	uint32_t        skm_sl_partial;         /* # of partial slabs */
	uint32_t        skm_sl_empty;           /* # of empty slabs */
	uint64_t        skm_sl_alloc_fail;      /* total failed allocations */
	uint64_t        skm_sl_bufinuse;        /* total unfreed buffers */
	uint64_t        skm_sl_bufmax;          /* max buffers ever */

	/*
	 * Cache properties.
	 */
	TAILQ_ENTRY(skmem_cache) skm_link;      /* cache linkage */
	char            skm_name[64];           /* cache name */
	uuid_t          skm_uuid;               /* cache uuid */
	size_t          skm_bufsize;            /* buffer size */
	size_t          skm_bufalign;           /* buffer alignment */
	size_t          skm_objalign;           /* object alignment */

	/*
	 * CPU layer, aligned at (maximum) cache line boundary.
	 */
	decl_lck_mtx_data(, skm_rs_lock);       /* protects resizing */
	struct thread    *skm_rs_owner;         /* resize owner */
	uint32_t        skm_rs_busy;            /* prevent resizing */
	uint32_t        skm_rs_want;            /* # of threads blocked */
	struct skmem_cpu_cache  skm_cpu_cache[1]
	__attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
};

#define SKMEM_CACHE_SIZE(n)     \
	offsetof(struct skmem_cache, skm_cpu_cache[n])

#define SKMEM_CPU_CACHE(c)                                      \
	((struct skmem_cpu_cache *)((void *)((char *)(c) +      \
	SKMEM_CACHE_SIZE(cpu_number()))))
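
/*
 * SKMEM_CPU_CACHE(c) resolves the calling CPU's cache: skm_cpu_cache[]
 * sits at the tail of struct skmem_cache and SKMEM_CACHE_SIZE(n) is the
 * byte offset of element n, so adding SKMEM_CACHE_SIZE(cpu_number()) to
 * the cache pointer lands on the current CPU's skmem_cpu_cache slot.
 */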

/* valid values for skm_mode, set only by skmem_cache_create() */
#define SKM_MODE_NOMAGAZINES    0x00000001      /* disable magazines layer */
#define SKM_MODE_AUDIT          0x00000002      /* audit transactions */
#define SKM_MODE_NOREDIRECT     0x00000004      /* unaffected by defunct */
#define SKM_MODE_BATCH          0x00000008      /* supports batch alloc/free */
#define SKM_MODE_DYNAMIC        0x00000010      /* enable magazine resizing */
#define SKM_MODE_CLEARONFREE    0x00000020      /* zero-out upon slab free */
#define SKM_MODE_PSEUDO         0x00000040      /* external backing store */
#define SKM_MODE_RECLAIM        0x00000080      /* aggressive memory reclaim */

#define SKM_MODE_BITS \
	"\020\01NOMAGAZINES\02AUDIT\03NOREDIRECT\04BATCH\05DYNAMIC"     \
	"\06CLEARONFREE\07PSEUDO\10RECLAIM"
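
/*
 * SKM_MODE_BITS is a "%b"-style bit-name string: the leading \020 selects
 * hexadecimal output, and each subsequent \NN entry pairs a bit position
 * with the name to print when that SKM_MODE_* bit is set.
 */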

/*
 * Valid flags for sk{mem,region}_alloc().  SKMEM_FAILOK is valid only if
 * SKMEM_SLEEP is set, i.e. SKMEM_{NOSLEEP,FAILOK} are mutually exclusive.
 * If set, SKMEM_FAILOK indicates that the segment allocation may fail,
 * and that the cache layer would handle the retries rather than blocking
 * inside the region allocator.
 */
#define SKMEM_SLEEP             0x0     /* can block for memory; won't fail */
#define SKMEM_NOSLEEP           0x1     /* cannot block for memory; may fail */
#define SKMEM_PANIC             0x2     /* panic upon allocation failure */
#define SKMEM_FAILOK            0x4     /* can fail for blocking alloc */
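
/*
 * For example, SKMEM_FAILOK by itself is a blocking request that is
 * permitted to fail (SKMEM_SLEEP is 0x0 and thus implied), whereas
 * SKMEM_NOSLEEP | SKMEM_FAILOK is invalid per the comment above.
 */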

/* valid flag values for skmem_cache_create() */
#define SKMEM_CR_NOMAGAZINES    0x1     /* disable magazines layer */
#define SKMEM_CR_BATCH          0x2     /* support batch alloc/free */
#define SKMEM_CR_DYNAMIC        0x4     /* enable magazine resizing */
#define SKMEM_CR_CLEARONFREE    0x8     /* zero-out upon slab free */
#define SKMEM_CR_RECLAIM        0x10    /* aggressive memory reclaim */

__BEGIN_DECLS
/*
 * Given a buffer control, add a use count to it.
 */
__attribute__((always_inline))
static inline void
skmem_bufctl_use(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old + 1;
		VERIFY(new != 0);
		ASSERT(new == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});
}

/*
 * Given a buffer control, remove a use count from it (returns new value).
 */
__attribute__((always_inline))
static inline uint32_t
skmem_bufctl_unuse(struct skmem_bufctl *bc)
{
	uint32_t old, new;

	os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
		new = old - 1;
		VERIFY(old != 0);
		ASSERT(old == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
	});

	return new;
}

extern void skmem_cache_pre_init(void);
extern void skmem_cache_init(void);
extern void skmem_cache_fini(void);
extern struct skmem_cache *skmem_cache_create(const char *, size_t, size_t,
    skmem_ctor_fn_t, skmem_dtor_fn_t, skmem_reclaim_fn_t, void *,
    struct skmem_region *, uint32_t);
extern void skmem_cache_destroy(struct skmem_cache *);
extern void *skmem_cache_alloc(struct skmem_cache *, uint32_t);
extern uint32_t skmem_cache_batch_alloc(struct skmem_cache *,
    struct skmem_obj **list, uint32_t, uint32_t);
extern void skmem_cache_free(struct skmem_cache *, void *);
extern void skmem_cache_batch_free(struct skmem_cache *, struct skmem_obj *);
extern void skmem_cache_reap_now(struct skmem_cache *, boolean_t);
extern void skmem_cache_reap(void);
extern void skmem_reap_caches(boolean_t);
extern void skmem_cache_get_obj_info(struct skmem_cache *, void *,
    struct skmem_obj_info *, struct skmem_obj_info *);
extern uint32_t skmem_cache_magazine_max(uint32_t);
extern boolean_t skmem_allow_magazines(void);
#if (DEVELOPMENT || DEBUG)
extern void skmem_cache_test_start(uint32_t);
extern void skmem_cache_test_stop(void);
#endif /* (DEVELOPMENT || DEBUG) */
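
/*
 * A minimal lifecycle sketch, not part of this header.  The parameter
 * meanings for skmem_cache_create() (name, buffer size, buffer alignment,
 * ctor/dtor/reclaim callbacks, private argument, backing region and
 * SKMEM_CR_* flags) are inferred from the prototype and the struct
 * skmem_cache fields above; "skr" and the sizes are placeholders.
 *
 *	struct skmem_cache *skm;
 *	void *obj;
 *
 *	skm = skmem_cache_create("my_cache", 256, SKMEM_CACHE_ALIGN,
 *	    NULL, NULL, NULL, NULL, skr, SKMEM_CR_BATCH);
 *	obj = skmem_cache_alloc(skm, SKMEM_SLEEP);
 *	// ... use obj ...
 *	skmem_cache_free(skm, obj);
 *	skmem_cache_destroy(skm);
 */
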
__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* _SKYWALK_MEM_SKMEMCACHEVAR_H */