1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _SKYWALK_MEM_SKMEMCACHEVAR_H
30 #define _SKYWALK_MEM_SKMEMCACHEVAR_H
31
32 #ifdef BSD_KERNEL_PRIVATE
33 #include <skywalk/core/skywalk_var.h>
34 #include <skywalk/os_channel_private.h>
35 #include <kern/cpu_number.h>
36 #include <machine/machine_routines.h>
37
/*
 * Buffer control: one per buffer object, tracking its address, owning
 * slab, and outstanding use count.  Free bufctls live on their slab's
 * sl_head list; allocated ones are hashed into the cache's
 * skm_hash_table (see SKMEM_CACHE_HASH()) so they can be located again
 * at free time.  Layout is ABI: struct skmem_bufctl_audit repeats these
 * leading fields.
 */
struct skmem_bufctl {
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void *__sized_by(bc_lim) bc_addr;       /* buffer obj address */
	void *bc_addrm;                         /* mirrored buffer obj addr */
	struct skmem_slab *bc_slab;             /* controlling slab */
	uint32_t bc_lim;                        /* buffer obj limit (bounds bc_addr) */
	uint32_t bc_flags;                      /* SKMEM_BUFCTL_* flags */
	uint32_t bc_idx;                        /* buffer index within slab */
	volatile uint32_t bc_usecnt;            /* outstanding use; see skmem_bufctl_use() */
};
51
52 #define SKMEM_BUFCTL_SHAREOK 0x1 /* supports sharing */
53
54 #define SKMEM_STACK_DEPTH 16 /* maximum audit stack depth */
55
56 #define SKMEM_CACHE_ALIGN 8 /* min guaranteed alignment */
57
/*
 * Alternative buffer control used when SKM_MODE_AUDIT is set.  The
 * leading fields exactly repeat struct skmem_bufctl, with transaction
 * audit state (owning thread, timestamp and captured stack) appended
 * at the end.
 */
struct skmem_bufctl_audit {
	SLIST_ENTRY(skmem_bufctl) bc_link;      /* bufctl linkage */
	void *__sized_by(bc_lim) bc_addr;       /* buffer address */
	void *bc_addrm;                         /* mirrored buffer address */
	struct skmem_slab *bc_slab;             /* controlling slab */
	uint32_t bc_lim;                        /* buffer obj limit */
	uint32_t bc_flags;                      /* SKMEM_BUFCTL_* flags */
	uint32_t bc_idx;                        /* buffer index within slab */
	volatile uint32_t bc_usecnt;            /* outstanding use */
	struct thread *bc_thread;               /* thread doing transaction */
	uint32_t bc_timestamp;                  /* transaction time */
	uint32_t bc_depth;                      /* stack depth actually captured */
	void *bc_stack[SKMEM_STACK_DEPTH];      /* stack (up to SKMEM_STACK_DEPTH frames) */
};
75
/*
 * Buffer control hash bucket: head of the singly-linked list of
 * allocated bufctls that hash to this bucket; see SKMEM_CACHE_HASH().
 */
struct skmem_bufctl_bkt {
	SLIST_HEAD(, skmem_bufctl) bcb_head;    /* bufctl allocated list */
};
82
/*
 * Slab: a contiguous run of sl_chunks buffers carved from a backing
 * segment.  A slab sits on the cache's partial list while only some of
 * its buffers are allocated (see SKMEM_SLAB_IS_PARTIAL()) and on the
 * "empty" list when fully allocated (no free bufctls remain on
 * sl_head).
 */
struct skmem_slab {
	TAILQ_ENTRY(skmem_slab) sl_link;        /* slab freelist linkage */
	struct skmem_cache *sl_cache;           /* controlling cache */
	void *sl_base;                          /* base of allocated memory */
	void *sl_basem;                         /* base of mirrored memory */
	struct sksegment *sl_seg;               /* backing segment */
	struct sksegment *sl_segm;              /* backing mirrored segment */
	SLIST_HEAD(, skmem_bufctl) sl_head;     /* bufctl free list */
	uint32_t sl_refcnt;                     /* outstanding allocations */
	uint32_t sl_chunks;                     /* # of buffers in slab */
};
97
/* true when some, but not all, of the slab's buffers are allocated */
#define SKMEM_SLAB_IS_PARTIAL(sl) \
	((sl)->sl_refcnt > 0 && (sl)->sl_refcnt < (sl)->sl_chunks)

/*
 * True when buf lies within the slab's memory span.  The unsigned
 * subtraction intentionally wraps for addresses below sl_base, making
 * a single comparison suffice.
 */
#define SKMEM_SLAB_MEMBER(sl, buf) \
	(((size_t)(buf) - (size_t)(sl)->sl_base) < (sl)->sl_cache->skm_slabsize)
103
/*
 * Magazine type: describes one size class of magazine.  Buffers whose
 * size falls within [mt_minbuf, mt_maxbuf] use this type; the
 * magazines themselves come from mt_cache.
 */
struct skmem_magtype {
	int mt_magsize;                 /* magazine size (# of objs) */
	int mt_align;                   /* magazine alignment */
	size_t mt_minbuf;               /* all smaller bufs qualify */
	size_t mt_maxbuf;               /* no larger bufs qualify */
	struct skmem_cache *mt_cache;   /* magazine cache */
	char mt_cname[64];              /* magazine cache name */
};
115
/*
 * Magazine: a fixed-capacity array of cached object pointers.
 * mg_round is a flexible array with mg_count elements; allocation
 * size must be computed with SKMEM_MAG_SIZE().
 */
struct skmem_mag {
	SLIST_ENTRY(skmem_mag) mg_link;         /* magazine linkage */
	struct skmem_magtype *mg_magtype;       /* magazine type */
	size_t mg_count;                        /* # of mg_round array elements */
	void *mg_round[__counted_by(mg_count)]; /* one or more objs */
};

/* size in bytes of a magazine holding n rounds (flexible-array sizing) */
#define SKMEM_MAG_SIZE(n) \
	offsetof(struct skmem_mag, mg_round[n])
128
/*
 * Magazine depot: a list of (full or empty) magazines along with
 * working-set statistics that bound how many magazines may be reaped.
 */
struct skmem_maglist {
	SLIST_HEAD(, skmem_mag) ml_list;        /* magazine list */
	uint32_t ml_total;                      /* number of magazines */
	uint32_t ml_min;                        /* min since last update */
	uint32_t ml_reaplimit;                  /* max reapable magazines */
	uint64_t ml_alloc;                      /* allocations from this list */
};
139
/*
 * Per-CPU cache structure, aligned to the maximum cache-line size so
 * each CPU's state occupies its own line(s).  cp_lock serializes
 * access; cp_loaded/cp_ploaded form the two-magazine working pair with
 * their respective round counts in cp_rounds/cp_prounds.
 */
struct skmem_cpu_cache {
	decl_lck_mtx_data(, cp_lock);           /* protects this CPU's state */
	struct skmem_mag *cp_loaded;            /* currently filled magazine */
	struct skmem_mag *cp_ploaded;           /* previously filled magazine */
	uint64_t cp_alloc;                      /* allocations from this cpu */
	uint64_t cp_free;                       /* frees to this cpu */
	int cp_rounds;                          /* # of objs in filled mag */
	int cp_prounds;                         /* # of objs in previous mag */
	int cp_magsize;                         /* # of objs in a full mag */
} __attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
153
/*
 * Object's region information.
 *
 * This info is provided to skmem_ctor_fn_t() to assist in the
 * construction of master and slave objects.  It is also provided
 * separately via skmem_cache_get_obj_info() when called on an object
 * that has been allocated from skmem_cache.  Information about the
 * slave object is available only at constructor time.
 */
struct skmem_obj_info {
	void *__sized_by(oi_size) oi_addr;      /* object address */
	struct skmem_bufctl *oi_bc;             /* buffer control (master) */
	uint32_t oi_size;                       /* actual object size */
	obj_idx_t oi_idx_reg;                   /* object idx within region */
	obj_idx_t oi_idx_seg;                   /* object idx within segment */
} __attribute__((__packed__));  /* packed: overlaid on raw object memory via struct skmem_obj */
170
/*
 * Generic one-way linked list element structure.  This is used to
 * handle skmem_cache_batch_alloc() requests in order to chain the
 * allocated objects together before returning them to the caller.
 * It is also used when freeing a batch of packets by the caller of
 * skmem_cache_batch_free().  Note that this requires the region's
 * object to be at least the size of struct skmem_obj, as we store
 * this information at the beginning of each object in the chain.
 */
struct skmem_obj {
	/*
	 * Given that we overlay this structure on top of whatever
	 * structure that the object represents, the constructor must
	 * ensure that it reserves at least the size of a pointer
	 * at the top for the linkage.
	 */
	struct skmem_obj *mo_next;              /* next object in the list */
	/*
	 * The following are used only for raw (unconstructed) objects
	 * coming out of the slab layer during allocations.  They are
	 * not touched otherwise by skmem_cache when the object resides
	 * in the magazine.  By utilizing this space, we avoid having
	 * to allocate temporary storage elsewhere.
	 */
	struct skmem_obj_info mo_info;          /* object's info */
	struct skmem_obj_info mo_minfo;         /* mirrored object's info */
};
198
/* field accessors for struct skmem_obj_info */
#define SKMEM_OBJ_ADDR(_oi)     (_oi)->oi_addr
#define SKMEM_OBJ_BUFCTL(_oi)   (_oi)->oi_bc
#define SKMEM_OBJ_SIZE(_oi)     (_oi)->oi_size
#define SKMEM_OBJ_IDX_REG(_oi)  (_oi)->oi_idx_reg
#define SKMEM_OBJ_IDX_SEG(_oi)  (_oi)->oi_idx_seg
/* segment the object belongs to (only for master) */
#define SKMEM_OBJ_SEG(_oi)      (_oi)->oi_bc->bc_slab->sl_seg
/* offset of object relative to the object's own region */
#define SKMEM_OBJ_ROFF(_oi) \
	((mach_vm_offset_t)(SKMEM_OBJ_SIZE(_oi) * SKMEM_OBJ_IDX_REG(_oi)))
209
/* object constructor: (master obj info, slave obj info, private arg, skmflag) */
typedef int (*skmem_ctor_fn_t)(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
/* object destructor: (object, private arg) */
typedef void (*skmem_dtor_fn_t)(void *, void *);
/* cache reclaim callback: (private arg) */
typedef void (*skmem_reclaim_fn_t)(void *);
/* slab-layer allocate: fills master/slave obj info for one raw object */
typedef int (*skmem_slab_alloc_fn_t)(struct skmem_cache *,
    struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
/* slab-layer free: (cache, object) */
typedef void (*skmem_slab_free_fn_t)(struct skmem_cache *, void *);
217
/*
 * Cache: the top-level object allocator.  Allocation flows through the
 * per-CPU magazine layer (skm_cpu_cache), then the depot
 * (skm_full/skm_empty), and finally the slab layer, each guarded by its
 * own lock.  The trailing skm_cpu_cache[] flexible array holds one
 * entry per CPU; size instances with SKMEM_CACHE_SIZE().
 */
struct skmem_cache {
#if KASAN
	void *skm_start;                /* original (untagged) start address */
	uint32_t skm_align[0];          /* marker for alignment padding */
#endif
	/*
	 * Commonly-accessed elements during alloc and free.
	 */
	uint32_t skm_mode;              /* cache mode flags (SKM_MODE_*) */
	skmem_ctor_fn_t skm_ctor;       /* object constructor */
	skmem_dtor_fn_t skm_dtor;       /* object destructor */
	skmem_reclaim_fn_t skm_reclaim; /* cache reclaim */
	void *skm_private;              /* opaque arg to callbacks */

	/*
	 * Depot.
	 */
	decl_lck_mtx_data(, skm_dp_lock);       /* protects depot layer */
	struct skmem_magtype *skm_magtype;      /* magazine type */
	struct skmem_maglist skm_full;          /* full magazines */
	struct skmem_maglist skm_empty;         /* empty magazines */

	/*
	 * Slab.
	 */
	decl_lck_mtx_data(, skm_sl_lock);       /* protects slab layer */
	skmem_slab_alloc_fn_t skm_slab_alloc;   /* slab allocate */
	skmem_slab_free_fn_t skm_slab_free;     /* slab free */
	size_t skm_chunksize;                   /* bufsize + alignment */
	size_t skm_objsize;                     /* actual obj size in slab */
	size_t skm_slabsize;                    /* size of a slab */
	size_t skm_hash_initial;                /* initial hash table size */
	size_t skm_hash_limit;                  /* hash table size limit */
	size_t skm_hash_shift;                  /* get to interesting bits */
	size_t skm_hash_mask;                   /* hash table mask */
	size_t skm_hash_size;                   /* current # of buckets in skm_hash_table */
	struct skmem_bufctl_bkt *__counted_by(skm_hash_size) skm_hash_table; /* alloc'd buffer htable */
	TAILQ_HEAD(, skmem_slab) skm_sl_partial_list; /* partially-allocated */
	TAILQ_HEAD(, skmem_slab) skm_sl_empty_list; /* fully-allocated */
	struct skmem_region *skm_region;        /* region source for slabs */

	/*
	 * Statistics.
	 */
	uint32_t skm_cpu_mag_size;      /* current magazine size */
	uint32_t skm_cpu_mag_resize;    /* # of magazine resizes */
	uint32_t skm_cpu_mag_purge;     /* # of magazine purges */
	uint32_t skm_cpu_mag_reap;      /* # of magazine reaps */
	uint64_t skm_depot_contention;  /* mutex contention count */
	uint64_t skm_depot_contention_prev; /* previous snapshot */
	uint32_t skm_depot_full;        /* # of full magazines */
	uint32_t skm_depot_empty;       /* # of empty magazines */
	uint32_t skm_depot_ws_zero;     /* # of working set flushes */
	uint32_t skm_sl_rescale;        /* # of hash table rescales */
	uint32_t skm_sl_create;         /* slab creates */
	uint32_t skm_sl_destroy;        /* slab destroys */
	uint32_t skm_sl_alloc;          /* slab layer allocations */
	uint32_t skm_sl_free;           /* slab layer frees */
	uint32_t skm_sl_partial;        /* # of partial slabs */
	uint32_t skm_sl_empty;          /* # of empty slabs */
	uint64_t skm_sl_alloc_fail;     /* total failed allocations */
	uint64_t skm_sl_bufinuse;       /* total unfreed buffers */
	uint64_t skm_sl_bufmax;         /* max buffers ever */

	/*
	 * Cache properties.
	 */
	TAILQ_ENTRY(skmem_cache) skm_link;      /* cache linkage */
	char skm_name[64];                      /* cache name */
	uuid_t skm_uuid;                        /* cache uuid */
	size_t skm_bufsize;                     /* buffer size */
	size_t skm_bufalign;                    /* buffer alignment */
	size_t skm_objalign;                    /* object alignment */

	/*
	 * CPU layer, aligned at (maximum) cache line boundary.
	 */
	decl_lck_mtx_data(, skm_rs_lock);       /* protects resizing */
	struct thread *skm_rs_owner;            /* resize owner */
	uint32_t skm_rs_busy;                   /* prevent resizing */
	uint32_t skm_rs_want;                   /* # of threads blocked */
	size_t skm_cpu_cache_count;             /* # of skm_cpu_cache entries */
	struct skmem_cpu_cache skm_cpu_cache[__counted_by(skm_cpu_cache_count)]
	__attribute__((aligned(CHANNEL_CACHE_ALIGN_MAX)));
};
306
/* allocation size in bytes of a cache with n per-CPU cache structures */
#define SKMEM_CACHE_SIZE(n) \
	offsetof(struct skmem_cache, skm_cpu_cache[n])

/* pointer to the calling CPU's per-CPU cache within cache c */
#define SKMEM_CPU_CACHE(c) \
	((struct skmem_cpu_cache *)((void *)((char *)(c) + \
	SKMEM_CACHE_SIZE(cpu_number()))))
313
314 /* valid values for skm_mode, set only by skmem_cache_create() */
315 #define SKM_MODE_NOMAGAZINES 0x00000001 /* disable magazines layer */
316 #define SKM_MODE_AUDIT 0x00000002 /* audit transactions */
317 #define SKM_MODE_NOREDIRECT 0x00000004 /* unaffected by defunct */
318 #define SKM_MODE_BATCH 0x00000008 /* supports batch alloc/free */
319 #define SKM_MODE_DYNAMIC 0x00000010 /* enable magazine resizing */
320 #define SKM_MODE_CLEARONFREE 0x00000020 /* zero-out upon slab free */
321 #define SKM_MODE_PSEUDO 0x00000040 /* external backing store */
322 #define SKM_MODE_RECLAIM 0x00000080 /* aggressive memory reclaim */
323
324 #define SKM_MODE_BITS \
325 "\020\01NOMAGAZINES\02AUDIT\03NOREDIRECT\04BATCH\05DYNAMIC" \
326 "\06CLEARONFREE\07PSEUDO\10RECLAIM"
327
328 /*
329 * Valid flags for sk{mem,region}_alloc(). SKMEM_FAILOK is valid only if
330 * SKMEM_SLEEP is set, i.e. SKMEM_{NOSLEEP,FAILOK} are mutually exclusive.
331 * If set, SKMEM_FAILOK indicates that the segment allocation may fail,
332 * and that the cache layer would handle the retries rather than blocking
333 * inside the region allocator.
334 */
335 #define SKMEM_SLEEP 0x0 /* can block for memory; won't fail */
336 #define SKMEM_NOSLEEP 0x1 /* cannot block for memory; may fail */
337 #define SKMEM_PANIC 0x2 /* panic upon allocation failure */
338 #define SKMEM_FAILOK 0x4 /* can fail for blocking alloc */
339
340 /* valid flag values for skmem_cache_create() */
341 #define SKMEM_CR_NOMAGAZINES 0x1 /* disable magazines layer */
342 #define SKMEM_CR_BATCH 0x2 /* support batch alloc/free */
343 #define SKMEM_CR_DYNAMIC 0x4 /* enable magazine resizing */
344 #define SKMEM_CR_CLEARONFREE 0x8 /* zero-out upon slab free */
345 #define SKMEM_CR_RECLAIM 0x10 /* aggressive memory reclaim */
346
347 __BEGIN_DECLS
348 /*
349 * Given a buffer control, add a use count to it.
350 */
351 __attribute__((always_inline))
352 static inline void
skmem_bufctl_use(struct skmem_bufctl * bc)353 skmem_bufctl_use(struct skmem_bufctl *bc)
354 {
355 uint32_t old, new;
356
357 os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
358 new = old + 1;
359 VERIFY(new != 0);
360 ASSERT(new == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
361 });
362 }
363
364 /*
365 * Given a buffer control, remove a use count from it (returns new value).
366 */
367 __attribute__((always_inline))
368 static inline uint32_t
skmem_bufctl_unuse(struct skmem_bufctl * bc)369 skmem_bufctl_unuse(struct skmem_bufctl *bc)
370 {
371 uint32_t old, new;
372
373 os_atomic_rmw_loop(&bc->bc_usecnt, old, new, relaxed, {
374 new = old - 1;
375 VERIFY(old != 0);
376 ASSERT(old == 1 || (bc->bc_flags & SKMEM_BUFCTL_SHAREOK));
377 });
378
379 return new;
380 }
381
382 extern struct skmem_cache *skmem_slab_cache; /* cache for skmem_slab */
383 extern struct skmem_cache *skmem_bufctl_cache; /* cache for skmem_bufctl */
384 extern unsigned int bc_size; /* size of bufctl */
385 extern int skmem_slab_alloc_locked(struct skmem_cache *,
386 struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
387 extern void skmem_slab_free_locked(struct skmem_cache *, void *);
388 extern int skmem_slab_alloc_pseudo_locked(struct skmem_cache *,
389 struct skmem_obj_info *, struct skmem_obj_info *, uint32_t);
390 extern void skmem_slab_free_pseudo_locked(struct skmem_cache *, void *);
391 extern void skmem_slab_free(struct skmem_cache *, void *);
392 extern void skmem_slab_batch_free(struct skmem_cache *, struct skmem_obj *);
393 extern uint32_t skmem_slab_batch_alloc(struct skmem_cache *, struct skmem_obj **,
394 uint32_t, uint32_t);
395 extern int skmem_slab_alloc(struct skmem_cache *, struct skmem_obj_info *,
396 struct skmem_obj_info *, uint32_t);
397 extern void skmem_audit_bufctl(struct skmem_bufctl *);
/* slab-layer lock operations for a cache */
#define SKM_SLAB_LOCK(_skm) \
	lck_mtx_lock(&(_skm)->skm_sl_lock)
#define SKM_SLAB_LOCK_ASSERT_HELD(_skm) \
	LCK_MTX_ASSERT(&(_skm)->skm_sl_lock, LCK_MTX_ASSERT_OWNED)
#define SKM_SLAB_LOCK_ASSERT_NOTHELD(_skm) \
	LCK_MTX_ASSERT(&(_skm)->skm_sl_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SKM_SLAB_UNLOCK(_skm) \
	lck_mtx_unlock(&(_skm)->skm_sl_lock)
/* bucket index: shift out low (alignment) bits of address, then mask */
#define SKMEM_CACHE_HASH_INDEX(_a, _s, _m)      (((_a) >> (_s)) & (_m))
/* hash bucket in _skm's table for buffer address _buf */
#define SKMEM_CACHE_HASH(_skm, _buf) \
	(&(_skm)->skm_hash_table[SKMEM_CACHE_HASH_INDEX((uintptr_t)_buf, \
	(_skm)->skm_hash_shift, (_skm)->skm_hash_mask)])
410
411 extern void skmem_cache_pre_init(void);
412 extern void skmem_cache_init(void);
413 extern void skmem_cache_fini(void);
414 extern struct skmem_cache *skmem_cache_create(const char *, size_t, size_t,
415 skmem_ctor_fn_t, skmem_dtor_fn_t, skmem_reclaim_fn_t, void *,
416 struct skmem_region *, uint32_t);
417 extern void skmem_cache_destroy(struct skmem_cache *);
418
419 extern uint32_t skmem_cache_batch_alloc(struct skmem_cache *,
420 struct skmem_obj **list, size_t objsize, uint32_t, uint32_t);
421
422 /*
423 * XXX -fbounds-safety: Sometimes we use skmem_cache_alloc to allocate a struct
424 * with a flexible array (e.g. struct skmem_mag). For those, we can't have the
425 * alloc function return void *__single, because we lose bounds information.
426 */
427 static inline void *__header_indexable
skmem_cache_alloc(struct skmem_cache * skm,uint32_t skmflag)428 skmem_cache_alloc(struct skmem_cache *skm, uint32_t skmflag)
429 {
430 struct skmem_obj *__single buf;
431
432 (void) skmem_cache_batch_alloc(skm, &buf, skm->skm_objsize, 1, skmflag);
433
434 /* This is one of the few places where using __unsafe_forge is okay */
435 return __unsafe_forge_bidi_indexable(void *, buf, buf ? skm->skm_objsize : 0);
436 }
437
438 extern void skmem_cache_free(struct skmem_cache *, void *);
439 extern void skmem_cache_free_nocache(struct skmem_cache *, void *);
440 extern void skmem_cache_batch_free(struct skmem_cache *, struct skmem_obj *);
441 extern void skmem_cache_batch_free_nocache(struct skmem_cache *, struct skmem_obj *);
442 extern void skmem_cache_reap_now(struct skmem_cache *, boolean_t);
443 extern void skmem_cache_reap(void);
444 extern void skmem_reap_caches(boolean_t);
445 extern void skmem_cache_get_obj_info(struct skmem_cache *, void *,
446 struct skmem_obj_info *, struct skmem_obj_info *);
447 extern uint32_t skmem_cache_magazine_max(uint32_t);
448 extern boolean_t skmem_allow_magazines(void);
449 #if (DEVELOPMENT || DEBUG)
450 extern void skmem_cache_test_start(uint32_t);
451 extern void skmem_cache_test_stop(void);
452 #endif /* (DEVELOPMENT || DEBUG) */
453 __END_DECLS
454 #endif /* BSD_KERNEL_PRIVATE */
455 #endif /* _SKYWALK_MEM_SKMEMCACHEVAR_H */
456