1 /*
2 * Copyright (c) 2016-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/packet/pbufpool_var.h>
31 #include <sys/sdt.h>
32
/*
 * Forward declarations; all of these helpers are private to this file.
 */
static struct kern_pbufpool *pp_alloc(zalloc_flags_t);
static void pp_free(struct kern_pbufpool *);
static uint32_t pp_alloc_packet_common(struct kern_pbufpool *, uint16_t,
    uint64_t *, uint32_t, boolean_t, alloc_cb_func_t, const void *, uint32_t);
static void pp_free_packet_array(struct kern_pbufpool *, uint64_t *, uint32_t);
static int pp_metadata_ctor_no_buflet(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
static int pp_metadata_ctor_max_buflet(struct skmem_obj_info *,
    struct skmem_obj_info *, void *, uint32_t);
static void pp_metadata_dtor(void *, void *);
static int pp_metadata_construct(struct __kern_quantum *,
    struct __user_quantum *, obj_idx_t, struct kern_pbufpool *, uint32_t,
    uint16_t, bool, struct skmem_obj **);
static void pp_metadata_destruct(struct __kern_quantum *,
    struct kern_pbufpool *, bool);
static struct __kern_quantum *pp_metadata_init(struct __metadata_preamble *,
    struct kern_pbufpool *, uint16_t, uint32_t, struct skmem_obj **);
static struct __metadata_preamble *pp_metadata_fini(struct __kern_quantum *,
    struct kern_pbufpool *, struct mbuf **, struct __kern_packet **,
    struct skmem_obj **, struct skmem_obj **, struct skmem_obj **);
static void pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid);
static void pp_buf_seg_ctor(struct sksegment *, IOSKMemoryBufferRef, void *);
static void pp_buf_seg_dtor(struct sksegment *, IOSKMemoryBufferRef, void *);
static void pp_destroy_upp_locked(struct kern_pbufpool *);
static void pp_destroy_upp_bft_locked(struct kern_pbufpool *);
static int pp_init_upp_bft_locked(struct kern_pbufpool *, boolean_t);
static void pp_free_buflet_common(const kern_pbufpool_t, kern_buflet_t);
static mach_vm_address_t pp_alloc_buffer_common(const kern_pbufpool_t pp,
    struct skmem_obj_info *oi, uint32_t skmflag, bool large);
static inline uint32_t
pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
    uint32_t num, uint32_t skmflag, uint32_t flags);
65
#define KERN_PBUFPOOL_U_HASH_SIZE 64 /* hash table size */
/* extra buflets allocated when raw (shared-buffer) buflets are configured */
#define KERN_BUF_CNT_MULTIPLIER 2

/*
 * Since the inputs are small (indices to the metadata region), we can use
 * Knuth's multiplicative hash method which is fast and good enough. Here
 * we multiply the input by the golden ratio of 2^32. See "The Art of
 * Computer Programming", section 6.4.
 */
#define KERN_PBUFPOOL_U_HASH_INDEX(_i, _m) \
	(((_i) * 2654435761U) & (_m))
/* bucket for user metadata tracked by index _i in pool _pp */
#define KERN_PBUFPOOL_U_HASH(_pp, _i) \
	(&(_pp)->pp_u_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
	KERN_PBUFPOOL_U_HASH_SIZE - 1)])
/* bucket for user buflets tracked by index _i in pool _pp */
#define KERN_PBUFPOOL_U_BFT_HASH(_pp, _i) \
	(&(_pp)->pp_u_bft_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
	KERN_PBUFPOOL_U_HASH_SIZE - 1)])

/* zone for struct kern_pbufpool allocations (zeroed on free) */
static ZONE_DEFINE(pp_zone, SKMEM_ZONE_PREFIX ".mem.pp",
    sizeof(struct kern_pbufpool), ZC_ZFREE_CLEARMEM);

#define PP_U_HTBL_SIZE \
	(sizeof(struct kern_pbufpool_u_bkt) * KERN_PBUFPOOL_U_HASH_SIZE)
/* zone for the per-pool user metadata/buflet hash tables */
static ZONE_DEFINE(pp_u_htbl_zone, SKMEM_ZONE_PREFIX ".mem.pp.htbl",
    PP_U_HTBL_SIZE, ZC_ZFREE_CLEARMEM);

static struct skmem_cache *pp_opt_cache; /* cache for __packet_opt */
static struct skmem_cache *pp_flow_cache; /* cache for __flow */
static struct skmem_cache *pp_compl_cache; /* cache for __packet_compl */

/* set once pp_init() has run; consulted by pp_fini() */
static int __pp_inited = 0;
97
98 int
pp_init(void)99 pp_init(void)
100 {
101 _CASSERT(KPKT_SC_UNSPEC == MBUF_SC_UNSPEC);
102 _CASSERT(KPKT_SC_BK_SYS == MBUF_SC_BK_SYS);
103 _CASSERT(KPKT_SC_BK == MBUF_SC_BK);
104 _CASSERT(KPKT_SC_BE == MBUF_SC_BE);
105 _CASSERT(KPKT_SC_RD == MBUF_SC_RD);
106 _CASSERT(KPKT_SC_OAM == MBUF_SC_OAM);
107 _CASSERT(KPKT_SC_AV == MBUF_SC_AV);
108 _CASSERT(KPKT_SC_RV == MBUF_SC_RV);
109 _CASSERT(KPKT_SC_VI == MBUF_SC_VI);
110 _CASSERT(KPKT_SC_SIG == MBUF_SC_SIG);
111 _CASSERT(KPKT_SC_VO == MBUF_SC_VO);
112 _CASSERT(KPKT_SC_CTL == MBUF_SC_CTL);
113
114 _CASSERT(KPKT_SC_BK_SYS == PKT_SC_BK_SYS);
115 _CASSERT(KPKT_SC_BK == PKT_SC_BK);
116 _CASSERT(KPKT_SC_BE == PKT_SC_BE);
117 _CASSERT(KPKT_SC_RD == PKT_SC_RD);
118 _CASSERT(KPKT_SC_OAM == PKT_SC_OAM);
119 _CASSERT(KPKT_SC_AV == PKT_SC_AV);
120 _CASSERT(KPKT_SC_RV == PKT_SC_RV);
121 _CASSERT(KPKT_SC_VI == PKT_SC_VI);
122 _CASSERT(KPKT_SC_SIG == PKT_SC_SIG);
123 _CASSERT(KPKT_SC_VO == PKT_SC_VO);
124 _CASSERT(KPKT_SC_CTL == PKT_SC_CTL);
125 _CASSERT(KPKT_SC_MAX_CLASSES == MBUF_SC_MAX_CLASSES);
126
127 _CASSERT(KPKT_TC_UNSPEC == MBUF_TC_UNSPEC);
128 _CASSERT(KPKT_TC_BE == MBUF_TC_BE);
129 _CASSERT(KPKT_TC_BK == MBUF_TC_BK);
130 _CASSERT(KPKT_TC_VI == MBUF_TC_VI);
131 _CASSERT(KPKT_TC_VO == MBUF_TC_VO);
132 _CASSERT(KPKT_TC_MAX == MBUF_TC_MAX);
133
134 _CASSERT(KPKT_TC_BE == PKT_TC_BE);
135 _CASSERT(KPKT_TC_BK == PKT_TC_BK);
136 _CASSERT(KPKT_TC_VI == PKT_TC_VI);
137 _CASSERT(KPKT_TC_VO == PKT_TC_VO);
138
139 _CASSERT(PKT_SCVAL_BK_SYS == SCVAL_BK_SYS);
140 _CASSERT(PKT_SCVAL_BK == SCVAL_BK);
141 _CASSERT(PKT_SCVAL_BE == SCVAL_BE);
142 _CASSERT(PKT_SCVAL_RD == SCVAL_RD);
143 _CASSERT(PKT_SCVAL_OAM == SCVAL_OAM);
144 _CASSERT(PKT_SCVAL_AV == SCVAL_AV);
145 _CASSERT(PKT_SCVAL_RV == SCVAL_RV);
146 _CASSERT(PKT_SCVAL_VI == SCVAL_VI);
147 _CASSERT(PKT_SCVAL_VO == SCVAL_VO);
148 _CASSERT(PKT_SCVAL_CTL == SCVAL_CTL);
149
150 /*
151 * Assert that the value of common packet flags between mbuf and
152 * skywalk packets match, and that they are in PKT_F_COMMON_MASK.
153 */
154 _CASSERT(PKT_F_BACKGROUND == PKTF_SO_BACKGROUND);
155 _CASSERT(PKT_F_REALTIME == PKTF_SO_REALTIME);
156 _CASSERT(PKT_F_REXMT == PKTF_TCP_REXMT);
157 _CASSERT(PKT_F_LAST_PKT == PKTF_LAST_PKT);
158 _CASSERT(PKT_F_FLOW_ID == PKTF_FLOW_ID);
159 _CASSERT(PKT_F_FLOW_ADV == PKTF_FLOW_ADV);
160 _CASSERT(PKT_F_TX_COMPL_TS_REQ == PKTF_TX_COMPL_TS_REQ);
161 _CASSERT(PKT_F_TS_VALID == PKTF_TS_VALID);
162 _CASSERT(PKT_F_NEW_FLOW == PKTF_NEW_FLOW);
163 _CASSERT(PKT_F_START_SEQ == PKTF_START_SEQ);
164 _CASSERT(PKT_F_KEEPALIVE == PKTF_KEEPALIVE);
165 _CASSERT(PKT_F_WAKE_PKT == PKTF_WAKE_PKT);
166 _CASSERT(PKT_F_COMMON_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
167 PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_FLOW_ID | PKT_F_FLOW_ADV |
168 PKT_F_TX_COMPL_TS_REQ | PKT_F_TS_VALID | PKT_F_NEW_FLOW |
169 PKT_F_START_SEQ | PKT_F_KEEPALIVE | PKT_F_WAKE_PKT));
170 /*
171 * Assert packet flags shared with userland.
172 */
173 _CASSERT(PKT_F_USER_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
174 PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_OPT_DATA | PKT_F_PROMISC |
175 PKT_F_TRUNCATED | PKT_F_WAKE_PKT | PKT_F_L4S));
176
177 _CASSERT(offsetof(struct __kern_quantum, qum_len) ==
178 offsetof(struct __kern_packet, pkt_length));
179
180 /*
181 * Due to the use of tagged pointer, we need the size of
182 * the metadata preamble structure to be multiples of 16.
183 * See SK_PTR_TAG() definition for details.
184 */
185 _CASSERT(sizeof(struct __metadata_preamble) != 0 &&
186 (sizeof(struct __metadata_preamble) % 16) == 0);
187
188 _CASSERT(NX_PBUF_FRAGS_MIN == 1 &&
189 NX_PBUF_FRAGS_MIN == NX_PBUF_FRAGS_DEFAULT);
190
191 /*
192 * Batch alloc/free requires linking the objects together;
193 * make sure that the fields are at the same offset since
194 * we cast the object to struct skmem_obj.
195 */
196 _CASSERT(offsetof(struct __metadata_preamble, _mdp_next) ==
197 offsetof(struct skmem_obj, mo_next));
198 _CASSERT(offsetof(struct __buflet, __buflet_next) ==
199 offsetof(struct skmem_obj, mo_next));
200
201 SK_LOCK_ASSERT_HELD();
202 ASSERT(!__pp_inited);
203
204 pp_opt_cache = skmem_cache_create("pkt.opt",
205 sizeof(struct __packet_opt), sizeof(uint64_t),
206 NULL, NULL, NULL, NULL, NULL, 0);
207 pp_flow_cache = skmem_cache_create("pkt.flow",
208 sizeof(struct __flow), 16, /* 16-bytes aligned */
209 NULL, NULL, NULL, NULL, NULL, 0);
210 pp_compl_cache = skmem_cache_create("pkt.compl",
211 sizeof(struct __packet_compl), sizeof(uint64_t),
212 NULL, NULL, NULL, NULL, NULL, 0);
213
214 return 0;
215 }
216
217 void
pp_fini(void)218 pp_fini(void)
219 {
220 SK_LOCK_ASSERT_HELD();
221
222 if (__pp_inited) {
223 if (pp_compl_cache != NULL) {
224 skmem_cache_destroy(pp_compl_cache);
225 pp_compl_cache = NULL;
226 }
227 if (pp_flow_cache != NULL) {
228 skmem_cache_destroy(pp_flow_cache);
229 pp_flow_cache = NULL;
230 }
231 if (pp_opt_cache != NULL) {
232 skmem_cache_destroy(pp_opt_cache);
233 pp_opt_cache = NULL;
234 }
235
236 __pp_inited = 0;
237 }
238 }
239
240 static struct kern_pbufpool *
pp_alloc(zalloc_flags_t how)241 pp_alloc(zalloc_flags_t how)
242 {
243 struct kern_pbufpool *pp = zalloc_flags(pp_zone, how | Z_ZERO);
244
245 if (pp) {
246 lck_mtx_init(&pp->pp_lock, &skmem_lock_grp, &skmem_lock_attr);
247 }
248 return pp;
249 }
250
/*
 * Destroy and free a pool whose refcount has dropped to zero.
 * Called with the pool lock held (from pp_release_locked()); tears the
 * pool down under the lock, drops the lock, then destroys the mutex
 * and returns the structure to its zone.
 */
static void
pp_free(struct kern_pbufpool *pp)
{
	PP_LOCK_ASSERT_HELD(pp);

	pp_destroy(pp);
	PP_UNLOCK(pp);

	SK_DF(SK_VERB_MEM, "pp 0x%llx FREE", SK_KVA(pp));
	lck_mtx_destroy(&pp->pp_lock, &skmem_lock_grp);
	zfree(pp_zone, pp);
}
263
/*
 * Take a reference on the pool; caller must hold the pool lock.
 * The post-increment ASSERT catches refcount wraparound.
 */
void
pp_retain_locked(struct kern_pbufpool *pp)
{
	PP_LOCK_ASSERT_HELD(pp);

	pp->pp_refcnt++;
	ASSERT(pp->pp_refcnt != 0);
}
272
/*
 * Take a reference on the pool, acquiring the pool lock around
 * pp_retain_locked().
 */
void
pp_retain(struct kern_pbufpool *pp)
{
	PP_LOCK(pp);
	pp_retain_locked(pp);
	PP_UNLOCK(pp);
}
280
281 boolean_t
pp_release_locked(struct kern_pbufpool * pp)282 pp_release_locked(struct kern_pbufpool *pp)
283 {
284 uint32_t oldref = pp->pp_refcnt;
285
286 PP_LOCK_ASSERT_HELD(pp);
287
288 ASSERT(pp->pp_refcnt != 0);
289 if (--pp->pp_refcnt == 0) {
290 pp_free(pp);
291 }
292
293 return oldref == 1;
294 }
295
296 boolean_t
pp_release(struct kern_pbufpool * pp)297 pp_release(struct kern_pbufpool *pp)
298 {
299 boolean_t lastref;
300
301 PP_LOCK(pp);
302 if (!(lastref = pp_release_locked(pp))) {
303 PP_UNLOCK(pp);
304 }
305
306 return lastref;
307 }
308
/*
 * Mark the pool closed (no new users) and drop the caller's reference.
 * Closing twice is a bug (see ASSERT).  If this was not the last
 * reference, pp_release_locked() leaves the lock held, so unlock here;
 * otherwise the pool has already been freed and unlocked.
 */
void
pp_close(struct kern_pbufpool *pp)
{
	PP_LOCK(pp);
	ASSERT(pp->pp_refcnt > 0);
	ASSERT(!(pp->pp_flags & PPF_CLOSED));
	pp->pp_flags |= PPF_CLOSED;
	if (!pp_release_locked(pp)) {
		PP_UNLOCK(pp);
	}
}
320
321 void
pp_regions_params_adjust(struct skmem_region_params * srp_array,nexus_meta_type_t md_type,nexus_meta_subtype_t md_subtype,uint32_t md_cnt,uint16_t max_frags,uint32_t buf_size,uint32_t large_buf_size,uint32_t buf_cnt,uint32_t buf_seg_size,uint32_t flags)322 pp_regions_params_adjust(struct skmem_region_params *srp_array,
323 nexus_meta_type_t md_type, nexus_meta_subtype_t md_subtype, uint32_t md_cnt,
324 uint16_t max_frags, uint32_t buf_size, uint32_t large_buf_size,
325 uint32_t buf_cnt, uint32_t buf_seg_size, uint32_t flags)
326 {
327 struct skmem_region_params *srp, *kmd_srp, *buf_srp, *kbft_srp,
328 *lbuf_srp;
329 uint32_t md_size = 0;
330 bool kernel_only = ((flags & PP_REGION_CONFIG_KERNEL_ONLY) != 0);
331 bool md_persistent = ((flags & PP_REGION_CONFIG_MD_PERSISTENT) != 0);
332 bool buf_persistent = ((flags & PP_REGION_CONFIG_BUF_PERSISTENT) != 0);
333 bool config_buflet = ((flags & PP_REGION_CONFIG_BUFLET) != 0);
334 bool md_magazine_enable = ((flags &
335 PP_REGION_CONFIG_MD_MAGAZINE_ENABLE) != 0);
336 bool config_raw_buflet = (flags & PP_REGION_CONFIG_RAW_BUFLET) != 0;
337
338 ASSERT(max_frags != 0);
339
340 switch (md_type) {
341 case NEXUS_META_TYPE_QUANTUM:
342 md_size = NX_METADATA_QUANTUM_SZ;
343 break;
344 case NEXUS_META_TYPE_PACKET:
345 md_size = NX_METADATA_PACKET_SZ(max_frags);
346 break;
347 default:
348 VERIFY(0);
349 /* NOTREACHED */
350 __builtin_unreachable();
351 }
352
353 switch (flags & PP_REGION_CONFIG_BUF_IODIR_BIDIR) {
354 case PP_REGION_CONFIG_BUF_IODIR_IN:
355 kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
356 buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
357 lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
358 kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
359 break;
360 case PP_REGION_CONFIG_BUF_IODIR_OUT:
361 kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
362 buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
363 lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
364 kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
365 break;
366 case PP_REGION_CONFIG_BUF_IODIR_BIDIR:
367 default:
368 kmd_srp = &srp_array[SKMEM_REGION_KMD];
369 buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
370 lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
371 kbft_srp = &srp_array[SKMEM_REGION_KBFT];
372 break;
373 }
374
375 /* add preamble size to metadata obj size */
376 md_size += METADATA_PREAMBLE_SZ;
377 ASSERT(md_size >= NX_METADATA_OBJ_MIN_SZ);
378
379 /* configure kernel metadata region */
380 kmd_srp->srp_md_type = md_type;
381 kmd_srp->srp_md_subtype = md_subtype;
382 kmd_srp->srp_r_obj_cnt = md_cnt;
383 kmd_srp->srp_r_obj_size = md_size;
384 kmd_srp->srp_max_frags = max_frags;
385 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
386 if (md_persistent) {
387 kmd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
388 }
389 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
390 if (md_magazine_enable) {
391 kmd_srp->srp_cflags &= ~SKMEM_REGION_CR_NOMAGAZINES;
392 }
393 skmem_region_params_config(kmd_srp);
394
395 /* configure user metadata region */
396 srp = &srp_array[SKMEM_REGION_UMD];
397 if (!kernel_only) {
398 srp->srp_md_type = kmd_srp->srp_md_type;
399 srp->srp_md_subtype = kmd_srp->srp_md_subtype;
400 srp->srp_r_obj_cnt = kmd_srp->srp_c_obj_cnt;
401 srp->srp_r_obj_size = kmd_srp->srp_c_obj_size;
402 srp->srp_max_frags = kmd_srp->srp_max_frags;
403 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
404 if (md_persistent) {
405 srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
406 }
407 /*
408 * UMD is a mirrored region and object allocation operations
409 * are performed on the KMD objects.
410 */
411 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
412 skmem_region_params_config(srp);
413 ASSERT(srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
414 } else {
415 ASSERT(srp->srp_r_obj_cnt == 0);
416 ASSERT(srp->srp_r_obj_size == 0);
417 }
418
419 /* configure buffer region */
420 buf_srp->srp_r_obj_cnt = MAX(buf_cnt, kmd_srp->srp_c_obj_cnt);
421 buf_srp->srp_r_obj_size = buf_size;
422 buf_srp->srp_cflags &= ~SKMEM_REGION_CR_MONOLITHIC;
423 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
424 if (buf_persistent) {
425 buf_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
426 }
427 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
428 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_UREADONLY) == 0);
429 if ((flags & PP_REGION_CONFIG_BUF_UREADONLY) != 0) {
430 buf_srp->srp_cflags |= SKMEM_REGION_CR_UREADONLY;
431 }
432 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_KREADONLY) == 0);
433 if ((flags & PP_REGION_CONFIG_BUF_KREADONLY) != 0) {
434 buf_srp->srp_cflags |= SKMEM_REGION_CR_KREADONLY;
435 }
436 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_MONOLITHIC) == 0);
437 if ((flags & PP_REGION_CONFIG_BUF_MONOLITHIC) != 0) {
438 buf_srp->srp_cflags |= SKMEM_REGION_CR_MONOLITHIC;
439 }
440 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_SEGPHYSCONTIG) == 0);
441 if ((flags & PP_REGION_CONFIG_BUF_SEGPHYSCONTIG) != 0) {
442 buf_srp->srp_cflags |= SKMEM_REGION_CR_SEGPHYSCONTIG;
443 }
444 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOCACHE) == 0);
445 if ((flags & PP_REGION_CONFIG_BUF_NOCACHE) != 0) {
446 buf_srp->srp_cflags |= SKMEM_REGION_CR_NOCACHE;
447 }
448 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_THREADSAFE) == 0);
449 if ((flags & PP_REGION_CONFIG_BUF_THREADSAFE) != 0) {
450 buf_srp->srp_cflags |= SKMEM_REGION_CR_THREADSAFE;
451 }
452 if (buf_seg_size != 0) {
453 buf_srp->srp_r_seg_size = buf_seg_size;
454 }
455 skmem_region_params_config(buf_srp);
456
457 /* configure large buffer region */
458 if (large_buf_size != 0) {
459 lbuf_srp->srp_r_obj_cnt = buf_srp->srp_r_obj_cnt;
460 lbuf_srp->srp_r_obj_size = large_buf_size;
461 lbuf_srp->srp_r_seg_size = buf_srp->srp_r_seg_size;
462 lbuf_srp->srp_cflags = buf_srp->srp_cflags;
463 skmem_region_params_config(lbuf_srp);
464 }
465
466 /* configure kernel buflet region */
467 if (config_buflet) {
468 ASSERT(md_type == NEXUS_META_TYPE_PACKET);
469 /*
470 * We want to have enough buflets when multi-buflet and
471 * shared buffer object is used.
472 */
473 uint32_t r_obj_cnt_multiplier = config_raw_buflet ?
474 KERN_BUF_CNT_MULTIPLIER : 1;
475 kbft_srp->srp_r_obj_cnt =
476 (buf_srp->srp_c_obj_cnt + lbuf_srp->srp_c_obj_cnt) *
477 r_obj_cnt_multiplier;
478 kbft_srp->srp_r_obj_size = MAX(sizeof(struct __kern_buflet_ext),
479 sizeof(struct __user_buflet));
480 kbft_srp->srp_cflags = kmd_srp->srp_cflags;
481 skmem_region_params_config(kbft_srp);
482 ASSERT(kbft_srp->srp_c_obj_cnt >= buf_srp->srp_c_obj_cnt +
483 lbuf_srp->srp_c_obj_cnt);
484 } else {
485 ASSERT(kbft_srp->srp_r_obj_cnt == 0);
486 ASSERT(kbft_srp->srp_r_obj_size == 0);
487 }
488
489 /* configure user buflet region */
490 srp = &srp_array[SKMEM_REGION_UBFT];
491 if (config_buflet && !kernel_only) {
492 srp->srp_r_obj_cnt = kbft_srp->srp_c_obj_cnt;
493 srp->srp_r_obj_size = kbft_srp->srp_c_obj_size;
494 srp->srp_cflags = srp_array[SKMEM_REGION_UMD].srp_cflags;
495 skmem_region_params_config(srp);
496 ASSERT(srp->srp_c_obj_cnt == kbft_srp->srp_c_obj_cnt);
497 } else {
498 ASSERT(srp->srp_r_obj_cnt == 0);
499 ASSERT(srp->srp_r_obj_size == 0);
500 }
501
502 /* make sure each metadata can be paired with a buffer */
503 ASSERT(kmd_srp->srp_c_obj_cnt <= buf_srp->srp_c_obj_cnt);
504 }
505
SK_NO_INLINE_ATTRIBUTE
static int
pp_metadata_construct(struct __kern_quantum *kqum, struct __user_quantum *uqum,
    obj_idx_t midx, struct kern_pbufpool *pp, uint32_t skmflag, uint16_t bufcnt,
    bool raw, struct skmem_obj **blist)
{
	struct __kern_buflet *kbuf;
	mach_vm_address_t baddr = 0;
	uint16_t *pbufs_cnt, *pbufs_max;
	uint16_t i;

	/*
	 * Construct a kernel (and optionally mirrored user) metadata object
	 * at region index midx, attaching bufcnt buffers/buflets.
	 *
	 * kqum/uqum: kernel and user quantum (uqum may be NULL for
	 *   kernel-only pools); midx: metadata object index in the region;
	 *   skmflag: skmem allocation flags for buffer allocation;
	 *   raw: object came fresh from the slab (allocate aux structures)
	 *   vs. a cached object that retains them; blist: caller-provided
	 *   list of pre-constructed buflets, consumed when the pool has
	 *   buffer-on-demand; entries used are unlinked from the list.
	 *
	 * Returns 0 on success, ENOMEM if a buffer/buflet could not be
	 * obtained (partially constructed state is destructed before
	 * returning).
	 */
	ASSERT(bufcnt == 1 || PP_HAS_BUFFER_ON_DEMAND(pp));

	/* construct {user,kernel} metadata */
	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
		struct __user_packet *upkt = SK_PTR_ADDR_UPKT(uqum);
		struct __packet_opt *opt;
		struct __flow *flow;
		struct __packet_compl *compl;
		uint64_t pflags;

		if (raw) {
			/* fresh slab object: allocate aux structures */
			opt = skmem_cache_alloc(pp_opt_cache, SKMEM_SLEEP);
			flow = skmem_cache_alloc(pp_flow_cache, SKMEM_SLEEP);
			compl = skmem_cache_alloc(pp_compl_cache, SKMEM_SLEEP);
			pflags = (PKT_F_OPT_ALLOC | PKT_F_FLOW_ALLOC |
			    PKT_F_TX_COMPL_ALLOC);
		} else {
			/* cached object: reuse aux structures it still owns */
			ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
			    kpkt->pkt_com_opt != NULL);
			opt = kpkt->pkt_com_opt;
			ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
			    kpkt->pkt_flow != NULL);
			flow = kpkt->pkt_flow;
			ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
			    kpkt->pkt_tx_compl != NULL);
			compl = kpkt->pkt_tx_compl;
			pflags = kpkt->pkt_pflags;
		}
		/* will be adjusted below as part of allocating buffer(s) */
		_CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
		_CASSERT(sizeof(kpkt->pkt_bufs_max) == sizeof(uint16_t));
		pbufs_cnt = __DECONST(uint16_t *, &kpkt->pkt_bufs_cnt);
		pbufs_max = __DECONST(uint16_t *, &kpkt->pkt_bufs_max);

		/* kernel (and user) packet */
		KPKT_CTOR(kpkt, pflags, opt, flow, compl, midx,
		    upkt, pp, 0, pp->pp_max_frags, 0);
		break;
	}
	default:
		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
		VERIFY(bufcnt == 1);
		/* TODO: point these to quantum's once they're defined */
		pbufs_cnt = pbufs_max = NULL;
		/* kernel quantum */
		KQUM_CTOR(kqum, midx, uqum, pp, 0);
		break;
	}

	/* attach bufcnt buffers, chaining extra buflets off the native one */
	kbuf = kqum->qum_buf;
	for (i = 0; i < bufcnt; i++) {
		struct skmem_obj_info oib;

		if (!PP_HAS_BUFFER_ON_DEMAND(pp)) {
			ASSERT(i == 0);
			ASSERT(*blist == NULL);
			/*
			 * quantum has a native buflet, so we only need a
			 * buffer to be allocated and attached to the buflet.
			 */
			baddr = pp_alloc_buffer_common(pp, &oib, skmflag,
			    false);
			if (__improbable(baddr == 0)) {
				goto fail;
			}
			KBUF_CTOR(kbuf, baddr, SKMEM_OBJ_IDX_REG(&oib),
			    SKMEM_OBJ_BUFCTL(&oib), pp, false);
			baddr = 0;
		} else {
			/*
			 * we use pre-constructed buflets with attached buffers.
			 */
			struct __kern_buflet *pkbuf = kbuf;
			struct skmem_obj *blistn;

			ASSERT(pkbuf != NULL);
			kbuf = (kern_buflet_t)*blist;
			if (__improbable(kbuf == NULL)) {
				SK_DF(SK_VERB_MEM, "failed to get buflet,"
				    " pp 0x%llx", SK_KVA(pp));
				goto fail;
			}
			/* unlink the head buflet from the caller's list */
			blistn = (*blist)->mo_next;
			(*blist)->mo_next = NULL;

			KBUF_EXT_INIT(kbuf, pp);
			KBUF_LINK(pkbuf, kbuf);
			*blist = blistn;
		}

		/* adjust buffer count accordingly */
		if (__probable(pbufs_cnt != NULL)) {
			*pbufs_cnt += 1;
			ASSERT(*pbufs_cnt <= *pbufs_max);
		}
	}

	ASSERT(!PP_KERNEL_ONLY(pp) || (kqum->qum_qflags & QUM_F_KERNEL_ONLY));
	ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
	SK_DF(SK_VERB_MEM, "pp 0x%llx pkt 0x%llx bufcnt %d buf 0x%llx",
	    SK_KVA(pp), SK_KVA(kqum), bufcnt, SK_KVA(baddr));
	return 0;

fail:
	/* tear down whatever was attached so far */
	ASSERT(bufcnt != 0 && baddr == 0);
	pp_metadata_destruct(kqum, pp, raw);
	return ENOMEM;
}
627
/*
 * Common skmem constructor callback for metadata objects.
 *
 * oi0:  object info for the kernel metadata object being constructed
 * oim0: object info for the mirrored user metadata object (NULL for
 *       kernel-only pools)
 * pp:   owning pool; skmflag: skmem allocation flags
 * no_buflet: when true, construct with zero buflets (buffers are
 *       attached later on demand)
 *
 * Returns 0 on success or ENOMEM.
 */
static int
pp_metadata_ctor_common(struct skmem_obj_info *oi0,
    struct skmem_obj_info *oim0, struct kern_pbufpool *pp, uint32_t skmflag,
    bool no_buflet)
{
	struct skmem_obj_info _oi, _oim;
	struct skmem_obj_info *oi, *oim;
	struct __kern_quantum *kqum;
	struct __user_quantum *uqum;
	uint16_t bufcnt = (no_buflet ? 0 : pp->pp_max_frags);
	struct skmem_obj *blist = NULL;
	int error;

#if (DEVELOPMENT || DEBUG)
	uint64_t mtbf = skmem_region_get_mtbf();
	/*
	 * MTBF is applicable only for non-blocking allocations here.
	 */
	if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
	    (skmflag & SKMEM_NOSLEEP))) {
		SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
		net_update_uptime();
		return ENOMEM;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * Note that oi0 and oim0 may be stored inside the object itself;
	 * if so, copy them to local variables before constructing. We
	 * don't use PPF_BATCH to test as the allocator may be allocating
	 * storage space differently depending on the number of objects.
	 */
	if (__probable((uintptr_t)oi0 >= (uintptr_t)SKMEM_OBJ_ADDR(oi0) &&
	    ((uintptr_t)oi0 + sizeof(*oi0)) <=
	    ((uintptr_t)SKMEM_OBJ_ADDR(oi0) + SKMEM_OBJ_SIZE(oi0)))) {
		oi = &_oi;
		*oi = *oi0;
		if (__probable(oim0 != NULL)) {
			oim = &_oim;
			*oim = *oim0;
		} else {
			oim = NULL;
		}
	} else {
		oi = oi0;
		oim = oim0;
	}

	/* the quantum lives right after the metadata preamble */
	kqum = SK_PTR_ADDR_KQUM((uintptr_t)SKMEM_OBJ_ADDR(oi) +
	    METADATA_PREAMBLE_SZ);

	if (__probable(!PP_KERNEL_ONLY(pp))) {
		ASSERT(oim != NULL && SKMEM_OBJ_ADDR(oim) != NULL);
		ASSERT(SKMEM_OBJ_SIZE(oi) == SKMEM_OBJ_SIZE(oim));
		uqum = SK_PTR_ADDR_UQUM((uintptr_t)SKMEM_OBJ_ADDR(oim) +
		    METADATA_PREAMBLE_SZ);
	} else {
		ASSERT(oim == NULL);
		uqum = NULL;
	}

	if (oim != NULL) {
		/* initialize user metadata redzone */
		struct __metadata_preamble *mdp = SKMEM_OBJ_ADDR(oim);
		mdp->mdp_redzone =
		    (SKMEM_OBJ_ROFF(oim) + METADATA_PREAMBLE_SZ) ^
		    __ch_umd_redzone_cookie;
	}

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0) {
		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
		    bufcnt, skmflag);
	}

	error = pp_metadata_construct(kqum, uqum, SKMEM_OBJ_IDX_REG(oi), pp,
	    skmflag, bufcnt, TRUE, &blist);
	/* return any buflets pp_metadata_construct() did not consume */
	if (__improbable(blist != NULL)) {
		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist);
		blist = NULL;
	}
	return error;
}
711
/* skmem ctor callback: construct metadata with no buflets attached */
static int
pp_metadata_ctor_no_buflet(struct skmem_obj_info *oi0,
    struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
{
	return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, true);
}
718
/* skmem ctor callback: construct metadata with pp_max_frags buflets */
static int
pp_metadata_ctor_max_buflet(struct skmem_obj_info *oi0,
    struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
{
	return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, false);
}
725
__attribute__((always_inline))
static void
pp_metadata_destruct_common(struct __kern_quantum *kqum,
    struct kern_pbufpool *pp, bool raw, struct skmem_obj **blist_def,
    struct skmem_obj **blist_large, struct skmem_obj **blist_raw)
{
	/*
	 * Detach and reclaim all buflets from a metadata object, prepending
	 * the detached buflets onto the caller's per-cache free lists
	 * (blist_def / blist_large / blist_raw) for batched freeing.  When
	 * raw is true, the object is headed back to the slab, so its aux
	 * structures (opt/flow/compl for packets) are freed too.
	 *
	 * NOTE(review): blist_raw is dereferenced below whenever a raw
	 * buflet is chained, yet unlike blist_def/blist_large it is not
	 * ASSERTed non-NULL here — confirm all callers pass a valid
	 * pointer.
	 */
	struct __kern_buflet *kbuf, *nbuf;
	struct skmem_obj *p_blist_def = NULL, *p_blist_large = NULL, *p_blist_raw = NULL;
	struct skmem_obj **pp_blist_def = &p_blist_def;
	struct skmem_obj **pp_blist_large = &p_blist_large;
	struct skmem_obj **pp_blist_raw = &p_blist_raw;

	uint16_t bufcnt, i = 0;
	bool first_buflet_empty;

	ASSERT(blist_def != NULL);
	ASSERT(blist_large != NULL);

	/* validate the object and determine its buflet count */
	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);

		ASSERT(kpkt->pkt_user != NULL || PP_KERNEL_ONLY(pp));
		ASSERT(kpkt->pkt_qum.qum_pp == pp);
		ASSERT(METADATA_TYPE(kpkt) == pp->pp_md_type);
		ASSERT(METADATA_SUBTYPE(kpkt) == pp->pp_md_subtype);
		ASSERT(METADATA_IDX(kpkt) != OBJ_IDX_NONE);
		ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
		ASSERT(kpkt->pkt_bufs_cnt <= kpkt->pkt_bufs_max);
		ASSERT(kpkt->pkt_bufs_max == pp->pp_max_frags);
		_CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
		bufcnt = kpkt->pkt_bufs_cnt;
		kbuf = &kqum->qum_buf[0];
		/*
		 * special handling for empty first buflet.
		 */
		first_buflet_empty = (kbuf->buf_addr == 0);
		*__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = 0;
		break;
	}
	default:
		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
		ASSERT(kqum->qum_user != NULL || PP_KERNEL_ONLY(pp));
		ASSERT(kqum->qum_pp == pp);
		ASSERT(METADATA_TYPE(kqum) == pp->pp_md_type);
		ASSERT(METADATA_SUBTYPE(kqum) == pp->pp_md_subtype);
		ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
		ASSERT(kqum->qum_ksd == NULL);
		kbuf = &kqum->qum_buf[0];
		/*
		 * XXX: Special handling for quantum as we don't currently
		 * define bufs_{cnt,max} there. Given that we support at
		 * most only 1 buflet for now, check if buf_addr is non-NULL.
		 * See related code in pp_metadata_construct().
		 */
		first_buflet_empty = (kbuf->buf_addr == 0);
		bufcnt = first_buflet_empty ? 0 : 1;
		break;
	}

	/* detach the native (first) buflet and free its buffer if present */
	nbuf = __DECONST(struct __kern_buflet *, kbuf->buf_nbft_addr);
	BUF_NBFT_ADDR(kbuf, 0);
	BUF_NBFT_IDX(kbuf, OBJ_IDX_NONE);
	if (!first_buflet_empty) {
		pp_free_buflet_common(pp, kbuf);
		++i;
	}

	/* walk the chained buflets, sorting each onto its free list */
	while (nbuf != NULL) {
		if (BUFLET_FROM_RAW_BFLT_CACHE(nbuf)) {
			/*
			 * Separate the raw buflet and its attached buffer to
			 * reduce usecnt.
			 */
			uint32_t usecnt = 0;
			void *objaddr = nbuf->buf_objaddr;
			KBUF_DTOR(nbuf, usecnt);
			SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u",
			    SK_KVA(pp), SK_KVA(objaddr), usecnt);
			/* last reference: return the buffer to its cache */
			if (__improbable(usecnt == 0)) {
				skmem_cache_free(BUFLET_HAS_LARGE_BUF(nbuf) ?
				    PP_BUF_CACHE_LARGE(pp) : PP_BUF_CACHE_DEF(pp),
				    objaddr);
			}

			*pp_blist_raw = (struct skmem_obj *)(void *)nbuf;
			pp_blist_raw = &((struct skmem_obj *)(void *)nbuf)->mo_next;
		} else {
			if (BUFLET_HAS_LARGE_BUF(nbuf)) {
				*pp_blist_large = (struct skmem_obj *)(void *)nbuf;
				pp_blist_large =
				    &((struct skmem_obj *)(void *)nbuf)->mo_next;
			} else {
				*pp_blist_def = (struct skmem_obj *)(void *)nbuf;
				pp_blist_def =
				    &((struct skmem_obj *)(void *)nbuf)->mo_next;
			}
		}
		BUF_NBFT_IDX(nbuf, OBJ_IDX_NONE);
		nbuf = __DECONST(struct __kern_buflet *, nbuf->buf_nbft_addr);
		++i;
	}

	/* every accounted buflet must have been visited */
	ASSERT(i == bufcnt);

	/* prepend the locally built lists onto the caller's lists */
	if (p_blist_def != NULL) {
		*pp_blist_def = *blist_def;
		*blist_def = p_blist_def;
	}
	if (p_blist_large != NULL) {
		*pp_blist_large = *blist_large;
		*blist_large = p_blist_large;
	}
	if (p_blist_raw != NULL) {
		*pp_blist_raw = *blist_raw;
		*blist_raw = p_blist_raw;
	}

	/* if we're about to return this object to the slab, clean it up */
	if (raw) {
		switch (pp->pp_md_type) {
		case NEXUS_META_TYPE_PACKET: {
			struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);

			ASSERT(kpkt->pkt_com_opt != NULL ||
			    !(kpkt->pkt_pflags & PKT_F_OPT_ALLOC));
			if (kpkt->pkt_com_opt != NULL) {
				ASSERT(kpkt->pkt_pflags & PKT_F_OPT_ALLOC);
				skmem_cache_free(pp_opt_cache,
				    kpkt->pkt_com_opt);
				kpkt->pkt_com_opt = NULL;
			}
			ASSERT(kpkt->pkt_flow != NULL ||
			    !(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC));
			if (kpkt->pkt_flow != NULL) {
				ASSERT(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC);
				skmem_cache_free(pp_flow_cache, kpkt->pkt_flow);
				kpkt->pkt_flow = NULL;
			}
			ASSERT(kpkt->pkt_tx_compl != NULL ||
			    !(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC));
			if (kpkt->pkt_tx_compl != NULL) {
				ASSERT(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC);
				skmem_cache_free(pp_compl_cache,
				    kpkt->pkt_tx_compl);
				kpkt->pkt_tx_compl = NULL;
			}
			kpkt->pkt_pflags = 0;
			break;
		}
		default:
			ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_QUANTUM);
			/* nothing to do for quantum (yet) */
			break;
		}
	}
}
883
/*
 * Destruct a single metadata object: detach its buflets via
 * pp_metadata_destruct_common(), then batch-free each collected
 * buflet list back to its originating cache (default, large, raw).
 */
__attribute__((always_inline))
static void
pp_metadata_destruct(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
    bool raw)
{
	struct skmem_obj *blist_def = NULL, *blist_large = NULL, *blist_raw = NULL;

	pp_metadata_destruct_common(kqum, pp, raw, &blist_def, &blist_large,
	    &blist_raw);
	if (blist_def != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
	}
	if (blist_large != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
	}
	if (blist_raw != NULL) {
		skmem_cache_batch_free(pp->pp_raw_kbft_cache, blist_raw);
	}
}
903
904 static void
pp_metadata_dtor(void * addr,void * arg)905 pp_metadata_dtor(void *addr, void *arg)
906 {
907 pp_metadata_destruct(SK_PTR_ADDR_KQUM((uintptr_t)addr +
908 METADATA_PREAMBLE_SZ), arg, TRUE);
909 }
910
911 static void
pp_buf_seg_ctor(struct sksegment * sg,IOSKMemoryBufferRef md,void * arg)912 pp_buf_seg_ctor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
913 {
914 struct kern_pbufpool *pp = arg;
915
916 if (pp->pp_pbuf_seg_ctor != NULL) {
917 pp->pp_pbuf_seg_ctor(pp, sg, md);
918 }
919 }
920
921 static void
pp_buf_seg_dtor(struct sksegment * sg,IOSKMemoryBufferRef md,void * arg)922 pp_buf_seg_dtor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
923 {
924 struct kern_pbufpool *pp = arg;
925
926 if (pp->pp_pbuf_seg_dtor != NULL) {
927 pp->pp_pbuf_seg_dtor(pp, sg, md);
928 }
929 }
930
static int
pp_buflet_metadata_ctor_common(struct skmem_obj_info *oi0,
    struct skmem_obj_info *oim0, void *arg, uint32_t skmflag, bool large,
    bool attach_buf)
{
#pragma unused (skmflag)
	/*
	 * Common constructor for buflet metadata objects.
	 *
	 * @param oi0	kernel-side object info (may be stored inside the
	 *		object itself; see note below).
	 * @param oim0	mirrored user-side object info; NULL for
	 *		kernel-only pools.
	 * @param arg	owning kern_pbufpool.
	 * @param large	attach a large (vs. default-sized) buffer.
	 * @param attach_buf	if true, allocate and attach a buffer now;
	 *		if false, construct a "raw" buflet with no buffer.
	 *
	 * @return 0 on success, ENOMEM if the buffer allocation fails.
	 */
	struct kern_pbufpool *pp = (struct kern_pbufpool *)arg;
	struct __kern_buflet *kbft;
	struct __user_buflet *ubft;
	struct skmem_obj_info oib;
	mach_vm_address_t baddr = 0;
	obj_idx_t oi_idx_reg, oib_idx_reg = OBJ_IDX_NONE;
	struct skmem_bufctl* oib_bc = NULL;

	if (attach_buf) {
		baddr = pp_alloc_buffer_common(pp, &oib, skmflag, large);
		if (__improbable(baddr == 0)) {
			return ENOMEM;
		}
		oib_idx_reg = SKMEM_OBJ_IDX_REG(&oib);
		oib_bc = SKMEM_OBJ_BUFCTL(&oib);
	}
	/*
	 * Note that oi0 and oim0 may be stored inside the object itself;
	 * so copy what is required to local variables before constructing.
	 */
	oi_idx_reg = SKMEM_OBJ_IDX_REG(oi0);
	kbft = SKMEM_OBJ_ADDR(oi0);

	if (__probable(!PP_KERNEL_ONLY(pp))) {
		/* user-visible pool: kernel/user objects must mirror */
		ASSERT(oim0 != NULL && SKMEM_OBJ_ADDR(oim0) != NULL);
		ASSERT(SKMEM_OBJ_SIZE(oi0) == SKMEM_OBJ_SIZE(oim0));
		ASSERT(oi_idx_reg == SKMEM_OBJ_IDX_REG(oim0));
		ASSERT(SKMEM_OBJ_IDX_SEG(oi0) == SKMEM_OBJ_IDX_SEG(oim0));
		ubft = SKMEM_OBJ_ADDR(oim0);
	} else {
		ASSERT(oim0 == NULL);
		ubft = NULL;
	}
	KBUF_EXT_CTOR(kbft, ubft, baddr, oib_idx_reg, oib_bc,
	    oi_idx_reg, pp, large, attach_buf);
	return 0;
}
974
975 static int
pp_buflet_default_buffer_metadata_ctor(struct skmem_obj_info * oi0,struct skmem_obj_info * oim0,void * arg,uint32_t skmflag)976 pp_buflet_default_buffer_metadata_ctor(struct skmem_obj_info *oi0,
977 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
978 {
979 return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, false, true);
980 }
981
982 static int
pp_buflet_large_buffer_metadata_ctor(struct skmem_obj_info * oi0,struct skmem_obj_info * oim0,void * arg,uint32_t skmflag)983 pp_buflet_large_buffer_metadata_ctor(struct skmem_obj_info *oi0,
984 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
985 {
986 return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, true, true);
987 }
988
989 static int
pp_buflet_no_buffer_metadata_ctor(struct skmem_obj_info * oi0,struct skmem_obj_info * oim0,void * arg,uint32_t skmflag)990 pp_buflet_no_buffer_metadata_ctor(struct skmem_obj_info *oi0,
991 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
992 {
993 return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, false, false);
994 }
995
static void
pp_buflet_metadata_dtor(void *addr, void *arg)
{
	/*
	 * Slab-layer destructor for buflet metadata: detach and, when the
	 * use count drops to zero, free the attached buffer back to its
	 * buffer cache.
	 */
	struct __kern_buflet *kbft = addr;
	void *objaddr;
	struct kern_pbufpool *pp = arg;
	uint32_t usecnt = 0;
	bool large = BUFLET_HAS_LARGE_BUF(kbft);

	ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
	/*
	 * don't assert for (buf_nbft_addr == 0) here as constructed
	 * buflet may have this field as non-zero. This is because
	 * buf_nbft_addr (__buflet_next) is used by skmem batch alloc
	 * for chaining the buflets.
	 * To ensure that the freed buflet was not part of a chain we
	 * assert for (buf_nbft_idx == OBJ_IDX_NONE).
	 */
	ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
	ASSERT(((struct __kern_buflet_ext *)kbft)->kbe_buf_upp_link.sle_next ==
	    NULL);

	/*
	 * The raw buflet has never been attached with a buffer or already
	 * cleaned up.
	 */
	if ((kbft->buf_flag & BUFLET_FLAG_RAW) != 0 && kbft->buf_ctl == NULL) {
		return;
	}

	ASSERT(kbft->buf_addr != 0);
	ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
	ASSERT(kbft->buf_ctl != NULL);

	/* capture the buffer object before KBUF_DTOR clears the buflet */
	objaddr = kbft->buf_objaddr;
	KBUF_DTOR(kbft, usecnt);
	SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u", SK_KVA(pp),
	    SK_KVA(objaddr), usecnt);
	/* only the last reference returns the buffer to its cache */
	if (__probable(usecnt == 0)) {
		skmem_cache_free(large ? PP_BUF_CACHE_LARGE(pp) :
		    PP_BUF_CACHE_DEF(pp), objaddr);
	}
}
1039
struct kern_pbufpool *
pp_create(const char *name, struct skmem_region_params *srp_array,
    pbuf_seg_ctor_fn_t buf_seg_ctor, pbuf_seg_dtor_fn_t buf_seg_dtor,
    const void *ctx, pbuf_ctx_retain_fn_t ctx_retain,
    pbuf_ctx_release_fn_t ctx_release, uint32_t ppcreatef)
{
	/*
	 * Create a packet buffer pool from the given region parameters.
	 *
	 * @param name		pool name; prefixed with "skywalk.pp.".
	 * @param srp_array	region parameter array; the metadata/buffer/
	 *			buflet regions are selected from the KMD,
	 *			RXKMD or TXKMD family, whichever has a
	 *			non-zero object count (checked in that order).
	 * @param buf_seg_ctor/buf_seg_dtor
	 *			optional buffer-segment callbacks; must be
	 *			both NULL or both non-NULL.
	 * @param ctx, ctx_retain, ctx_release
	 *			optional client context with retain/release;
	 *			must be all NULL or all non-NULL.
	 * @param ppcreatef	PPCREATEF_* flags.
	 *
	 * @return the new pool (with one reference held), or NULL on
	 *	   failure; partially-created state is torn down via
	 *	   pp_close() on the failure path.
	 */
	struct kern_pbufpool *pp = NULL;
	uint32_t md_size, def_buf_obj_size;
	uint16_t def_buf_size, large_buf_size;
	nexus_meta_type_t md_type;
	nexus_meta_subtype_t md_subtype;
	uint32_t md_cflags;
	uint16_t max_frags;
	char cname[64];
	struct skmem_region_params *kmd_srp;
	struct skmem_region_params *buf_srp;
	struct skmem_region_params *kbft_srp;
	struct skmem_region_params *umd_srp = NULL;
	struct skmem_region_params *ubft_srp = NULL;
	struct skmem_region_params *lbuf_srp = NULL;

	/* buf_seg_{ctor,dtor} pair must be either NULL or non-NULL */
	ASSERT(!(!(buf_seg_ctor == NULL && buf_seg_dtor == NULL) &&
	    ((buf_seg_ctor == NULL) ^ (buf_seg_dtor == NULL))));

	/* ctx{,_retain,_release} must be either ALL NULL or ALL non-NULL */
	ASSERT((ctx == NULL && ctx_retain == NULL && ctx_release == NULL) ||
	    (ctx != NULL && ctx_retain != NULL && ctx_release != NULL));

	/*
	 * Select the region family: generic KMD, else RX-only, else
	 * TX-only (at least one must be populated).
	 */
	if (srp_array[SKMEM_REGION_KMD].srp_c_obj_cnt != 0) {
		kmd_srp = &srp_array[SKMEM_REGION_KMD];
		buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
		lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
		kbft_srp = &srp_array[SKMEM_REGION_KBFT];
	} else if (srp_array[SKMEM_REGION_RXKMD].srp_c_obj_cnt != 0) {
		kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
		buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
		lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
		kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
	} else {
		VERIFY(srp_array[SKMEM_REGION_TXKMD].srp_c_obj_cnt != 0);
		kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
		buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
		lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
		kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
	}

	VERIFY(kmd_srp->srp_c_obj_size != 0);
	VERIFY(buf_srp->srp_c_obj_cnt != 0);
	VERIFY(buf_srp->srp_c_obj_size != 0);

	/* buflet region only used for on-demand buffer attachment */
	if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
		VERIFY(kbft_srp->srp_c_obj_cnt != 0);
		VERIFY(kbft_srp->srp_c_obj_size != 0);
	} else {
		kbft_srp = NULL;
	}

	/*
	 * User-visible pools mirror kernel metadata/buflet regions with
	 * user counterparts; their geometry must match exactly.
	 */
	if ((ppcreatef & PPCREATEF_KERNEL_ONLY) == 0) {
		umd_srp = &srp_array[SKMEM_REGION_UMD];
		ASSERT(umd_srp->srp_c_obj_size == kmd_srp->srp_c_obj_size);
		ASSERT(umd_srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
		ASSERT(umd_srp->srp_c_seg_size == kmd_srp->srp_c_seg_size);
		ASSERT(umd_srp->srp_seg_cnt == kmd_srp->srp_seg_cnt);
		ASSERT(umd_srp->srp_md_type == kmd_srp->srp_md_type);
		ASSERT(umd_srp->srp_md_subtype == kmd_srp->srp_md_subtype);
		ASSERT(umd_srp->srp_max_frags == kmd_srp->srp_max_frags);
		ASSERT((umd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
		    (kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
		if (kbft_srp != NULL) {
			ubft_srp = &srp_array[SKMEM_REGION_UBFT];
			ASSERT(ubft_srp->srp_c_obj_size ==
			    kbft_srp->srp_c_obj_size);
			ASSERT(ubft_srp->srp_c_obj_cnt ==
			    kbft_srp->srp_c_obj_cnt);
			ASSERT(ubft_srp->srp_c_seg_size ==
			    kbft_srp->srp_c_seg_size);
			ASSERT(ubft_srp->srp_seg_cnt == kbft_srp->srp_seg_cnt);
		}
	}

	md_size = kmd_srp->srp_r_obj_size;
	md_type = kmd_srp->srp_md_type;
	md_subtype = kmd_srp->srp_md_subtype;
	max_frags = kmd_srp->srp_max_frags;
	def_buf_obj_size = buf_srp->srp_c_obj_size;

	/* buffer sizes are stored as uint16_t; clamp to UINT16_MAX */
	if (def_buf_obj_size > UINT16_MAX) {
		def_buf_size = UINT16_MAX;
	} else {
		def_buf_size = (uint16_t)def_buf_obj_size;
	}

	if (lbuf_srp->srp_c_obj_size > UINT16_MAX) {
		large_buf_size = UINT16_MAX;
	} else {
		large_buf_size = (uint16_t)lbuf_srp->srp_c_obj_size;
	}

#if (DEBUG || DEVELOPMENT)
	ASSERT(def_buf_obj_size != 0);
	ASSERT(md_type > NEXUS_META_TYPE_INVALID &&
	    md_type <= NEXUS_META_TYPE_MAX);
	if (md_type == NEXUS_META_TYPE_QUANTUM) {
		ASSERT(max_frags == 1);
		ASSERT(md_size >=
		    (METADATA_PREAMBLE_SZ + NX_METADATA_QUANTUM_SZ));
	} else {
		ASSERT(max_frags >= 1);
		ASSERT(md_type == NEXUS_META_TYPE_PACKET);
		ASSERT(md_size >= (METADATA_PREAMBLE_SZ +
		    NX_METADATA_PACKET_SZ(max_frags)));
	}
	ASSERT(md_subtype > NEXUS_META_SUBTYPE_INVALID &&
	    md_subtype <= NEXUS_META_SUBTYPE_MAX);
#endif /* DEBUG || DEVELOPMENT */

	pp = pp_alloc(Z_WAITOK);

	(void) snprintf((char *)pp->pp_name, sizeof(pp->pp_name),
	    "skywalk.pp.%s", name);

	pp->pp_ctx = __DECONST(void *, ctx);
	pp->pp_ctx_retain = ctx_retain;
	pp->pp_ctx_release = ctx_release;
	if (pp->pp_ctx != NULL) {
		pp->pp_ctx_retain(pp->pp_ctx);
	}

	pp->pp_pbuf_seg_ctor = buf_seg_ctor;
	pp->pp_pbuf_seg_dtor = buf_seg_dtor;
	PP_BUF_SIZE_DEF(pp) = def_buf_size;
	PP_BUF_OBJ_SIZE_DEF(pp) = def_buf_obj_size;
	PP_BUF_SIZE_LARGE(pp) = large_buf_size;
	PP_BUF_OBJ_SIZE_LARGE(pp) = lbuf_srp->srp_c_obj_size;
	pp->pp_md_type = md_type;
	pp->pp_md_subtype = md_subtype;
	pp->pp_max_frags = max_frags;
	/* translate PPCREATEF_* creation flags into PPF_* pool flags */
	if (ppcreatef & PPCREATEF_EXTERNAL) {
		pp->pp_flags |= PPF_EXTERNAL;
	}
	if (ppcreatef & PPCREATEF_TRUNCATED_BUF) {
		pp->pp_flags |= PPF_TRUNCATED_BUF;
	}
	if (ppcreatef & PPCREATEF_KERNEL_ONLY) {
		pp->pp_flags |= PPF_KERNEL;
	}
	if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
		pp->pp_flags |= PPF_BUFFER_ON_DEMAND;
	}
	if (ppcreatef & PPCREATEF_DYNAMIC) {
		pp->pp_flags |= PPF_DYNAMIC;
	}
	if (lbuf_srp->srp_c_obj_cnt > 0) {
		ASSERT(lbuf_srp->srp_c_obj_size != 0);
		pp->pp_flags |= PPF_LARGE_BUF;
	}
	if (ppcreatef & PPCREATEF_RAW_BFLT) {
		/* raw buflets require on-demand buffer attachment */
		ASSERT((ppcreatef & PPCREATEF_ONDEMAND_BUF) != 0);
		pp->pp_flags |= PPF_RAW_BUFLT;
	}

	pp_retain(pp);

	md_cflags = ((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ?
	    SKMEM_CR_NOMAGAZINES : 0);
	md_cflags |= SKMEM_CR_BATCH;
	pp->pp_flags |= PPF_BATCH;

	if (pp->pp_flags & PPF_DYNAMIC) {
		md_cflags |= SKMEM_CR_DYNAMIC;
	}

	/* create the memory regions: UMD (user pools only), then KMD */
	if (umd_srp != NULL && (pp->pp_umd_region =
	    skmem_region_create(name, umd_srp, NULL, NULL, NULL)) == NULL) {
		SK_ERR("\"%s\" (0x%llx) failed to create %s region",
		    pp->pp_name, SK_KVA(pp), umd_srp->srp_name);
		goto failed;
	}

	if ((pp->pp_kmd_region = skmem_region_create(name, kmd_srp, NULL, NULL,
	    NULL)) == NULL) {
		SK_ERR("\"%s\" (0x%llx) failed to create %s region",
		    pp->pp_name, SK_KVA(pp), kmd_srp->srp_name);
		goto failed;
	}

	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
		VERIFY((kbft_srp != NULL) && (kbft_srp->srp_c_obj_cnt > 0));
		if (!PP_KERNEL_ONLY(pp)) {
			VERIFY((ubft_srp != NULL) &&
			    (ubft_srp->srp_c_obj_cnt > 0));
		}
	}
	/*
	 * Metadata regions {KMD,KBFT,UBFT} magazines layer and persistency
	 * attribute must match.
	 */
	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
		ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ==
		    (kbft_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES));
		ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
		    (kbft_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
	}

	if (PP_HAS_BUFFER_ON_DEMAND(pp) && !PP_KERNEL_ONLY(pp)) {
		if ((pp->pp_ubft_region = skmem_region_create(name, ubft_srp,
		    NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" (0x%llx) failed to create %s region",
			    pp->pp_name, SK_KVA(pp), ubft_srp->srp_name);
			goto failed;
		}
	}

	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
		if ((pp->pp_kbft_region = skmem_region_create(name,
		    kbft_srp, NULL, NULL, NULL)) == NULL) {
			SK_ERR("\"%s\" (0x%llx) failed to create %s region",
			    pp->pp_name, SK_KVA(pp), kbft_srp->srp_name);
			goto failed;
		}
	}

	/* mirror kernel regions to their user counterparts */
	if (!PP_KERNEL_ONLY(pp)) {
		skmem_region_mirror(pp->pp_kmd_region, pp->pp_umd_region);
	}
	if (!PP_KERNEL_ONLY(pp) && pp->pp_ubft_region != NULL) {
		ASSERT(pp->pp_kbft_region != NULL);
		skmem_region_mirror(pp->pp_kbft_region, pp->pp_ubft_region);
	}

	/*
	 * Create the metadata cache; magazines layer is determined by caller.
	 */
	(void) snprintf(cname, sizeof(cname), "kmd.%s", name);
	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
		pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
		    pp_metadata_ctor_no_buflet, pp_metadata_dtor, NULL, pp,
		    pp->pp_kmd_region, md_cflags);
	} else {
		pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
		    pp_metadata_ctor_max_buflet, pp_metadata_dtor, NULL, pp,
		    pp->pp_kmd_region, md_cflags);
	}

	if (pp->pp_kmd_cache == NULL) {
		SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
		    pp->pp_name, SK_KVA(pp), cname);
		goto failed;
	}

	/*
	 * Create the buflet metadata cache
	 */
	if (pp->pp_kbft_region != NULL) {
		(void) snprintf(cname, sizeof(cname), "kbft_def.%s", name);
		PP_KBFT_CACHE_DEF(pp) = skmem_cache_create(cname,
		    kbft_srp->srp_c_obj_size, 0,
		    pp_buflet_default_buffer_metadata_ctor,
		    pp_buflet_metadata_dtor, NULL, pp, pp->pp_kbft_region,
		    md_cflags);

		if (PP_KBFT_CACHE_DEF(pp) == NULL) {
			SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
			    pp->pp_name, SK_KVA(pp), cname);
			goto failed;
		}

		if (PP_HAS_LARGE_BUF(pp)) {
			(void) snprintf(cname, sizeof(cname), "kbft_large.%s",
			    name);
			PP_KBFT_CACHE_LARGE(pp) = skmem_cache_create(cname,
			    kbft_srp->srp_c_obj_size, 0,
			    pp_buflet_large_buffer_metadata_ctor,
			    pp_buflet_metadata_dtor,
			    NULL, pp, pp->pp_kbft_region, md_cflags);

			if (PP_KBFT_CACHE_LARGE(pp) == NULL) {
				SK_ERR("\"%s\" (0x%llx) failed to "
				    "create \"%s\" cache", pp->pp_name,
				    SK_KVA(pp), cname);
				goto failed;
			}
		}

		if (PP_HAS_RAW_BFLT(pp)) {
			(void) snprintf(cname, sizeof(cname), "kbft_raw.%s", name);
			pp->pp_raw_kbft_cache = skmem_cache_create(cname,
			    kbft_srp->srp_c_obj_size, 0,
			    pp_buflet_no_buffer_metadata_ctor,
			    pp_buflet_metadata_dtor, NULL, pp, pp->pp_kbft_region,
			    md_cflags);

			if (pp->pp_raw_kbft_cache == NULL) {
				SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
				    pp->pp_name, SK_KVA(pp), cname);
				goto failed;
			}
		}
	}

	/* buffer regions use the client segment ctor/dtor callbacks */
	if ((PP_BUF_REGION_DEF(pp) = skmem_region_create(name,
	    buf_srp, pp_buf_seg_ctor, pp_buf_seg_dtor, pp)) == NULL) {
		SK_ERR("\"%s\" (0x%llx) failed to create %s region",
		    pp->pp_name, SK_KVA(pp), buf_srp->srp_name);
		goto failed;
	}

	if (PP_HAS_LARGE_BUF(pp)) {
		PP_BUF_REGION_LARGE(pp) = skmem_region_create(name, lbuf_srp,
		    pp_buf_seg_ctor, pp_buf_seg_dtor, pp);
		if (PP_BUF_REGION_LARGE(pp) == NULL) {
			SK_ERR("\"%s\" (0x%llx) failed to create %s region",
			    pp->pp_name, SK_KVA(pp), lbuf_srp->srp_name);
			goto failed;
		}
	}

	/*
	 * Create the buffer object cache without the magazines layer.
	 * We rely on caching the constructed metadata object instead.
	 */
	(void) snprintf(cname, sizeof(cname), "buf_def.%s", name);
	if ((PP_BUF_CACHE_DEF(pp) = skmem_cache_create(cname, def_buf_obj_size,
	    0, NULL, NULL, NULL, pp, PP_BUF_REGION_DEF(pp),
	    SKMEM_CR_NOMAGAZINES)) == NULL) {
		SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
		    pp->pp_name, SK_KVA(pp), cname);
		goto failed;
	}

	if (PP_BUF_REGION_LARGE(pp) != NULL) {
		(void) snprintf(cname, sizeof(cname), "buf_large.%s", name);
		if ((PP_BUF_CACHE_LARGE(pp) = skmem_cache_create(cname,
		    lbuf_srp->srp_c_obj_size, 0, NULL, NULL, NULL, pp,
		    PP_BUF_REGION_LARGE(pp), SKMEM_CR_NOMAGAZINES)) == NULL) {
			SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
			    pp->pp_name, SK_KVA(pp), cname);
			goto failed;
		}
	}

	return pp;

failed:
	/* release the ctx reference taken above, then tear down the pool */
	if (pp != NULL) {
		if (pp->pp_ctx != NULL) {
			pp->pp_ctx_release(pp->pp_ctx);
			pp->pp_ctx = NULL;
		}
		pp_close(pp);
	}

	return NULL;
}
1395
void
pp_destroy(struct kern_pbufpool *pp)
{
	/*
	 * Tear down a pool: user-mapped state first, then caches before
	 * their backing regions, and buffer caches/regions last (see the
	 * ordering note below).  Caller must hold the pool lock.
	 */
	PP_LOCK_ASSERT_HELD(pp);

	/* may be called for built-in pp with outstanding reference */
	ASSERT(!(pp->pp_flags & PPF_EXTERNAL) || pp->pp_refcnt == 0);

	pp_destroy_upp_locked(pp);

	pp_destroy_upp_bft_locked(pp);

	if (pp->pp_kmd_cache != NULL) {
		skmem_cache_destroy(pp->pp_kmd_cache);
		pp->pp_kmd_cache = NULL;
	}

	if (pp->pp_umd_region != NULL) {
		skmem_region_release(pp->pp_umd_region);
		pp->pp_umd_region = NULL;
	}

	if (pp->pp_kmd_region != NULL) {
		skmem_region_release(pp->pp_kmd_region);
		pp->pp_kmd_region = NULL;
	}

	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
		skmem_cache_destroy(PP_KBFT_CACHE_DEF(pp));
		PP_KBFT_CACHE_DEF(pp) = NULL;
	}

	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
		skmem_cache_destroy(PP_KBFT_CACHE_LARGE(pp));
		PP_KBFT_CACHE_LARGE(pp) = NULL;
	}

	if (pp->pp_raw_kbft_cache != NULL) {
		skmem_cache_destroy(pp->pp_raw_kbft_cache);
		pp->pp_raw_kbft_cache = NULL;
	}

	if (pp->pp_ubft_region != NULL) {
		skmem_region_release(pp->pp_ubft_region);
		pp->pp_ubft_region = NULL;
	}

	if (pp->pp_kbft_region != NULL) {
		skmem_region_release(pp->pp_kbft_region);
		pp->pp_kbft_region = NULL;
	}

	/*
	 * The order is important here, since pp_metadata_dtor()
	 * called by freeing on the pp_kmd_cache will in turn
	 * free the attached buffer. Therefore destroy the
	 * buffer cache last.
	 */
	if (PP_BUF_CACHE_DEF(pp) != NULL) {
		skmem_cache_destroy(PP_BUF_CACHE_DEF(pp));
		PP_BUF_CACHE_DEF(pp) = NULL;
	}
	if (PP_BUF_REGION_DEF(pp) != NULL) {
		skmem_region_release(PP_BUF_REGION_DEF(pp));
		PP_BUF_REGION_DEF(pp) = NULL;
	}
	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
		skmem_cache_destroy(PP_BUF_CACHE_LARGE(pp));
		PP_BUF_CACHE_LARGE(pp) = NULL;
	}
	if (PP_BUF_REGION_LARGE(pp) != NULL) {
		skmem_region_release(PP_BUF_REGION_LARGE(pp));
		PP_BUF_REGION_LARGE(pp) = NULL;
	}

	/* drop the client context reference taken at pp_create() time */
	if (pp->pp_ctx != NULL) {
		pp->pp_ctx_release(pp->pp_ctx);
		pp->pp_ctx = NULL;
	}
}
1476
1477 static int
pp_init_upp_locked(struct kern_pbufpool * pp,boolean_t can_block)1478 pp_init_upp_locked(struct kern_pbufpool *pp, boolean_t can_block)
1479 {
1480 int i, err = 0;
1481
1482 if (pp->pp_u_hash_table != NULL) {
1483 goto done;
1484 }
1485
1486 /* allocated-address hash table */
1487 pp->pp_u_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
1488 zalloc_noblock(pp_u_htbl_zone);
1489 if (pp->pp_u_hash_table == NULL) {
1490 SK_ERR("failed to zalloc packet buffer pool upp hash table");
1491 err = ENOMEM;
1492 goto done;
1493 }
1494
1495 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1496 SLIST_INIT(&pp->pp_u_hash_table[i].upp_head);
1497 }
1498 done:
1499 return err;
1500 }
1501
static void
pp_destroy_upp_locked(struct kern_pbufpool *pp)
{
	/*
	 * Free the user-packet hash table, first purging any entries
	 * still present (pid -1 matches all owners).
	 */
	PP_LOCK_ASSERT_HELD(pp);
	if (pp->pp_u_hash_table != NULL) {
		/* purge anything that's left */
		pp_purge_upp_locked(pp, -1);

#if (DEBUG || DEVELOPMENT)
		for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
			ASSERT(SLIST_EMPTY(&pp->pp_u_hash_table[i].upp_head));
		}
#endif /* DEBUG || DEVELOPMENT */

		zfree(pp_u_htbl_zone, pp->pp_u_hash_table);
		pp->pp_u_hash_table = NULL;
	}
	ASSERT(pp->pp_u_bufinuse == 0);
}
1521
1522 int
pp_init_upp(struct kern_pbufpool * pp,boolean_t can_block)1523 pp_init_upp(struct kern_pbufpool *pp, boolean_t can_block)
1524 {
1525 int err = 0;
1526
1527 PP_LOCK(pp);
1528 err = pp_init_upp_locked(pp, can_block);
1529 if (err) {
1530 SK_ERR("packet UPP init failed (%d)", err);
1531 goto done;
1532 }
1533 err = pp_init_upp_bft_locked(pp, can_block);
1534 if (err) {
1535 SK_ERR("buflet UPP init failed (%d)", err);
1536 pp_destroy_upp_locked(pp);
1537 goto done;
1538 }
1539 pp_retain_locked(pp);
1540 done:
1541 PP_UNLOCK(pp);
1542 return err;
1543 }
1544
__attribute__((always_inline))
static void
pp_insert_upp_bft_locked(struct kern_pbufpool *pp,
    struct __kern_buflet *kbft, pid_t pid)
{
	/*
	 * Record an externally-visible buflet as owned by @pid in the
	 * user-buflet hash table, keyed by its buflet region index.
	 */
	struct kern_pbufpool_u_bft_bkt *bkt;
	struct __kern_buflet_ext *kbe = (struct __kern_buflet_ext *)kbft;

	ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
	/* must not already be tracked (pid -1 means unowned) */
	ASSERT(kbe->kbe_buf_pid == (pid_t)-1);
	kbe->kbe_buf_pid = pid;
	bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, kbft->buf_bft_idx_reg);
	SLIST_INSERT_HEAD(&bkt->upp_head, kbe, kbe_buf_upp_link);
	pp->pp_u_bftinuse++;
}
1560
1561 __attribute__((always_inline))
1562 static void
pp_insert_upp_bft_chain_locked(struct kern_pbufpool * pp,struct __kern_buflet * kbft,pid_t pid)1563 pp_insert_upp_bft_chain_locked(struct kern_pbufpool *pp,
1564 struct __kern_buflet *kbft, pid_t pid)
1565 {
1566 while (kbft != NULL) {
1567 pp_insert_upp_bft_locked(pp, kbft, pid);
1568 kbft = __DECONST(kern_buflet_t, kbft->buf_nbft_addr);
1569 }
1570 }
1571
1572 /* Also inserts the attached chain of buflets */
1573 void static inline
pp_insert_upp_common(struct kern_pbufpool * pp,struct __kern_quantum * kqum,pid_t pid)1574 pp_insert_upp_common(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1575 pid_t pid)
1576 {
1577 struct kern_pbufpool_u_bkt *bkt;
1578 struct __kern_buflet *kbft;
1579
1580 ASSERT(kqum->qum_pid == (pid_t)-1);
1581 kqum->qum_pid = pid;
1582
1583 bkt = KERN_PBUFPOOL_U_HASH(pp, METADATA_IDX(kqum));
1584 SLIST_INSERT_HEAD(&bkt->upp_head, kqum, qum_upp_link);
1585 pp->pp_u_bufinuse++;
1586
1587 kbft = (kern_buflet_t)kqum->qum_buf[0].buf_nbft_addr;
1588 if (kbft != NULL) {
1589 ASSERT(((kern_buflet_t)kbft)->buf_flag & BUFLET_FLAG_EXTERNAL);
1590 ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
1591 pp_insert_upp_bft_chain_locked(pp, kbft, pid);
1592 }
1593 }
1594
1595 void
pp_insert_upp_locked(struct kern_pbufpool * pp,struct __kern_quantum * kqum,pid_t pid)1596 pp_insert_upp_locked(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1597 pid_t pid)
1598 {
1599 pp_insert_upp_common(pp, kqum, pid);
1600 }
1601
1602 void
pp_insert_upp(struct kern_pbufpool * pp,struct __kern_quantum * kqum,pid_t pid)1603 pp_insert_upp(struct kern_pbufpool *pp, struct __kern_quantum *kqum, pid_t pid)
1604 {
1605 PP_LOCK(pp);
1606 pp_insert_upp_common(pp, kqum, pid);
1607 PP_UNLOCK(pp);
1608 }
1609
1610 void
pp_insert_upp_batch(struct kern_pbufpool * pp,pid_t pid,uint64_t * array,uint32_t num)1611 pp_insert_upp_batch(struct kern_pbufpool *pp, pid_t pid, uint64_t *array,
1612 uint32_t num)
1613 {
1614 uint32_t i = 0;
1615
1616 ASSERT(array != NULL && num > 0);
1617 PP_LOCK(pp);
1618 while (num != 0) {
1619 struct __kern_quantum *kqum = SK_PTR_ADDR_KQUM(array[i]);
1620
1621 ASSERT(kqum != NULL);
1622 pp_insert_upp_common(pp, kqum, pid);
1623 --num;
1624 ++i;
1625 }
1626 PP_UNLOCK(pp);
1627 }
1628
__attribute__((always_inline))
static struct __kern_buflet *
pp_remove_upp_bft_locked(struct kern_pbufpool *pp, obj_idx_t bft_idx)
{
	/*
	 * Look up a tracked buflet by region index, unlink it from the
	 * user-buflet hash table and mark it unowned.
	 *
	 * @return the buflet, or NULL if @bft_idx is not tracked
	 *	(SLIST_FOREACH_SAFE leaves the iterator NULL when the
	 *	list is exhausted without a match).
	 */
	struct __kern_buflet_ext *kbft, *tbft;
	struct kern_pbufpool_u_bft_bkt *bkt;

	bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, bft_idx);
	SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link, tbft) {
		if (((kern_buflet_t)kbft)->buf_bft_idx_reg == bft_idx) {
			SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
			    kbe_buf_upp_link);
			/* pid -1 marks the buflet unowned again */
			kbft->kbe_buf_pid = (pid_t)-1;
			kbft->kbe_buf_upp_link.sle_next = NULL;
			ASSERT(pp->pp_u_bftinuse != 0);
			pp->pp_u_bftinuse--;
			break;
		}
	}
	return (kern_buflet_t)kbft;
}
1650
1651 struct __kern_buflet *
pp_remove_upp_bft(struct kern_pbufpool * pp,obj_idx_t md_idx,int * err)1652 pp_remove_upp_bft(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1653 {
1654 struct __kern_buflet *kbft = pp_remove_upp_bft_locked(pp, md_idx);
1655
1656 *err = __improbable(kbft != NULL) ? 0 : EINVAL;
1657 return kbft;
1658 }
1659
1660 __attribute__((always_inline))
1661 static int
pp_remove_upp_bft_chain_locked(struct kern_pbufpool * pp,struct __kern_quantum * kqum)1662 pp_remove_upp_bft_chain_locked(struct kern_pbufpool *pp,
1663 struct __kern_quantum *kqum)
1664 {
1665 uint32_t max_frags = pp->pp_max_frags;
1666 struct __kern_buflet *kbft;
1667 uint16_t nbfts, upkt_nbfts;
1668 obj_idx_t bft_idx;
1669
1670 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
1671 bft_idx = kqum->qum_user->qum_buf[0].buf_nbft_idx;
1672 kbft = &kqum->qum_buf[0];
1673 if (bft_idx == OBJ_IDX_NONE) {
1674 return 0;
1675 }
1676
1677 ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_PACKET);
1678 struct __kern_packet *kpkt = __DECONST(struct __kern_packet *, kqum);
1679 struct __user_packet *upkt = __DECONST(struct __user_packet *,
1680 kpkt->pkt_qum.qum_user);
1681
1682 upkt_nbfts = upkt->pkt_bufs_cnt;
1683 if (__improbable(upkt_nbfts > max_frags)) {
1684 SK_ERR("bad bcnt in upkt (%d > %d)", upkt_nbfts, max_frags);
1685 BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1686 BUF_NBFT_ADDR(kbft, 0);
1687 return ERANGE;
1688 }
1689
1690 nbfts = (kbft->buf_addr != 0) ? 1 : 0;
1691
1692 do {
1693 struct __kern_buflet *pbft = kbft;
1694 struct __kern_buflet_ext *kbe;
1695
1696 kbft = pp_remove_upp_bft_locked(pp, bft_idx);
1697 if (__improbable(kbft == NULL)) {
1698 BUF_NBFT_IDX(pbft, OBJ_IDX_NONE);
1699 BUF_NBFT_ADDR(pbft, 0);
1700 SK_ERR("unallocated next buflet (%d), %p", bft_idx,
1701 SK_KVA(pbft));
1702 return ERANGE;
1703 }
1704 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1705 BUF_NBFT_IDX(pbft, bft_idx);
1706 BUF_NBFT_ADDR(pbft, kbft);
1707 kbe = (struct __kern_buflet_ext *)kbft;
1708 bft_idx = kbe->kbe_buf_user->buf_nbft_idx;
1709 ++nbfts;
1710 } while ((bft_idx != OBJ_IDX_NONE) && (nbfts < upkt_nbfts));
1711
1712 ASSERT(kbft != NULL);
1713 BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1714 BUF_NBFT_ADDR(kbft, 0);
1715 *__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = nbfts;
1716
1717 if (__improbable((bft_idx != OBJ_IDX_NONE) || (nbfts != upkt_nbfts))) {
1718 SK_ERR("bad buflet in upkt (%d, %d)", nbfts, upkt_nbfts);
1719 return ERANGE;
1720 }
1721 return 0;
1722 }
1723
struct __kern_quantum *
pp_remove_upp_locked(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
{
	/*
	 * Look up a tracked user packet by metadata index, unlink it
	 * from the user-packet hash table, and reclaim its buflet chain.
	 *
	 * @return the quantum (with *err from the chain reclaim), or
	 *	NULL with *err = ERANGE if @md_idx is not tracked
	 *	(SLIST_FOREACH_SAFE leaves the iterator NULL on miss).
	 */
	struct __kern_quantum *kqum, *tqum;
	struct kern_pbufpool_u_bkt *bkt;

	bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
	SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
		if (METADATA_IDX(kqum) == md_idx) {
			SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
			    qum_upp_link);
			/* pid -1 marks the packet unowned again */
			kqum->qum_pid = (pid_t)-1;
			ASSERT(pp->pp_u_bufinuse != 0);
			pp->pp_u_bufinuse--;
			break;
		}
	}
	if (__probable(kqum != NULL)) {
		*err = pp_remove_upp_bft_chain_locked(pp, kqum);
	} else {
		*err = ERANGE;
	}
	return kqum;
}
1748
1749 struct __kern_quantum *
pp_remove_upp(struct kern_pbufpool * pp,obj_idx_t md_idx,int * err)1750 pp_remove_upp(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1751 {
1752 struct __kern_quantum *kqum;
1753
1754 PP_LOCK(pp);
1755 kqum = pp_remove_upp_locked(pp, md_idx, err);
1756 PP_UNLOCK(pp);
1757 return kqum;
1758 }
1759
1760 struct __kern_quantum *
pp_find_upp(struct kern_pbufpool * pp,obj_idx_t md_idx)1761 pp_find_upp(struct kern_pbufpool *pp, obj_idx_t md_idx)
1762 {
1763 struct __kern_quantum *kqum, *tqum;
1764 struct kern_pbufpool_u_bkt *bkt;
1765
1766 PP_LOCK(pp);
1767 bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1768 SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1769 if (METADATA_IDX(kqum) == md_idx) {
1770 break;
1771 }
1772 }
1773 PP_UNLOCK(pp);
1774
1775 return kqum;
1776 }
1777
__attribute__((always_inline))
static void
pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid)
{
	/*
	 * Free every tracked user packet owned by @pid; a pid of -1
	 * matches (and purges) all owners.  Each packet's buflet chain
	 * is reclaimed before the packet itself is freed.
	 */
	struct __kern_quantum *kqum, *tqum;
	struct kern_pbufpool_u_bkt *bkt;
	int i;

	PP_LOCK_ASSERT_HELD(pp);

	/*
	 * TODO: Build a list of packets and batch-free them.
	 */
	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
		bkt = &pp->pp_u_hash_table[i];
		SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
			ASSERT(kqum->qum_pid != (pid_t)-1);
			if (pid != (pid_t)-1 && kqum->qum_pid != pid) {
				continue;
			}
			SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
			    qum_upp_link);
			pp_remove_upp_bft_chain_locked(pp, kqum);
			/* reset ownership/finalized state before freeing */
			kqum->qum_pid = (pid_t)-1;
			kqum->qum_qflags &= ~QUM_F_FINALIZED;
			kqum->qum_ksd = NULL;
			pp_free_packet(__DECONST(struct kern_pbufpool *,
			    kqum->qum_pp), (uint64_t)kqum);
			ASSERT(pp->pp_u_bufinuse != 0);
			pp->pp_u_bufinuse--;
		}
	}
}
1811
__attribute__((always_inline))
static void
pp_purge_upp_bft_locked(struct kern_pbufpool *pp, pid_t pid)
{
	/*
	 * Free every tracked user buflet owned by @pid; a pid of -1
	 * matches (and purges) all owners.  Companion to
	 * pp_purge_upp_locked(), which handles the packets.
	 */
	struct __kern_buflet_ext *kbft, *tbft;
	struct kern_pbufpool_u_bft_bkt *bkt;
	int i;

	PP_LOCK_ASSERT_HELD(pp);

	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
		bkt = &pp->pp_u_bft_hash_table[i];
		SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link,
		    tbft) {
			ASSERT(kbft->kbe_buf_pid != (pid_t)-1);
			if (pid != (pid_t)-1 && kbft->kbe_buf_pid != pid) {
				continue;
			}
			SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
			    kbe_buf_upp_link);
			/* mark unowned and unlinked before freeing */
			kbft->kbe_buf_pid = (pid_t)-1;
			kbft->kbe_buf_upp_link.sle_next = NULL;
			pp_free_buflet(pp, (kern_buflet_t)kbft);
			ASSERT(pp->pp_u_bftinuse != 0);
			pp->pp_u_bftinuse--;
		}
	}
}
1840
1841 void
pp_purge_upp(struct kern_pbufpool * pp,pid_t pid)1842 pp_purge_upp(struct kern_pbufpool *pp, pid_t pid)
1843 {
1844 PP_LOCK(pp);
1845 pp_purge_upp_locked(pp, pid);
1846 pp_purge_upp_bft_locked(pp, pid);
1847 PP_UNLOCK(pp);
1848 }
1849
1850 static int
pp_init_upp_bft_locked(struct kern_pbufpool * pp,boolean_t can_block)1851 pp_init_upp_bft_locked(struct kern_pbufpool *pp, boolean_t can_block)
1852 {
1853 int i, err = 0;
1854
1855 PP_LOCK_ASSERT_HELD(pp);
1856 if (pp->pp_u_bft_hash_table != NULL) {
1857 return 0;
1858 }
1859
1860 /* allocated-address hash table */
1861 pp->pp_u_bft_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
1862 zalloc_noblock(pp_u_htbl_zone);
1863 if (pp->pp_u_bft_hash_table == NULL) {
1864 SK_ERR("failed to zalloc packet buffer pool upp buflet hash table");
1865 err = ENOMEM;
1866 goto fail;
1867 }
1868
1869 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1870 SLIST_INIT(&pp->pp_u_bft_hash_table[i].upp_head);
1871 }
1872
1873 fail:
1874 return err;
1875 }
1876
/*
 * Tear down the pool's user buflet hash table, purging any buflets
 * still outstanding (all owners).  Caller must hold the pool lock.
 */
static void
pp_destroy_upp_bft_locked(struct kern_pbufpool *pp)
{
	PP_LOCK_ASSERT_HELD(pp);
	if (pp->pp_u_bft_hash_table != NULL) {
		/* purge anything that's left */
		pp_purge_upp_bft_locked(pp, -1);

#if (DEBUG || DEVELOPMENT)
		/* after a full purge every bucket must be empty */
		for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
			ASSERT(SLIST_EMPTY(&pp->pp_u_bft_hash_table[i].upp_head));
		}
#endif /* DEBUG || DEVELOPMENT */

		zfree(pp_u_htbl_zone, pp->pp_u_bft_hash_table);
		pp->pp_u_bft_hash_table = NULL;
	}
	ASSERT(pp->pp_u_bftinuse == 0);
}
1896
/*
 * Record `kbft' as a user-mapped buflet owned by `pid', taking the
 * pool lock around the locked insert variant.
 */
void
pp_insert_upp_bft(struct kern_pbufpool *pp,
    struct __kern_buflet *kbft, pid_t pid)
{
	PP_LOCK(pp);
	pp_insert_upp_bft_locked(pp, kbft, pid);
	PP_UNLOCK(pp);
}
1905
1906 boolean_t
pp_isempty_upp(struct kern_pbufpool * pp)1907 pp_isempty_upp(struct kern_pbufpool *pp)
1908 {
1909 boolean_t isempty;
1910
1911 PP_LOCK(pp);
1912 isempty = (pp->pp_u_bufinuse == 0);
1913 PP_UNLOCK(pp);
1914
1915 return isempty;
1916 }
1917
/*
 * (Re)initialize a freshly allocated metadata object: locate the kernel
 * quantum behind the preamble, optionally attach `bufcnt' buflets taken
 * from `*blist' (buffer-on-demand pools), then run the type-specific
 * KPKT/KQUM initialization.  Returns the kernel quantum on success, or
 * NULL if buflet construction fails (caller must release the metadata).
 */
__attribute__((always_inline))
static inline struct __kern_quantum *
pp_metadata_init(struct __metadata_preamble *mdp, struct kern_pbufpool *pp,
    uint16_t bufcnt, uint32_t skmflag, struct skmem_obj **blist)
{
	struct __kern_quantum *kqum;
	struct __user_quantum *uqum;

	/* the kernel quantum lives immediately after the preamble */
	kqum = SK_PTR_ADDR_KQUM((uintptr_t)mdp + METADATA_PREAMBLE_SZ);
	ASSERT(kqum->qum_pp == pp);
	if (__probable(!PP_KERNEL_ONLY(pp))) {
		ASSERT(!(kqum->qum_qflags & QUM_F_KERNEL_ONLY));
		uqum = __DECONST(struct __user_quantum *, kqum->qum_user);
		ASSERT(uqum != NULL);
	} else {
		/* kernel-only pools carry no user shadow object */
		ASSERT(kqum->qum_qflags & QUM_F_KERNEL_ONLY);
		ASSERT(kqum->qum_user == NULL);
		uqum = NULL;
	}

	/*
	 * Buffer-on-demand: attach `bufcnt' buflets from the caller's
	 * preallocated list; on failure the caller frees the metadata.
	 */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 &&
	    pp_metadata_construct(kqum, uqum, METADATA_IDX(kqum), pp,
	    skmflag, bufcnt, FALSE, blist) != 0) {
		return NULL;
	}

	/* (re)construct {user,kernel} metadata */
	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
		struct __kern_buflet *kbuf = &kpkt->pkt_qum_buf;
		uint16_t i;

		/* sanitize flags */
		kpkt->pkt_pflags &= PKT_F_INIT_MASK;

		/* packet must come with its preallocated companion objects */
		ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
		    kpkt->pkt_com_opt != NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
		    kpkt->pkt_flow != NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
		    kpkt->pkt_tx_compl != NULL);

		/*
		 * XXX: For now we always set PKT_F_FLOW_DATA;
		 * this is a no-op but done for consistency
		 * with the other PKT_F_*_DATA flags.
		 */
		kpkt->pkt_pflags |= PKT_F_FLOW_DATA;

		/* initialize kernel packet */
		KPKT_INIT(kpkt, QUM_F_INTERNALIZED);

		ASSERT(bufcnt || PP_HAS_BUFFER_ON_DEMAND(pp));
		if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
			/*
			 * Embedded buflet is unused in this mode; start
			 * the init walk at the first chained buflet.
			 */
			ASSERT(kbuf->buf_ctl == NULL);
			ASSERT(kbuf->buf_addr == 0);
			kbuf = __DECONST(struct __kern_buflet *,
			    kbuf->buf_nbft_addr);
		}
		/* initialize kernel buflet */
		for (i = 0; i < bufcnt; i++) {
			ASSERT(kbuf != NULL);
			KBUF_INIT(kbuf);
			kbuf = __DECONST(struct __kern_buflet *,
			    kbuf->buf_nbft_addr);
		}
		/* chain must be exactly consumed (or absent when bufcnt == 0) */
		ASSERT((kbuf == NULL) || (bufcnt == 0));
		break;
	}
	default:
		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
		/* kernel quantum */
		KQUM_INIT(kqum, QUM_F_INTERNALIZED);
		KBUF_INIT(&kqum->qum_buf[0]);
		break;
	}

	return kqum;
}
1998
1999 /*
2000 * When PPF_BUFFER_ON_DEMAND flag is set on packet pool creation, we create
2001 * packet descriptor cache with no buffer attached and a buflet cache with
2002 * cpu layer caching enabled. While operating in this mode, we can call
2003 * pp_alloc_packet_common() either with `bufcnt = 0` or `bufcnt = n`,
2004 * where n <= pp->pp_max_frags. If `bufcnt == 0` then we allocate packet
2005 * descriptor with no attached buffer from the metadata cache.
2006 * If `bufcnt != 0`, then this routine allocates packet descriptor and buflets
2007 * from their respective caches and constructs the packet on behalf of the
2008 * caller.
2009 */
__attribute__((always_inline))
static inline uint32_t
pp_alloc_packet_common(struct kern_pbufpool *pp, uint16_t bufcnt,
    uint64_t *array, uint32_t num, boolean_t tagged, alloc_cb_func_t cb,
    const void *ctx, uint32_t skmflag)
{
	struct __metadata_preamble *mdp;
	struct __kern_quantum *kqum = NULL;
	uint32_t allocp, need = num;
	struct skmem_obj *plist, *blist = NULL;

	ASSERT(bufcnt <= pp->pp_max_frags);
	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));

	/* allocate (constructed) packet(s) with buffer(s) attached */
	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
	    skmflag);

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
		    (allocp * bufcnt), skmflag);
	}

	/* initialize each allocated metadata, consuming buflets from blist */
	while (plist != NULL) {
		struct skmem_obj *plistn;

		/* detach head from the allocation list before init */
		plistn = plist->mo_next;
		plist->mo_next = NULL;

		mdp = (struct __metadata_preamble *)(void *)plist;
		kqum = pp_metadata_init(mdp, pp, bufcnt, skmflag, &blist);
		if (kqum == NULL) {
			/*
			 * Init failed (buflet shortage): return unused
			 * buflets and the remaining metadata chain, then
			 * stop; caller gets a short count.
			 */
			if (blist != NULL) {
				skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
				    blist);
				blist = NULL;
			}
			plist->mo_next = plistn;
			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
			plist = NULL;
			break;
		}

		/* hand back either a tagged pointer or the raw address */
		if (tagged) {
			*array = SK_PTR_ENCODE(kqum, METADATA_TYPE(kqum),
			    METADATA_SUBTYPE(kqum));
		} else {
			*array = (uint64_t)kqum;
		}

		/* per-packet callback gets the index allocated so far */
		if (cb != NULL) {
			(cb)(*array, (num - need), ctx);
		}

		++array;
		plist = plistn;

		ASSERT(need > 0);
		--need;
	}
	ASSERT(blist == NULL);
	ASSERT((num - need) == allocp || kqum == NULL);

	return num - need;      /* number of packets actually delivered */
}
2077
2078 uint64_t
pp_alloc_packet(struct kern_pbufpool * pp,uint16_t bufcnt,uint32_t skmflag)2079 pp_alloc_packet(struct kern_pbufpool *pp, uint16_t bufcnt, uint32_t skmflag)
2080 {
2081 uint64_t kpkt = 0;
2082
2083 (void) pp_alloc_packet_common(pp, bufcnt, &kpkt, 1, FALSE,
2084 NULL, NULL, skmflag);
2085
2086 return kpkt;
2087 }
2088
2089 int
pp_alloc_packet_batch(struct kern_pbufpool * pp,uint16_t bufcnt,uint64_t * array,uint32_t * size,boolean_t tagged,alloc_cb_func_t cb,const void * ctx,uint32_t skmflag)2090 pp_alloc_packet_batch(struct kern_pbufpool *pp, uint16_t bufcnt,
2091 uint64_t *array, uint32_t *size, boolean_t tagged, alloc_cb_func_t cb,
2092 const void *ctx, uint32_t skmflag)
2093 {
2094 uint32_t i, n;
2095 int err;
2096
2097 ASSERT(array != NULL && size > 0);
2098
2099 n = *size;
2100 *size = 0;
2101
2102 i = pp_alloc_packet_common(pp, bufcnt, array, n, tagged,
2103 cb, ctx, skmflag);
2104 *size = i;
2105
2106 if (__probable(i == n)) {
2107 err = 0;
2108 } else if (i != 0) {
2109 err = EAGAIN;
2110 } else {
2111 err = ENOMEM;
2112 }
2113
2114 return err;
2115 }
2116
/*
 * Allocate `num' packets with `bufcnt' buffers each and enqueue them
 * onto `pktq'.  Same allocation strategy as pp_alloc_packet_common(),
 * but results are chained on a packet queue instead of an array.
 * Returns 0 on full success, EAGAIN on partial, ENOMEM on total failure.
 */
int
pp_alloc_pktq(struct kern_pbufpool *pp, uint16_t bufcnt,
    struct pktq *pktq, uint32_t num, alloc_cb_func_t cb, const void *ctx,
    uint32_t skmflag)
{
	struct __metadata_preamble *mdp;
	struct __kern_packet *kpkt = NULL;
	uint32_t allocp, need = num;
	struct skmem_obj *plist, *blist = NULL;
	int err;

	ASSERT(pktq != NULL && num > 0);
	ASSERT(pp->pp_md_type == NEXUS_META_TYPE_PACKET);
	ASSERT(bufcnt <= pp->pp_max_frags);
	ASSERT(PP_BATCH_CAPABLE(pp));

	/* allocate (constructed) packet(s) with buffer(s) attached */
	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
	    skmflag);

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
		    (allocp * bufcnt), skmflag);
	}

	while (plist != NULL) {
		struct skmem_obj *plistn;

		/* detach head from the allocation list before init */
		plistn = plist->mo_next;
		plist->mo_next = NULL;

		mdp = (struct __metadata_preamble *)(void *)plist;
		kpkt = (struct __kern_packet *)pp_metadata_init(mdp, pp,
		    bufcnt, skmflag, &blist);
		if (kpkt == NULL) {
			/*
			 * Init failed: return unused buflets and the
			 * rest of the metadata chain, then stop short.
			 */
			if (blist != NULL) {
				skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
				    blist);
				blist = NULL;
			}
			plist->mo_next = plistn;
			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
			plist = NULL;
			break;
		}

		KPKTQ_ENQUEUE(pktq, kpkt);

		/* per-packet callback gets the index allocated so far */
		if (cb != NULL) {
			(cb)((uint64_t)kpkt, (num - need), ctx);
		}

		plist = plistn;

		ASSERT(need > 0);
		--need;
	}
	ASSERT(blist == NULL);
	ASSERT((num - need) == allocp || kpkt == NULL);

	if (__probable(need == 0)) {
		err = 0;
	} else if (need == num) {
		err = ENOMEM;   /* nothing allocated */
	} else {
		err = EAGAIN;   /* partial allocation */
	}

	return err;
}
2188
2189 uint64_t
pp_alloc_packet_by_size(struct kern_pbufpool * pp,uint32_t size,uint32_t skmflag)2190 pp_alloc_packet_by_size(struct kern_pbufpool *pp, uint32_t size,
2191 uint32_t skmflag)
2192 {
2193 uint32_t bufcnt = pp->pp_max_frags;
2194 uint64_t kpkt = 0;
2195
2196 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
2197 bufcnt =
2198 SK_ROUNDUP(size, PP_BUF_SIZE_DEF(pp)) / PP_BUF_SIZE_DEF(pp);
2199 ASSERT(bufcnt <= UINT16_MAX);
2200 }
2201
2202 (void) pp_alloc_packet_common(pp, (uint16_t)bufcnt, &kpkt, 1, TRUE,
2203 NULL, NULL, skmflag);
2204
2205 return kpkt;
2206 }
2207
/*
 * Tear down a kernel quantum prior to returning it to its cache.
 * For packets, runs pending TX-completion callbacks and detaches any
 * piggybacked mbuf or packet: the detached object is either handed to
 * the caller (via *mp / *kpp, so it can be batch-freed) or freed here.
 * For buffer-on-demand pools the attached buflets are destructed onto
 * the three free lists.  Returns the metadata preamble for batch free.
 */
__attribute__((always_inline))
static inline struct __metadata_preamble *
pp_metadata_fini(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
    struct mbuf **mp, struct __kern_packet **kpp, struct skmem_obj **blist_def,
    struct skmem_obj **blist_large, struct skmem_obj **blist_raw)
{
	struct __metadata_preamble *mdp = METADATA_PREAMBLE(kqum);

	ASSERT(SK_PTR_TAG(kqum) == 0);

	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_KPKT(kqum);

		/* fire any pending TX completion before teardown */
		if ((kpkt->pkt_pflags & PKT_F_TX_COMPL_TS_REQ) != 0) {
			__packet_perform_tx_completion_callbacks(
				SK_PKT2PH(kpkt), NULL);
		}
		if ((kpkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
			/* mbuf and packet attachments are mutually exclusive */
			ASSERT((kpkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
			ASSERT(kpkt->pkt_mbuf != NULL);
			ASSERT(kpkt->pkt_mbuf->m_nextpkt == NULL);
			if (mp != NULL) {
				/* hand the mbuf to the caller's chain */
				ASSERT(*mp == NULL);
				*mp = kpkt->pkt_mbuf;
			} else {
				m_freem(kpkt->pkt_mbuf);
			}
			KPKT_CLEAR_MBUF_DATA(kpkt);
		} else if ((kpkt->pkt_pflags & PKT_F_PKT_DATA) != 0) {
			ASSERT(kpkt->pkt_pkt != NULL);
			ASSERT(kpkt->pkt_pkt->pkt_nextpkt == NULL);
			if (kpp != NULL) {
				/* hand the packet to the caller's chain */
				ASSERT(*kpp == NULL);
				*kpp = kpkt->pkt_pkt;
			} else {
				/* can only recurse once */
				ASSERT((kpkt->pkt_pkt->pkt_pflags &
				    PKT_F_PKT_DATA) == 0);
				pp_free_packet_single(kpkt->pkt_pkt);
			}
			KPKT_CLEAR_PKT_DATA(kpkt);
		}
		ASSERT(kpkt->pkt_nextpkt == NULL);
		ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_MBUF_MASK) == 0);
		ASSERT((kpkt->pkt_pflags & PKT_F_PKT_MASK) == 0);
		break;
	}
	default:
		break;
	}

	/* detach buflets onto the per-cache free lists */
	if (__improbable(PP_HAS_BUFFER_ON_DEMAND(pp))) {
		pp_metadata_destruct_common(kqum, pp, FALSE, blist_def,
		    blist_large, blist_raw);
	}
	return mdp;
}
2267
/*
 * Free a chain of packets (linked via pkt_nextpkt) back to their pool
 * in batches.  Metadata, buflets, attached mbufs and attached packets
 * are each accumulated into their own list and released with a single
 * batch call.  All packets in the chain must belong to the same pool
 * (asserted per packet).  If `npkt' is non-NULL it receives the count.
 */
void
pp_free_packet_chain(struct __kern_packet *pkt_chain, int *npkt)
{
	struct __metadata_preamble *mdp;
	struct skmem_obj *top = NULL;
	struct skmem_obj *blist_def = NULL;
	struct skmem_obj *blist_large = NULL;
	struct skmem_obj *blist_raw = NULL;
	struct skmem_obj **list = &top;
	struct mbuf *mtop = NULL;
	struct mbuf **mp = &mtop;
	struct __kern_packet *kptop = NULL;
	struct __kern_packet **kpp = &kptop, *pkt, *next;
	struct kern_pbufpool *pp;
	int c = 0;

	/* pool is taken from the first packet; rest must match */
	pp = __DECONST(struct kern_pbufpool *, pkt_chain->pkt_qum.qum_pp);
	ASSERT(pp != NULL);
	ASSERT(PP_BATCH_CAPABLE(pp));

	for (pkt = pkt_chain; pkt != NULL; pkt = next) {
		next = pkt->pkt_nextpkt;
		pkt->pkt_nextpkt = NULL;

		ASSERT(SK_PTR_ADDR_KQUM(pkt)->qum_pp == pp);
		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(pkt), pp,
		    mp, kpp, &blist_def, &blist_large, &blist_raw);

		/* append metadata to the batch-free list via tail pointer */
		*list = (struct skmem_obj *)mdp;
		list = &(*list)->mo_next;
		c++;

		/* fini may have appended a detached mbuf; advance tail */
		if (*mp != NULL) {
			mp = &(*mp)->m_nextpkt;
			ASSERT(*mp == NULL);
		}
		/* likewise for a detached packet */
		if (*kpp != NULL) {
			kpp = &(*kpp)->pkt_nextpkt;
			ASSERT(*kpp == NULL);
		}
	}

	ASSERT(top != NULL);
	skmem_cache_batch_free(pp->pp_kmd_cache, top);
	if (blist_def != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
		blist_def = NULL;
	}
	if (blist_large != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
		blist_large = NULL;
	}
	if (blist_raw != NULL) {
		skmem_cache_batch_free(pp->pp_raw_kbft_cache, blist_raw);
		blist_raw = NULL;
	}
	if (mtop != NULL) {
		DTRACE_SKYWALK(free__attached__mbuf);
		if (__probable(mtop->m_nextpkt != NULL)) {
			m_freem_list(mtop);
		} else {
			m_freem(mtop);
		}
	}
	if (kptop != NULL) {
		/* recurse once to free packets that were attached */
		int cnt = 0;
		pp_free_packet_chain(kptop, &cnt);
		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
	}
	if (npkt != NULL) {
		*npkt = c;
	}
}
2341
2342 void
pp_free_pktq(struct pktq * pktq)2343 pp_free_pktq(struct pktq *pktq)
2344 {
2345 if (__improbable(KPKTQ_EMPTY(pktq))) {
2346 return;
2347 }
2348 struct __kern_packet *pkt = KPKTQ_FIRST(pktq);
2349 pp_free_packet_chain(pkt, NULL);
2350 KPKTQ_DISPOSE(pktq);
2351 }
2352
/*
 * Free `num' packets given as an array of handles; array entries are
 * zeroed as they are consumed.  Same batching strategy as
 * pp_free_packet_chain(): metadata, buflets, detached mbufs and
 * detached packets are accumulated and released in bulk.  All handles
 * must belong to pool `pp' (asserted per entry).
 */
__attribute__((always_inline))
static inline void
pp_free_packet_array(struct kern_pbufpool *pp, uint64_t *array, uint32_t num)
{
	struct __metadata_preamble *mdp;
	struct skmem_obj *top = NULL;
	struct skmem_obj *blist_def = NULL;
	struct skmem_obj *blist_large = NULL;
	struct skmem_obj *blist_raw = NULL;
	struct skmem_obj **list = &top;
	struct mbuf *mtop = NULL;
	struct mbuf **mp = &mtop;
	struct __kern_packet *kptop = NULL;
	struct __kern_packet **kpp = &kptop;
	uint32_t i;

	ASSERT(pp != NULL);
	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));

	for (i = 0; i < num; i++) {
		ASSERT(SK_PTR_ADDR_KQUM(array[i])->qum_pp == pp);
		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(array[i]), pp,
		    mp, kpp, &blist_def, &blist_large, &blist_raw);

		/* append metadata to the batch-free list via tail pointer */
		*list = (struct skmem_obj *)mdp;
		list = &(*list)->mo_next;
		array[i] = 0;

		/* fini may have appended a detached mbuf; advance tail */
		if (*mp != NULL) {
			mp = &(*mp)->m_nextpkt;
			ASSERT(*mp == NULL);
		}
		/* likewise for a detached packet */
		if (*kpp != NULL) {
			kpp = &(*kpp)->pkt_nextpkt;
			ASSERT(*kpp == NULL);
		}
	}

	ASSERT(top != NULL);
	skmem_cache_batch_free(pp->pp_kmd_cache, top);
	if (blist_def != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
		blist_def = NULL;
	}
	if (blist_large != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
		blist_large = NULL;
	}
	if (blist_raw != NULL) {
		skmem_cache_batch_free(pp->pp_raw_kbft_cache, blist_raw);
		blist_raw = NULL;
	}
	if (mtop != NULL) {
		DTRACE_SKYWALK(free__attached__mbuf);
		if (__probable(mtop->m_nextpkt != NULL)) {
			m_freem_list(mtop);
		} else {
			m_freem(mtop);
		}
	}
	if (kptop != NULL) {
		/* free packets that were attached to the ones just freed */
		int cnt = 0;
		pp_free_packet_chain(kptop, &cnt);
		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
	}
}
2420
/*
 * Free a single packet handle back to pool `pp'.
 */
void
pp_free_packet(struct kern_pbufpool *pp, uint64_t kqum)
{
	pp_free_packet_array(pp, &kqum, 1);
}
2426
/*
 * Free `size' packet handles from `array' back to pool `pp';
 * entries are zeroed by the array free path.
 */
void
pp_free_packet_batch(const kern_pbufpool_t pp, uint64_t *array, uint32_t size)
{
	pp_free_packet_array(pp, array, size);
}
2432
/*
 * Free one unchained packet, deriving the owning pool from the
 * packet itself.
 */
void
pp_free_packet_single(struct __kern_packet *pkt)
{
	ASSERT(pkt->pkt_nextpkt == NULL);
	pp_free_packet(__DECONST(struct kern_pbufpool *,
	    pkt->pkt_qum.qum_pp), SK_PTR_ADDR(pkt));
}
2440
/*
 * Allocate one buffer from the pool's default or large buffer cache
 * and fill in its object info.  Returns the buffer address, or 0 on
 * failure.  On DEVELOPMENT/DEBUG kernels, non-blocking allocations
 * may be failed artificially per the skmem MTBF setting.
 */
static mach_vm_address_t
pp_alloc_buffer_common(const kern_pbufpool_t pp, struct skmem_obj_info *oi,
    uint32_t skmflag, bool large)
{
	mach_vm_address_t baddr;
	struct skmem_cache *skm = large ? PP_BUF_CACHE_LARGE(pp):
	    PP_BUF_CACHE_DEF(pp);

	ASSERT(skm != NULL);
	/* allocate a cached buffer */
	baddr = (mach_vm_address_t)skmem_cache_alloc(skm, skmflag);

#if (DEVELOPMENT || DEBUG)
	uint64_t mtbf = skmem_region_get_mtbf();
	/*
	 * MTBF is applicable only for non-blocking allocations here.
	 */
	if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
	    (skmflag & SKMEM_NOSLEEP))) {
		SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
		net_update_uptime();
		/* inject a failure: release the buffer if we got one */
		if (baddr != 0) {
			skmem_cache_free(skm, (void *)baddr);
			baddr = 0;
		}
	}
#endif /* (DEVELOPMENT || DEBUG) */

	if (__improbable(baddr == 0)) {
		SK_DF(SK_VERB_MEM, "failed to alloc buffer, pp 0x%llx",
		    SK_KVA(pp));
		return 0;
	}
	/* report segment/index info for the allocated object */
	skmem_cache_get_obj_info(skm, (void *)baddr, oi, NULL);
	ASSERT(SKMEM_OBJ_BUFCTL(oi) != NULL);
	ASSERT((mach_vm_address_t)SKMEM_OBJ_ADDR(oi) == baddr);
	return baddr;
}
2479
/*
 * Public single-buffer allocator for buffer-on-demand pools.
 * On success stores the buffer address in *baddr and, when both
 * `seg' and `idx' are supplied, the owning segment and in-segment
 * index.  Returns ENOTSUP if the pool lacks buffer-on-demand,
 * ENOMEM on allocation failure, 0 on success.
 */
errno_t
pp_alloc_buffer(const kern_pbufpool_t pp, mach_vm_address_t *baddr,
    kern_segment_t *seg, kern_obj_idx_seg_t *idx, uint32_t skmflag)
{
	struct skmem_obj_info oib;

	VERIFY(pp != NULL && baddr != NULL);
	/* seg and idx must be supplied together or not at all */
	VERIFY((seg != NULL) == (idx != NULL));

	if (__improbable(!PP_HAS_BUFFER_ON_DEMAND(pp))) {
		return ENOTSUP;
	}

	/* always from the default (non-large) buffer cache */
	*baddr = pp_alloc_buffer_common(pp, &oib, skmflag, false);
	if (__improbable(*baddr == 0)) {
		return ENOMEM;
	}

	if (seg != NULL) {
		ASSERT(SKMEM_OBJ_SEG(&oib) != NULL);
		*seg = SKMEM_OBJ_SEG(&oib);
		*idx = SKMEM_OBJ_IDX_SEG(&oib);
	}
	return 0;
}
2505
/*
 * Return a buffer to the pool's default buffer cache.
 * NOTE(review): only the default cache is used here; presumably
 * callers never pass large-cache buffers — confirm against callers.
 */
void
pp_free_buffer(const kern_pbufpool_t pp, mach_vm_address_t addr)
{
	ASSERT(pp != NULL && addr != 0);
	skmem_cache_free(PP_BUF_CACHE_DEF(pp), (void *)addr);
}
2512
/*
 * Batch-allocate up to `num' buflets into `array'.  With
 * PP_ALLOC_BFT_ATTACH_BUFFER set, buflets come from the default or
 * large (PP_ALLOC_BFT_LARGE) buflet cache with a buffer attached;
 * otherwise raw buflets come from the raw buflet cache.  Returns the
 * number of buflets actually delivered.
 */
__attribute__((always_inline))
static inline uint32_t
pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
    uint32_t num, uint32_t skmflag, uint32_t flags)
{
	struct __kern_buflet *kbft = NULL;
	uint32_t allocd, need = num;
	struct skmem_obj *list;
	struct skmem_cache *skm = NULL;
	boolean_t attach_buffer = (flags & PP_ALLOC_BFT_ATTACH_BUFFER) != 0;
	boolean_t large = (flags & PP_ALLOC_BFT_LARGE) != 0;

	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));
	ASSERT(PP_KBFT_CACHE_DEF(pp) != NULL);
	/* large buflets require the pool to have a large buffer size */
	ASSERT(PP_BUF_SIZE_LARGE(pp) != 0 || !large);
	/* raw allocation requires a raw buflet cache */
	ASSERT(pp->pp_raw_kbft_cache != NULL || attach_buffer);

	/* pick the source cache based on the requested flavor */
	if (!attach_buffer) {
		skm = pp->pp_raw_kbft_cache;
	} else {
		skm = large ? PP_KBFT_CACHE_LARGE(pp) :
		    PP_KBFT_CACHE_DEF(pp);
	}
	allocd = skmem_cache_batch_alloc(skm, &list, num, skmflag);

	while (list != NULL) {
		struct skmem_obj *listn;

		/* detach head from the allocation list before init */
		listn = list->mo_next;
		list->mo_next = NULL;
		kbft = (kern_buflet_t)(void *)list;
		if (attach_buffer) {
			KBUF_EXT_INIT(kbft, pp);
		} else {
			RAW_KBUF_EXT_INIT(kbft);
		}
		*array = (uint64_t)kbft;
		++array;
		list = listn;
		ASSERT(need > 0);
		--need;
	}
	ASSERT((num - need) == allocd || kbft == NULL);
	return num - need;      /* number of buflets delivered */
}
2559
2560 errno_t
pp_alloc_buflet(struct kern_pbufpool * pp,kern_buflet_t * kbft,uint32_t skmflag,uint32_t flags)2561 pp_alloc_buflet(struct kern_pbufpool *pp, kern_buflet_t *kbft, uint32_t skmflag,
2562 uint32_t flags)
2563 {
2564 uint64_t bft;
2565
2566 if (__improbable(!pp_alloc_buflet_common(pp, &bft, 1, skmflag, flags))) {
2567 return ENOMEM;
2568 }
2569 *kbft = (kern_buflet_t)bft;
2570 return 0;
2571 }
2572
2573 errno_t
pp_alloc_buflet_batch(struct kern_pbufpool * pp,uint64_t * array,uint32_t * size,uint32_t skmflag,uint32_t flags)2574 pp_alloc_buflet_batch(struct kern_pbufpool *pp, uint64_t *array,
2575 uint32_t *size, uint32_t skmflag, uint32_t flags)
2576 {
2577 uint32_t i, n;
2578 int err;
2579
2580 ASSERT(array != NULL && size > 0);
2581
2582 n = *size;
2583 *size = 0;
2584
2585 i = pp_alloc_buflet_common(pp, array, n, skmflag, flags);
2586 *size = i;
2587
2588 if (__probable(i == n)) {
2589 err = 0;
2590 } else if (i != 0) {
2591 err = EAGAIN;
2592 } else {
2593 err = ENOMEM;
2594 }
2595
2596 return err;
2597 }
2598
/*
 * Release one buflet.  External buflets go back to the appropriate
 * buflet cache; a buflet from the raw cache additionally drops its
 * buffer reference here (raw buflets acquire a buffer after
 * construction, non-raw ones own theirs from construction and keep it).
 * Embedded (non-external) buflets only drop their buffer reference.
 * The underlying buffer is freed when its use count reaches zero.
 */
__attribute__((always_inline))
static void
pp_free_buflet_common(const kern_pbufpool_t pp, kern_buflet_t kbft)
{
	/* must already be unlinked from any buflet chain */
	ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
	ASSERT(kbft->buf_nbft_addr == 0);

	if (kbft->buf_flag & BUFLET_FLAG_EXTERNAL) {
		ASSERT(kbft->buf_addr != 0);
		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
		ASSERT(kbft->buf_bft_idx_reg != OBJ_IDX_NONE);
		ASSERT(kbft->buf_ctl != NULL);
		/* must not still be on a user buflet hash bucket */
		ASSERT(((struct __kern_buflet_ext *)kbft)->
		    kbe_buf_upp_link.sle_next == NULL);

		/* raw buflet has a buffer attached after construction */
		if (BUFLET_FROM_RAW_BFLT_CACHE(kbft)) {
			uint32_t usecnt = 0;
			void *objaddr = kbft->buf_objaddr;
			/* detach buffer; usecnt is its remaining refs */
			KBUF_DTOR(kbft, usecnt);
			SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u",
			    SK_KVA(pp), SK_KVA(objaddr), usecnt);
			if (__improbable(usecnt == 0)) {
				skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
				    PP_BUF_CACHE_LARGE(pp) : PP_BUF_CACHE_DEF(pp),
				    objaddr);
			}
		}

		/*
		 * non-raw external buflet has buffer attached at construction,
		 * so we don't free the buffer here.
		 */
		skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
		    PP_KBFT_CACHE_LARGE(pp) : PP_KBFT_CACHE_DEF(pp),
		    (void *)kbft);
	} else if (__probable(kbft->buf_addr != 0)) {
		void *objaddr = kbft->buf_objaddr;
		uint32_t usecnt = 0;

		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
		ASSERT(kbft->buf_ctl != NULL);
		/* detach buffer; free it once the last reference drops */
		KBUF_DTOR(kbft, usecnt);
		SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u",
		    SK_KVA(pp), SK_KVA(objaddr), usecnt);
		if (__probable(usecnt == 0)) {
			skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
			    PP_BUF_CACHE_LARGE(pp) : PP_BUF_CACHE_DEF(pp),
			    objaddr);
		}
	}
}
2651
2652 void
pp_free_buflet(const kern_pbufpool_t pp,kern_buflet_t kbft)2653 pp_free_buflet(const kern_pbufpool_t pp, kern_buflet_t kbft)
2654 {
2655 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
2656 ASSERT(pp != NULL && kbft != NULL);
2657 pp_free_buflet_common(pp, kbft);
2658 }
2659
/*
 * Reclaim cached objects from the shared packet-option, flow and
 * completion caches; `purge' requests a full purge rather than a
 * normal reap.
 */
void
pp_reap_caches(boolean_t purge)
{
	skmem_cache_reap_now(pp_opt_cache, purge);
	skmem_cache_reap_now(pp_flow_cache, purge);
	skmem_cache_reap_now(pp_compl_cache, purge);
}
2667