1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/packet/pbufpool_var.h>
31 #include <sys/sdt.h>
32
33 static struct kern_pbufpool *pp_alloc(zalloc_flags_t);
34 static void pp_free(struct kern_pbufpool *);
35 static uint32_t pp_alloc_packet_common(struct kern_pbufpool *, uint16_t,
36 uint64_t *, uint32_t, boolean_t, alloc_cb_func_t, const void *, uint32_t);
37 static void pp_free_packet_array(struct kern_pbufpool *, uint64_t *, uint32_t);
38 static int pp_metadata_ctor_no_buflet(struct skmem_obj_info *,
39 struct skmem_obj_info *, void *, uint32_t);
40 static int pp_metadata_ctor_max_buflet(struct skmem_obj_info *,
41 struct skmem_obj_info *, void *, uint32_t);
42 static void pp_metadata_dtor(void *, void *);
43 static int pp_metadata_construct(struct __kern_quantum *,
44 struct __user_quantum *, obj_idx_t, struct kern_pbufpool *, uint32_t,
45 uint16_t, boolean_t, struct skmem_obj **);
46 static void pp_metadata_destruct(struct __kern_quantum *,
47 struct kern_pbufpool *, boolean_t);
48 static struct __kern_quantum *pp_metadata_init(struct __metadata_preamble *,
49 struct kern_pbufpool *, uint16_t, uint32_t, struct skmem_obj **);
50 static struct __metadata_preamble *pp_metadata_fini(struct __kern_quantum *,
51 struct kern_pbufpool *, struct mbuf **, struct __kern_packet **,
52 struct skmem_obj **);
53 static void pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid);
54 static void pp_buf_seg_ctor(struct sksegment *, IOSKMemoryBufferRef, void *);
55 static void pp_buf_seg_dtor(struct sksegment *, IOSKMemoryBufferRef, void *);
56 static void pp_destroy_upp_locked(struct kern_pbufpool *);
57 static void pp_destroy_upp_bft_locked(struct kern_pbufpool *);
58 static int pp_init_upp_bft_locked(struct kern_pbufpool *, boolean_t);
59 static void pp_free_buflet_common(const kern_pbufpool_t, kern_buflet_t);
60 static mach_vm_address_t pp_alloc_buffer_common(const kern_pbufpool_t pp,
61 struct skmem_obj_info *oi, uint32_t skmflag);
62 static inline uint32_t
63 pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
64 uint32_t num, uint32_t skmflag);
65
/* number of buckets in each per-pool user metadata hash table (power of 2) */
#define KERN_PBUFPOOL_U_HASH_SIZE 64 /* hash table size */

/*
 * Since the inputs are small (indices to the metadata region), we can use
 * Knuth's multiplicative hash method which is fast and good enough. Here
 * we multiply the input by the golden ratio of 2^32. See "The Art of
 * Computer Programming", section 6.4.
 */
#define KERN_PBUFPOOL_U_HASH_INDEX(_i, _m) \
    (((_i) * 2654435761U) & (_m))
/* bucket holding metadata index _i in pool _pp's user metadata hash table */
#define KERN_PBUFPOOL_U_HASH(_pp, _i) \
    (&(_pp)->pp_u_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
    KERN_PBUFPOOL_U_HASH_SIZE - 1)])
/* bucket holding buflet index _i in pool _pp's user buflet hash table */
#define KERN_PBUFPOOL_U_BFT_HASH(_pp, _i) \
    (&(_pp)->pp_u_bft_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
    KERN_PBUFPOOL_U_HASH_SIZE - 1)])

/* zone backing struct kern_pbufpool allocations; memory cleared on free */
static ZONE_DEFINE(pp_zone, SKMEM_ZONE_PREFIX ".mem.pp",
    sizeof(struct kern_pbufpool), ZC_ZFREE_CLEARMEM);

/* byte size of one user hash table: an array of hash buckets */
#define PP_U_HTBL_SIZE \
    (sizeof(struct kern_pbufpool_u_bkt) * KERN_PBUFPOOL_U_HASH_SIZE)
static ZONE_DEFINE(pp_u_htbl_zone, SKMEM_ZONE_PREFIX ".mem.pp.htbl",
    PP_U_HTBL_SIZE, ZC_ZFREE_CLEARMEM);

/* caches for the auxiliary per-packet structures, created by pp_init() */
static struct skmem_cache *pp_opt_cache; /* cache for __packet_opt */
static struct skmem_cache *pp_flow_cache; /* cache for __flow */
static struct skmem_cache *pp_compl_cache; /* cache for __packet_compl */

/*
 * Init guard: pp_init() asserts this is clear at entry; pp_fini() only
 * tears down the caches above when it is nonzero, then clears it.
 */
static int __pp_inited = 0;
96
97 int
pp_init(void)98 pp_init(void)
99 {
100 _CASSERT(KPKT_SC_UNSPEC == MBUF_SC_UNSPEC);
101 _CASSERT(KPKT_SC_BK_SYS == MBUF_SC_BK_SYS);
102 _CASSERT(KPKT_SC_BK == MBUF_SC_BK);
103 _CASSERT(KPKT_SC_BE == MBUF_SC_BE);
104 _CASSERT(KPKT_SC_RD == MBUF_SC_RD);
105 _CASSERT(KPKT_SC_OAM == MBUF_SC_OAM);
106 _CASSERT(KPKT_SC_AV == MBUF_SC_AV);
107 _CASSERT(KPKT_SC_RV == MBUF_SC_RV);
108 _CASSERT(KPKT_SC_VI == MBUF_SC_VI);
109 _CASSERT(KPKT_SC_SIG == MBUF_SC_SIG);
110 _CASSERT(KPKT_SC_VO == MBUF_SC_VO);
111 _CASSERT(KPKT_SC_CTL == MBUF_SC_CTL);
112
113 _CASSERT(KPKT_SC_BK_SYS == PKT_SC_BK_SYS);
114 _CASSERT(KPKT_SC_BK == PKT_SC_BK);
115 _CASSERT(KPKT_SC_BE == PKT_SC_BE);
116 _CASSERT(KPKT_SC_RD == PKT_SC_RD);
117 _CASSERT(KPKT_SC_OAM == PKT_SC_OAM);
118 _CASSERT(KPKT_SC_AV == PKT_SC_AV);
119 _CASSERT(KPKT_SC_RV == PKT_SC_RV);
120 _CASSERT(KPKT_SC_VI == PKT_SC_VI);
121 _CASSERT(KPKT_SC_SIG == PKT_SC_SIG);
122 _CASSERT(KPKT_SC_VO == PKT_SC_VO);
123 _CASSERT(KPKT_SC_CTL == PKT_SC_CTL);
124 _CASSERT(KPKT_SC_MAX_CLASSES == MBUF_SC_MAX_CLASSES);
125
126 _CASSERT(KPKT_TC_UNSPEC == MBUF_TC_UNSPEC);
127 _CASSERT(KPKT_TC_BE == MBUF_TC_BE);
128 _CASSERT(KPKT_TC_BK == MBUF_TC_BK);
129 _CASSERT(KPKT_TC_VI == MBUF_TC_VI);
130 _CASSERT(KPKT_TC_VO == MBUF_TC_VO);
131 _CASSERT(KPKT_TC_MAX == MBUF_TC_MAX);
132
133 _CASSERT(KPKT_TC_BE == PKT_TC_BE);
134 _CASSERT(KPKT_TC_BK == PKT_TC_BK);
135 _CASSERT(KPKT_TC_VI == PKT_TC_VI);
136 _CASSERT(KPKT_TC_VO == PKT_TC_VO);
137
138 _CASSERT(PKT_SCVAL_BK_SYS == SCVAL_BK_SYS);
139 _CASSERT(PKT_SCVAL_BK == SCVAL_BK);
140 _CASSERT(PKT_SCVAL_BE == SCVAL_BE);
141 _CASSERT(PKT_SCVAL_RD == SCVAL_RD);
142 _CASSERT(PKT_SCVAL_OAM == SCVAL_OAM);
143 _CASSERT(PKT_SCVAL_AV == SCVAL_AV);
144 _CASSERT(PKT_SCVAL_RV == SCVAL_RV);
145 _CASSERT(PKT_SCVAL_VI == SCVAL_VI);
146 _CASSERT(PKT_SCVAL_VO == SCVAL_VO);
147 _CASSERT(PKT_SCVAL_CTL == SCVAL_CTL);
148
149 /*
150 * Assert that the value of common packet flags between mbuf and
151 * skywalk packets match, and that they are in PKT_F_COMMON_MASK.
152 */
153 _CASSERT(PKT_F_BACKGROUND == PKTF_SO_BACKGROUND);
154 _CASSERT(PKT_F_REALTIME == PKTF_SO_REALTIME);
155 _CASSERT(PKT_F_REXMT == PKTF_TCP_REXMT);
156 _CASSERT(PKT_F_LAST_PKT == PKTF_LAST_PKT);
157 _CASSERT(PKT_F_FLOW_ID == PKTF_FLOW_ID);
158 _CASSERT(PKT_F_FLOW_ADV == PKTF_FLOW_ADV);
159 _CASSERT(PKT_F_TX_COMPL_TS_REQ == PKTF_TX_COMPL_TS_REQ);
160 _CASSERT(PKT_F_TS_VALID == PKTF_TS_VALID);
161 _CASSERT(PKT_F_NEW_FLOW == PKTF_NEW_FLOW);
162 _CASSERT(PKT_F_START_SEQ == PKTF_START_SEQ);
163 _CASSERT(PKT_F_KEEPALIVE == PKTF_KEEPALIVE);
164 _CASSERT(PKT_F_WAKE_PKT == PKTF_WAKE_PKT);
165 _CASSERT(PKT_F_COMMON_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
166 PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_FLOW_ID | PKT_F_FLOW_ADV |
167 PKT_F_TX_COMPL_TS_REQ | PKT_F_TS_VALID | PKT_F_NEW_FLOW |
168 PKT_F_START_SEQ | PKT_F_KEEPALIVE | PKT_F_WAKE_PKT));
169 /*
170 * Assert packet flags shared with userland.
171 */
172 _CASSERT(PKT_F_USER_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
173 PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_OPT_DATA | PKT_F_PROMISC |
174 PKT_F_TRUNCATED | PKT_F_WAKE_PKT));
175
176 _CASSERT(offsetof(struct __kern_quantum, qum_len) ==
177 offsetof(struct __kern_packet, pkt_length));
178
179 /*
180 * Due to the use of tagged pointer, we need the size of
181 * the metadata preamble structure to be multiples of 16.
182 * See SK_PTR_TAG() definition for details.
183 */
184 _CASSERT(sizeof(struct __metadata_preamble) != 0 &&
185 (sizeof(struct __metadata_preamble) % 16) == 0);
186
187 _CASSERT(NX_PBUF_FRAGS_MIN == 1 &&
188 NX_PBUF_FRAGS_MIN == NX_PBUF_FRAGS_DEFAULT);
189
190 /*
191 * Batch alloc/free requires linking the objects together;
192 * make sure that the fields are at the same offset since
193 * we cast the object to struct skmem_obj.
194 */
195 _CASSERT(offsetof(struct __metadata_preamble, _mdp_next) ==
196 offsetof(struct skmem_obj, mo_next));
197 _CASSERT(offsetof(struct __buflet, __buflet_next) ==
198 offsetof(struct skmem_obj, mo_next));
199
200 SK_LOCK_ASSERT_HELD();
201 ASSERT(!__pp_inited);
202
203 pp_opt_cache = skmem_cache_create("pkt.opt",
204 sizeof(struct __packet_opt), sizeof(uint64_t),
205 NULL, NULL, NULL, NULL, NULL, 0);
206 pp_flow_cache = skmem_cache_create("pkt.flow",
207 sizeof(struct __flow), 16, /* 16-bytes aligned */
208 NULL, NULL, NULL, NULL, NULL, 0);
209 pp_compl_cache = skmem_cache_create("pkt.compl",
210 sizeof(struct __packet_compl), sizeof(uint64_t),
211 NULL, NULL, NULL, NULL, NULL, 0);
212
213 return 0;
214 }
215
216 void
pp_fini(void)217 pp_fini(void)
218 {
219 SK_LOCK_ASSERT_HELD();
220
221 if (__pp_inited) {
222 if (pp_compl_cache != NULL) {
223 skmem_cache_destroy(pp_compl_cache);
224 pp_compl_cache = NULL;
225 }
226 if (pp_flow_cache != NULL) {
227 skmem_cache_destroy(pp_flow_cache);
228 pp_flow_cache = NULL;
229 }
230 if (pp_opt_cache != NULL) {
231 skmem_cache_destroy(pp_opt_cache);
232 pp_opt_cache = NULL;
233 }
234
235 __pp_inited = 0;
236 }
237 }
238
239 static struct kern_pbufpool *
pp_alloc(zalloc_flags_t how)240 pp_alloc(zalloc_flags_t how)
241 {
242 struct kern_pbufpool *pp = zalloc_flags(pp_zone, how | Z_ZERO);
243
244 if (pp) {
245 lck_mtx_init(&pp->pp_lock, &skmem_lock_grp, &skmem_lock_attr);
246 }
247 return pp;
248 }
249
/*
 * Final teardown of a pool once its last reference is dropped.
 *
 * Called with the pool lock held (from pp_release_locked()); pp_destroy()
 * runs under the lock, which is then released and destroyed before the
 * pool structure itself is returned to pp_zone.  `pp' is invalid on return.
 */
static void
pp_free(struct kern_pbufpool *pp)
{
	PP_LOCK_ASSERT_HELD(pp);

	pp_destroy(pp);
	PP_UNLOCK(pp);

	SK_DF(SK_VERB_MEM, "pp 0x%llx FREE", SK_KVA(pp));
	/* lock must be dropped and destroyed before freeing its container */
	lck_mtx_destroy(&pp->pp_lock, &skmem_lock_grp);
	zfree(pp_zone, pp);
}
262
263 void
pp_retain_locked(struct kern_pbufpool * pp)264 pp_retain_locked(struct kern_pbufpool *pp)
265 {
266 PP_LOCK_ASSERT_HELD(pp);
267
268 pp->pp_refcnt++;
269 ASSERT(pp->pp_refcnt != 0);
270 }
271
/*
 * Take a reference on the pool, acquiring and releasing the pool lock
 * around the locked variant.
 */
void
pp_retain(struct kern_pbufpool *pp)
{
	PP_LOCK(pp);
	{
		pp_retain_locked(pp);
	}
	PP_UNLOCK(pp);
}
279
280 boolean_t
pp_release_locked(struct kern_pbufpool * pp)281 pp_release_locked(struct kern_pbufpool *pp)
282 {
283 uint32_t oldref = pp->pp_refcnt;
284
285 PP_LOCK_ASSERT_HELD(pp);
286
287 ASSERT(pp->pp_refcnt != 0);
288 if (--pp->pp_refcnt == 0) {
289 pp_free(pp);
290 }
291
292 return oldref == 1;
293 }
294
295 boolean_t
pp_release(struct kern_pbufpool * pp)296 pp_release(struct kern_pbufpool *pp)
297 {
298 boolean_t lastref;
299
300 PP_LOCK(pp);
301 if (!(lastref = pp_release_locked(pp))) {
302 PP_UNLOCK(pp);
303 }
304
305 return lastref;
306 }
307
308 void
pp_close(struct kern_pbufpool * pp)309 pp_close(struct kern_pbufpool *pp)
310 {
311 PP_LOCK(pp);
312 ASSERT(pp->pp_refcnt > 0);
313 ASSERT(!(pp->pp_flags & PPF_CLOSED));
314 pp->pp_flags |= PPF_CLOSED;
315 if (!pp_release_locked(pp)) {
316 PP_UNLOCK(pp);
317 }
318 }
319
/*
 * Size and configure the skmem region parameters backing a pool.
 *
 * Computes the per-object metadata size from the metadata type (quantum
 * vs. packet with `max_frags' buflets) plus the metadata preamble, then
 * configures, in order: the user metadata region (umd_srp), the kernel
 * metadata region (kmd_srp), the buffer region (buf_srp), and — when
 * requested — the kernel/user buflet regions (kbft_srp/ubft_srp).
 *
 * kbft_srp and ubft_srp may be NULL when on-demand buflets are not used;
 * ubft_srp requires kbft_srp.  The trailing assertions enforce that every
 * kernel metadata object can be paired with a user metadata object and a
 * buffer.
 */
void
pp_regions_params_adjust(struct skmem_region_params *buf_srp,
    struct skmem_region_params *kmd_srp, struct skmem_region_params *umd_srp,
    struct skmem_region_params *kbft_srp, struct skmem_region_params *ubft_srp,
    nexus_meta_type_t md_type, nexus_meta_subtype_t md_subtype, uint32_t md_cnt,
    uint16_t max_frags, uint32_t buf_size, uint32_t buf_cnt)
{
	uint32_t md_size = 0;

	ASSERT(max_frags != 0);

	/* base metadata object size depends on the metadata type */
	switch (md_type) {
	case NEXUS_META_TYPE_QUANTUM:
		md_size = NX_METADATA_QUANTUM_SZ;
		break;
	case NEXUS_META_TYPE_PACKET:
		/* packet metadata grows with the number of fragments */
		md_size = NX_METADATA_PACKET_SZ(max_frags);
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* add preamble size to metadata obj size */
	md_size += METADATA_PREAMBLE_SZ;
	ASSERT(md_size >= NX_METADATA_OBJ_MIN_SZ);

	/* user metadata region: mirrors the kernel metadata parameters */
	umd_srp->srp_md_type = md_type;
	umd_srp->srp_md_subtype = md_subtype;
	umd_srp->srp_r_obj_cnt = md_cnt;
	umd_srp->srp_r_obj_size = md_size;
	umd_srp->srp_max_frags = max_frags;
	skmem_region_params_config(umd_srp);

	/* kernel metadata region */
	kmd_srp->srp_md_type = md_type;
	kmd_srp->srp_md_subtype = md_subtype;
	kmd_srp->srp_r_obj_cnt = md_cnt;
	kmd_srp->srp_r_obj_size = md_size;
	kmd_srp->srp_max_frags = max_frags;
	skmem_region_params_config(kmd_srp);

	/*
	 * Buffer region: at least one buffer per configured kernel
	 * metadata object (kmd_srp->srp_c_obj_cnt is only valid after
	 * the config call above).
	 */
	buf_srp->srp_r_obj_cnt = MAX(buf_cnt, kmd_srp->srp_c_obj_cnt);
	buf_srp->srp_r_obj_size = buf_size;
	buf_srp->srp_cflags &=
	    ~(SKMEM_REGION_CR_MONOLITHIC | SKMEM_REGION_CR_PERSISTENT);
	skmem_region_params_config(buf_srp);

	if (kbft_srp != NULL) {
		/* on-demand buflets only make sense for packet metadata */
		ASSERT(md_type == NEXUS_META_TYPE_PACKET);

		/*
		 * Ideally we want the number of buflets to be
		 * "kmd_srp->srp_c_obj_cnt * (kmd_srp->srp_max_frags - 1)",
		 * so that we have enough buflets when multi-buflet and
		 * shared buffer object is used.
		 * Currently multi-buflet is being used only by user pool
		 * which doesn't support shared buffer object, hence to reduce
		 * the number of objects we are restricting the number of
		 * buflets to the number of buffers.
		 */
		kbft_srp->srp_r_obj_cnt = buf_srp->srp_c_obj_cnt;
		/* object must fit either a kernel or a user buflet */
		kbft_srp->srp_r_obj_size = MAX(sizeof(struct __kern_buflet_ext),
		    sizeof(struct __user_buflet));
		kbft_srp->srp_cflags = kmd_srp->srp_cflags;
		skmem_region_params_config(kbft_srp);
		ASSERT(kbft_srp->srp_c_obj_cnt >= buf_srp->srp_c_obj_cnt);
	}

	if (ubft_srp != NULL) {
		/* user buflet region mirrors the kernel buflet region */
		ASSERT(kbft_srp != NULL);
		ubft_srp->srp_r_obj_cnt = kbft_srp->srp_r_obj_cnt;
		ubft_srp->srp_r_obj_size = kbft_srp->srp_r_obj_size;
		ubft_srp->srp_cflags = umd_srp->srp_cflags;
		skmem_region_params_config(ubft_srp);
		ASSERT(kbft_srp->srp_c_obj_cnt == ubft_srp->srp_c_obj_cnt);
	}

	/* make sure each metadata can be paired with a buffer */
	ASSERT(kmd_srp->srp_c_obj_cnt == umd_srp->srp_c_obj_cnt);
	ASSERT(kmd_srp->srp_c_obj_cnt <= buf_srp->srp_c_obj_cnt);
}
401
/*
 * Construct one kernel (and optional user) metadata object and attach
 * `bufcnt' buffers to it.
 *
 * For packet metadata, the auxiliary structures (__packet_opt, __flow,
 * __packet_compl) are freshly allocated when `raw' is TRUE (object coming
 * straight from the slab), otherwise the ones already hanging off the
 * packet are reused.  Buffers are attached either by allocating a bare
 * buffer for the quantum's native buflet, or — when the pool supports
 * buffers on demand — by consuming pre-constructed buflets from `*blist',
 * which is advanced accordingly.
 *
 * Returns 0 on success; ENOMEM after destructing partial state on failure.
 */
SK_NO_INLINE_ATTRIBUTE
static int
pp_metadata_construct(struct __kern_quantum *kqum, struct __user_quantum *uqum,
    obj_idx_t midx, struct kern_pbufpool *pp, uint32_t skmflag, uint16_t bufcnt,
    boolean_t raw, struct skmem_obj **blist)
{
	struct __kern_buflet *kbuf;
	mach_vm_address_t baddr = 0;
	uint16_t *pbufs_cnt, *pbufs_max;
	uint16_t i;

	/* multiple buffers are only valid with on-demand buflet support */
	ASSERT(bufcnt == 1 || PP_HAS_BUFFER_ON_DEMAND(pp));

	/* construct {user,kernel} metadata */
	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
		struct __user_packet *upkt = SK_PTR_ADDR_UPKT(uqum);
		struct __packet_opt *opt;
		struct __flow *flow;
		struct __packet_compl *compl;
		uint64_t pflags;

		if (raw) {
			/* fresh slab object: allocate aux structures */
			opt = skmem_cache_alloc(pp_opt_cache, SKMEM_SLEEP);
			flow = skmem_cache_alloc(pp_flow_cache, SKMEM_SLEEP);
			compl = skmem_cache_alloc(pp_compl_cache, SKMEM_SLEEP);
			pflags = (PKT_F_OPT_ALLOC | PKT_F_FLOW_ALLOC |
			    PKT_F_TX_COMPL_ALLOC);
		} else {
			/* reconstruct: aux structures must already exist */
			ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
			    kpkt->pkt_com_opt != NULL);
			opt = kpkt->pkt_com_opt;
			ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
			    kpkt->pkt_flow != NULL);
			flow = kpkt->pkt_flow;
			ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
			    kpkt->pkt_tx_compl != NULL);
			compl = kpkt->pkt_tx_compl;
			pflags = kpkt->pkt_pflags;
		}
		/* will be adjusted below as part of allocating buffer(s) */
		_CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
		_CASSERT(sizeof(kpkt->pkt_bufs_max) == sizeof(uint16_t));
		pbufs_cnt = __DECONST(uint16_t *, &kpkt->pkt_bufs_cnt);
		pbufs_max = __DECONST(uint16_t *, &kpkt->pkt_bufs_max);

		/* kernel (and user) packet */
		KPKT_CTOR(kpkt, pflags, opt, flow, compl, midx,
		    upkt, pp, 0, pp->pp_max_frags, 0);
		break;
	}
	default:
		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
		VERIFY(bufcnt == 1);
		/* TODO: point these to quantum's once they're defined */
		pbufs_cnt = pbufs_max = NULL;
		/* kernel quantum */
		KQUM_CTOR(kqum, midx, uqum, pp, 0);
		break;
	}

	kbuf = kqum->qum_buf;
	for (i = 0; i < bufcnt; i++) {
		struct skmem_obj_info oib;

		if (!PP_HAS_BUFFER_ON_DEMAND(pp)) {
			ASSERT(i == 0);
			ASSERT(*blist == NULL);
			/*
			 * quantum has a native buflet, so we only need a
			 * buffer to be allocated and attached to the buflet.
			 */
			baddr = pp_alloc_buffer_common(pp, &oib, skmflag);
			if (__improbable(baddr == 0)) {
				goto fail;
			}
			KBUF_CTOR(kbuf, baddr, SKMEM_OBJ_IDX_REG(&oib),
			    SKMEM_OBJ_BUFCTL(&oib), pp);
			baddr = 0;
		} else {
			/*
			 * we use pre-constructed buflets with attached buffers.
			 */
			struct __kern_buflet *pkbuf = kbuf;
			struct skmem_obj *blistn;

			ASSERT(pkbuf != NULL);
			/* pop the next pre-constructed buflet off the list */
			kbuf = (kern_buflet_t)*blist;
			if (__improbable(kbuf == NULL)) {
				SK_DF(SK_VERB_MEM, "failed to get buflet,"
				    " pp 0x%llx", SK_KVA(pp));
				goto fail;
			}
			blistn = (*blist)->mo_next;
			(*blist)->mo_next = NULL;

			KBUF_EXT_INIT(kbuf, pp);
			/* chain it after the previous buflet */
			KBUF_LINK(pkbuf, kbuf);
			*blist = blistn;
		}

		/* adjust buffer count accordingly */
		if (__probable(pbufs_cnt != NULL)) {
			*pbufs_cnt += 1;
			ASSERT(*pbufs_cnt <= *pbufs_max);
		}
	}

	ASSERT(!PP_KERNEL_ONLY(pp) || (kqum->qum_qflags & QUM_F_KERNEL_ONLY));
	ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
	SK_DF(SK_VERB_MEM, "pp 0x%llx pkt 0x%llx bufcnt %d buf 0x%llx",
	    SK_KVA(pp), SK_KVA(kqum), bufcnt, SK_KVA(baddr));
	return 0;

fail:
	/* tear down whatever was attached so far */
	ASSERT(bufcnt != 0 && baddr == 0);
	pp_metadata_destruct(kqum, pp, raw);
	return ENOMEM;
}
522
/*
 * Common skmem constructor body for metadata objects.
 *
 * Resolves the kernel (and, unless the pool is kernel-only, user) quantum
 * addresses from the skmem object info, initializes the user metadata
 * redzone, optionally batch-allocates pre-constructed buflets, and hands
 * off to pp_metadata_construct().  `no_buflet' selects between attaching
 * zero buffers and pp->pp_max_frags buffers.
 *
 * Returns 0 on success or an errno-style value (e.g. ENOMEM) on failure.
 */
static int
pp_metadata_ctor_common(struct skmem_obj_info *oi0,
    struct skmem_obj_info *oim0, struct kern_pbufpool *pp, uint32_t skmflag,
    boolean_t no_buflet)
{
	struct skmem_obj_info _oi, _oim;
	struct skmem_obj_info *oi, *oim;
	struct __kern_quantum *kqum;
	struct __user_quantum *uqum;
	uint16_t bufcnt = (no_buflet ? 0 : pp->pp_max_frags);
	struct skmem_obj *blist = NULL;
	int error;

#if (DEVELOPMENT || DEBUG)
	uint64_t mtbf = skmem_region_get_mtbf();
	/*
	 * MTBF is applicable only for non-blocking allocations here.
	 */
	if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
	    (skmflag & SKMEM_NOSLEEP))) {
		SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
		net_update_uptime();
		return ENOMEM;
	}
#endif /* (DEVELOPMENT || DEBUG) */

	/*
	 * Note that oi0 and oim0 may be stored inside the object itself;
	 * if so, copy them to local variables before constructing. We
	 * don't use PPF_BATCH to test as the allocator may be allocating
	 * storage space differently depending on the number of objects.
	 */
	if (__probable((uintptr_t)oi0 >= (uintptr_t)SKMEM_OBJ_ADDR(oi0) &&
	    ((uintptr_t)oi0 + sizeof(*oi0)) <=
	    ((uintptr_t)SKMEM_OBJ_ADDR(oi0) + SKMEM_OBJ_SIZE(oi0)))) {
		oi = &_oi;
		*oi = *oi0;
		if (__probable(oim0 != NULL)) {
			oim = &_oim;
			*oim = *oim0;
		} else {
			oim = NULL;
		}
	} else {
		oi = oi0;
		oim = oim0;
	}

	/* kernel quantum lives right after the metadata preamble */
	kqum = SK_PTR_ADDR_KQUM((uintptr_t)SKMEM_OBJ_ADDR(oi) +
	    METADATA_PREAMBLE_SZ);

	if (__probable(!PP_KERNEL_ONLY(pp))) {
		/* user-visible pool: must have a matching user object */
		ASSERT(oim != NULL && SKMEM_OBJ_ADDR(oim) != NULL);
		ASSERT(SKMEM_OBJ_SIZE(oi) == SKMEM_OBJ_SIZE(oim));
		uqum = SK_PTR_ADDR_UQUM((uintptr_t)SKMEM_OBJ_ADDR(oim) +
		    METADATA_PREAMBLE_SZ);
	} else {
		ASSERT(oim == NULL);
		uqum = NULL;
	}

	if (oim != NULL) {
		/* initialize user metadata redzone */
		struct __metadata_preamble *mdp = SKMEM_OBJ_ADDR(oim);
		mdp->mdp_redzone =
		    (SKMEM_OBJ_ROFF(oim) + METADATA_PREAMBLE_SZ) ^
		    __ch_umd_redzone_cookie;
	}

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0) {
		(void) skmem_cache_batch_alloc(pp->pp_kbft_cache, &blist,
		    bufcnt, skmflag);
	}

	error = pp_metadata_construct(kqum, uqum, SKMEM_OBJ_IDX_REG(oi), pp,
	    skmflag, bufcnt, TRUE, &blist);
	/* return any buflets left unconsumed (e.g. on partial failure) */
	if (__improbable(blist != NULL)) {
		skmem_cache_batch_free(pp->pp_kbft_cache, blist);
		blist = NULL;
	}
	return error;
}
606
607 static int
pp_metadata_ctor_no_buflet(struct skmem_obj_info * oi0,struct skmem_obj_info * oim0,void * arg,uint32_t skmflag)608 pp_metadata_ctor_no_buflet(struct skmem_obj_info *oi0,
609 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
610 {
611 return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, TRUE);
612 }
613
614 static int
pp_metadata_ctor_max_buflet(struct skmem_obj_info * oi0,struct skmem_obj_info * oim0,void * arg,uint32_t skmflag)615 pp_metadata_ctor_max_buflet(struct skmem_obj_info *oi0,
616 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
617 {
618 return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, FALSE);
619 }
620
/*
 * Common destructor body for metadata objects.
 *
 * Detaches and frees the buffers/buflets attached to the quantum or
 * packet: the native first buflet (if it holds a buffer) is freed via
 * pp_free_buflet_common(), while any chained external buflets are
 * unlinked and appended to `*blist' for the caller to batch-free.
 * When `raw' is TRUE (object is going back to the slab) the packet's
 * auxiliary structures (__packet_opt, __flow, __packet_compl) are also
 * returned to their caches and pkt_pflags cleared.
 */
__attribute__((always_inline))
static void
pp_metadata_destruct_common(struct __kern_quantum *kqum,
    struct kern_pbufpool *pp, boolean_t raw, struct skmem_obj **blist)
{
	struct __kern_buflet *kbuf, *nbuf, *lbuf = NULL;
	boolean_t first_buflet_empty;
	struct skmem_obj *_blist;
	uint16_t bufcnt, i = 0;

	ASSERT(blist != NULL);
	/* remember the caller's current list head; re-linked at the end */
	_blist = *blist;

	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);

		/* sanity-check the packet still belongs to this pool */
		ASSERT(kpkt->pkt_user != NULL || PP_KERNEL_ONLY(pp));
		ASSERT(kpkt->pkt_qum.qum_pp == pp);
		ASSERT(METADATA_TYPE(kpkt) == pp->pp_md_type);
		ASSERT(METADATA_SUBTYPE(kpkt) == pp->pp_md_subtype);
		ASSERT(METADATA_IDX(kpkt) != OBJ_IDX_NONE);
		ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
		ASSERT(kpkt->pkt_bufs_cnt <= kpkt->pkt_bufs_max);
		ASSERT(kpkt->pkt_bufs_max == pp->pp_max_frags);
		_CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
		bufcnt = kpkt->pkt_bufs_cnt;
		kbuf = &kqum->qum_buf[0];
		/*
		 * special handling for empty first buflet.
		 */
		first_buflet_empty = (kbuf->buf_addr == 0);
		*__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = 0;
		break;
	}
	default:
		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
		ASSERT(kqum->qum_user != NULL || PP_KERNEL_ONLY(pp));
		ASSERT(kqum->qum_pp == pp);
		ASSERT(METADATA_TYPE(kqum) == pp->pp_md_type);
		ASSERT(METADATA_SUBTYPE(kqum) == pp->pp_md_subtype);
		ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
		ASSERT(kqum->qum_ksd == NULL);
		kbuf = &kqum->qum_buf[0];
		/*
		 * XXX: Special handling for quantum as we don't currently
		 * define bufs_{cnt,max} there. Given that we support at
		 * most only 1 buflet for now, check if buf_addr is non-NULL.
		 * See related code in pp_metadata_construct().
		 */
		first_buflet_empty = (kbuf->buf_addr == 0);
		bufcnt = first_buflet_empty ? 0 : 1;
		break;
	}

	/* hand the chained (external) buflets to the caller's list */
	nbuf = __DECONST(struct __kern_buflet *, kbuf->buf_nbft_addr);
	if (nbuf != NULL) {
		*blist = (struct skmem_obj *)(void *)nbuf;
	}
	/* detach the chain from the native first buflet */
	BUF_NBFT_ADDR(kbuf, 0);
	BUF_NBFT_IDX(kbuf, OBJ_IDX_NONE);
	if (!first_buflet_empty) {
		/* native buflet holds a buffer; free it directly */
		pp_free_buflet_common(pp, kbuf);
		++i;
	}
	/* walk the chain, clearing indices and counting buflets */
	while (nbuf != NULL) {
		lbuf = nbuf;
		BUF_NBFT_IDX(nbuf, OBJ_IDX_NONE);
		nbuf = __DECONST(struct __kern_buflet *, nbuf->buf_nbft_addr);
		++i;
	}
	ASSERT(i == bufcnt);
	/* splice the previous list head after the last chained buflet */
	if (lbuf != NULL) {
		((struct skmem_obj *)(void *)lbuf)->mo_next = _blist;
	}

	/* if we're about to return this object to the slab, clean it up */
	if (raw) {
		switch (pp->pp_md_type) {
		case NEXUS_META_TYPE_PACKET: {
			struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);

			ASSERT(kpkt->pkt_com_opt != NULL ||
			    !(kpkt->pkt_pflags & PKT_F_OPT_ALLOC));
			if (kpkt->pkt_com_opt != NULL) {
				ASSERT(kpkt->pkt_pflags & PKT_F_OPT_ALLOC);
				skmem_cache_free(pp_opt_cache,
				    kpkt->pkt_com_opt);
				kpkt->pkt_com_opt = NULL;
			}
			ASSERT(kpkt->pkt_flow != NULL ||
			    !(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC));
			if (kpkt->pkt_flow != NULL) {
				ASSERT(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC);
				skmem_cache_free(pp_flow_cache, kpkt->pkt_flow);
				kpkt->pkt_flow = NULL;
			}
			ASSERT(kpkt->pkt_tx_compl != NULL ||
			    !(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC));
			if (kpkt->pkt_tx_compl != NULL) {
				ASSERT(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC);
				skmem_cache_free(pp_compl_cache,
				    kpkt->pkt_tx_compl);
				kpkt->pkt_tx_compl = NULL;
			}
			kpkt->pkt_pflags = 0;
			break;
		}
		default:
			ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_QUANTUM);
			/* nothing to do for quantum (yet) */
			break;
		}
	}
}
736
737 __attribute__((always_inline))
738 static void
pp_metadata_destruct(struct __kern_quantum * kqum,struct kern_pbufpool * pp,boolean_t raw)739 pp_metadata_destruct(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
740 boolean_t raw)
741 {
742 struct skmem_obj *blist = NULL;
743
744 pp_metadata_destruct_common(kqum, pp, raw, &blist);
745 if (blist != NULL) {
746 skmem_cache_batch_free(pp->pp_kbft_cache, blist);
747 }
748 }
749
750 static void
pp_metadata_dtor(void * addr,void * arg)751 pp_metadata_dtor(void *addr, void *arg)
752 {
753 pp_metadata_destruct(SK_PTR_ADDR_KQUM((uintptr_t)addr +
754 METADATA_PREAMBLE_SZ), arg, TRUE);
755 }
756
757 static void
pp_buf_seg_ctor(struct sksegment * sg,IOSKMemoryBufferRef md,void * arg)758 pp_buf_seg_ctor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
759 {
760 struct kern_pbufpool *pp = arg;
761
762 if (pp->pp_pbuf_seg_ctor != NULL) {
763 pp->pp_pbuf_seg_ctor(pp, sg, md);
764 }
765 }
766
767 static void
pp_buf_seg_dtor(struct sksegment * sg,IOSKMemoryBufferRef md,void * arg)768 pp_buf_seg_dtor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
769 {
770 struct kern_pbufpool *pp = arg;
771
772 if (pp->pp_pbuf_seg_dtor != NULL) {
773 pp->pp_pbuf_seg_dtor(pp, sg, md);
774 }
775 }
776
777 static int
pp_buflet_metadata_ctor(struct skmem_obj_info * oi0,struct skmem_obj_info * oim0,void * arg,uint32_t skmflag)778 pp_buflet_metadata_ctor(struct skmem_obj_info *oi0,
779 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
780 {
781 #pragma unused (skmflag)
782 struct kern_pbufpool *pp = (struct kern_pbufpool *)arg;
783 struct __kern_buflet *kbft;
784 struct __user_buflet *ubft;
785 struct skmem_obj_info oib;
786 mach_vm_address_t baddr;
787 obj_idx_t oi_idx_reg;
788
789 baddr = pp_alloc_buffer_common(pp, &oib, skmflag);
790 if (__improbable(baddr == 0)) {
791 return ENOMEM;
792 }
793 /*
794 * Note that oi0 and oim0 may be stored inside the object itself;
795 * so copy what is required to local variables before constructing.
796 */
797 oi_idx_reg = SKMEM_OBJ_IDX_REG(oi0);
798 kbft = SKMEM_OBJ_ADDR(oi0);
799
800 if (__probable(!PP_KERNEL_ONLY(pp))) {
801 ASSERT(oim0 != NULL && SKMEM_OBJ_ADDR(oim0) != NULL);
802 ASSERT(SKMEM_OBJ_SIZE(oi0) == SKMEM_OBJ_SIZE(oim0));
803 ASSERT(oi_idx_reg == SKMEM_OBJ_IDX_REG(oim0));
804 ASSERT(SKMEM_OBJ_IDX_SEG(oi0) == SKMEM_OBJ_IDX_SEG(oim0));
805 ubft = SKMEM_OBJ_ADDR(oim0);
806 } else {
807 ASSERT(oim0 == NULL);
808 ubft = NULL;
809 }
810 KBUF_EXT_CTOR(kbft, ubft, baddr, SKMEM_OBJ_IDX_REG(&oib),
811 SKMEM_OBJ_BUFCTL(&oib), oi_idx_reg, pp);
812 return 0;
813 }
814
/*
 * skmem destructor for external (on-demand) buflet objects.
 *
 * Detaches the backing buffer from the buflet via KBUF_DTOR() and, when
 * this was the last use of that buffer (usecnt drops to 0), returns the
 * buffer to the pool's buffer cache.
 */
static void
pp_buflet_metadata_dtor(void *addr, void *arg)
{
	struct __kern_buflet *kbft = addr;
	void *objaddr = kbft->buf_objaddr;
	struct kern_pbufpool *pp = arg;
	uint32_t usecnt = 0;

	ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
	/*
	 * don't assert for (buf_nbft_addr == 0) here as constructed
	 * buflet may have this field as non-zero. This is because
	 * buf_nbft_addr (__buflet_next) is used by skmem batch alloc
	 * for chaining the buflets.
	 * To ensure that the freed buflet was not part of a chain we
	 * assert for (buf_nbft_idx == OBJ_IDX_NONE).
	 */
	ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
	ASSERT(((struct __kern_buflet_ext *)kbft)->kbe_buf_upp_link.sle_next ==
	    NULL);
	/* a destructed buflet must still have its buffer attached */
	ASSERT(kbft->buf_addr != 0);
	ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
	ASSERT(kbft->buf_ctl != NULL);

	KBUF_DTOR(kbft, usecnt);
	SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u", SK_KVA(pp),
	    SK_KVA(objaddr), usecnt);
	if (__probable(usecnt == 0)) {
		/* last user of the buffer; return it to the buffer cache */
		skmem_cache_free(pp->pp_buf_cache, objaddr);
	}
}
846
847 struct kern_pbufpool *
pp_create(const char * name,struct skmem_region_params * buf_srp,struct skmem_region_params * kmd_srp,struct skmem_region_params * umd_srp,struct skmem_region_params * kbft_srp,struct skmem_region_params * ubft_srp,pbuf_seg_ctor_fn_t buf_seg_ctor,pbuf_seg_dtor_fn_t buf_seg_dtor,const void * ctx,pbuf_ctx_retain_fn_t ctx_retain,pbuf_ctx_release_fn_t ctx_release,uint32_t ppcreatef)848 pp_create(const char *name, struct skmem_region_params *buf_srp,
849 struct skmem_region_params *kmd_srp, struct skmem_region_params *umd_srp,
850 struct skmem_region_params *kbft_srp, struct skmem_region_params *ubft_srp,
851 pbuf_seg_ctor_fn_t buf_seg_ctor, pbuf_seg_dtor_fn_t buf_seg_dtor,
852 const void *ctx, pbuf_ctx_retain_fn_t ctx_retain,
853 pbuf_ctx_release_fn_t ctx_release, uint32_t ppcreatef)
854 {
855 struct kern_pbufpool *pp = NULL;
856 uint32_t md_size, buf_size;
857 nexus_meta_type_t md_type;
858 nexus_meta_subtype_t md_subtype;
859 uint32_t md_cflags;
860 uint16_t max_frags;
861 char cname[64];
862
863 /* buf_seg_{ctor,dtor} pair must be either NULL or non-NULL */
864 ASSERT(!(!(buf_seg_ctor == NULL && buf_seg_dtor == NULL) &&
865 ((buf_seg_ctor == NULL) ^ (buf_seg_dtor == NULL))));
866
867 /* ctx{,_retain,_release} must be either ALL NULL or ALL non-NULL */
868 ASSERT((ctx == NULL && ctx_retain == NULL && ctx_release == NULL) ||
869 (ctx != NULL && ctx_retain != NULL && ctx_release != NULL));
870
871 ASSERT(umd_srp->srp_c_obj_size == kmd_srp->srp_c_obj_size);
872 ASSERT(umd_srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
873 ASSERT(umd_srp->srp_c_seg_size == kmd_srp->srp_c_seg_size);
874 ASSERT(umd_srp->srp_seg_cnt == kmd_srp->srp_seg_cnt);
875 ASSERT(umd_srp->srp_md_type == kmd_srp->srp_md_type);
876 ASSERT(umd_srp->srp_md_subtype == kmd_srp->srp_md_subtype);
877 ASSERT(umd_srp->srp_max_frags == kmd_srp->srp_max_frags);
878
879 md_size = kmd_srp->srp_r_obj_size;
880 md_type = kmd_srp->srp_md_type;
881 md_subtype = kmd_srp->srp_md_subtype;
882 max_frags = kmd_srp->srp_max_frags;
883 buf_size = buf_srp->srp_r_obj_size;
884
885 if (__improbable((buf_size > UINT16_MAX) ||
886 (buf_srp->srp_c_obj_size > UINT16_MAX))) {
887 SK_ERR("\"%s\" requested/configured "
888 "(%d/%d) buffer size is too large", name, buf_size,
889 buf_srp->srp_c_obj_size);
890 goto failed;
891 }
892
893 #if (DEBUG || DEVELOPMENT)
894 ASSERT(buf_size != 0);
895 ASSERT(md_type > NEXUS_META_TYPE_INVALID &&
896 md_type <= NEXUS_META_TYPE_MAX);
897 if (md_type == NEXUS_META_TYPE_QUANTUM) {
898 ASSERT(max_frags == 1);
899 ASSERT(md_size >=
900 (METADATA_PREAMBLE_SZ + NX_METADATA_QUANTUM_SZ));
901 } else {
902 ASSERT(max_frags >= 1);
903 ASSERT(md_type == NEXUS_META_TYPE_PACKET);
904 ASSERT(md_size >= (METADATA_PREAMBLE_SZ +
905 NX_METADATA_PACKET_SZ(max_frags)));
906 }
907 ASSERT(md_subtype > NEXUS_META_SUBTYPE_INVALID &&
908 md_subtype <= NEXUS_META_SUBTYPE_MAX);
909 #endif /* DEBUG || DEVELOPMENT */
910
911 pp = pp_alloc(Z_WAITOK);
912
913 (void) snprintf((char *)pp->pp_name, sizeof(pp->pp_name),
914 "skywalk.pp.%s", name);
915
916 pp->pp_ctx = __DECONST(void *, ctx);
917 pp->pp_ctx_retain = ctx_retain;
918 pp->pp_ctx_release = ctx_release;
919 if (pp->pp_ctx != NULL) {
920 pp->pp_ctx_retain(pp->pp_ctx);
921 }
922
923 pp->pp_pbuf_seg_ctor = buf_seg_ctor;
924 pp->pp_pbuf_seg_dtor = buf_seg_dtor;
925 pp->pp_buflet_size = (uint16_t)buf_size;
926 pp->pp_md_type = md_type;
927 pp->pp_md_subtype = md_subtype;
928 pp->pp_max_frags = max_frags;
929 if (ppcreatef & PPCREATEF_EXTERNAL) {
930 pp->pp_flags |= PPF_EXTERNAL;
931 }
932 if (ppcreatef & PPCREATEF_TRUNCATED_BUF) {
933 pp->pp_flags |= PPF_TRUNCATED_BUF;
934 }
935 if (ppcreatef & PPCREATEF_KERNEL_ONLY) {
936 pp->pp_flags |= PPF_KERNEL;
937 }
938 if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
939 pp->pp_flags |= PPF_BUFFER_ON_DEMAND;
940 }
941 if (ppcreatef & PPCREATEF_DYNAMIC) {
942 pp->pp_flags |= PPF_DYNAMIC;
943 }
944
945 pp_retain(pp);
946
947 /*
948 * Metadata regions {UMD,KMD} magazines layer attribute must match.
949 */
950 ASSERT((umd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ==
951 (kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES));
952 /*
953 * Metadata regions {UMD,KMD} persistency attribute must match.
954 */
955 ASSERT((umd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
956 (kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
957
958 md_cflags = ((umd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ?
959 SKMEM_CR_NOMAGAZINES : 0);
960 md_cflags |= SKMEM_CR_BATCH;
961 pp->pp_flags |= PPF_BATCH;
962
963 if (pp->pp_flags & PPF_DYNAMIC) {
964 md_cflags |= SKMEM_CR_DYNAMIC;
965 }
966
967 if (!PP_KERNEL_ONLY(pp) && (pp->pp_umd_region =
968 skmem_region_create(name, umd_srp, NULL, NULL,
969 NULL)) == NULL) {
970 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
971 pp->pp_name, SK_KVA(pp), umd_srp->srp_name);
972 goto failed;
973 }
974
975 if ((pp->pp_kmd_region = skmem_region_create(name, kmd_srp, NULL, NULL,
976 NULL)) == NULL) {
977 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
978 pp->pp_name, SK_KVA(pp), kmd_srp->srp_name);
979 goto failed;
980 }
981
982 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
983 VERIFY((kbft_srp != NULL) && (kbft_srp->srp_r_obj_cnt > 0));
984 if (!PP_KERNEL_ONLY(pp)) {
985 VERIFY((ubft_srp != NULL) &&
986 (ubft_srp->srp_r_obj_cnt > 0));
987 }
988 }
989 /*
990 * Metadata regions {KMD,KBFT,UBFT} magazines layer and persistency
991 * attribute must match.
992 */
993 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
994 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ==
995 (kbft_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES));
996 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
997 (kbft_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
998 }
999
1000 if (PP_HAS_BUFFER_ON_DEMAND(pp) && !PP_KERNEL_ONLY(pp)) {
1001 ASSERT((ubft_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ==
1002 (kbft_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES));
1003 ASSERT((ubft_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
1004 (kbft_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
1005
1006 if ((pp->pp_ubft_region = skmem_region_create(name, ubft_srp,
1007 NULL, NULL, NULL)) == NULL) {
1008 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1009 pp->pp_name, SK_KVA(pp), ubft_srp->srp_name);
1010 goto failed;
1011 }
1012 }
1013
1014 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1015 if ((pp->pp_kbft_region = skmem_region_create(name,
1016 kbft_srp, NULL, NULL, NULL)) == NULL) {
1017 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1018 pp->pp_name, SK_KVA(pp), kbft_srp->srp_name);
1019 goto failed;
1020 }
1021 }
1022
1023 if (!PP_KERNEL_ONLY(pp)) {
1024 skmem_region_mirror(pp->pp_kmd_region, pp->pp_umd_region);
1025 }
1026 if (!PP_KERNEL_ONLY(pp) && pp->pp_ubft_region != NULL) {
1027 ASSERT(pp->pp_kbft_region != NULL);
1028 skmem_region_mirror(pp->pp_kbft_region, pp->pp_ubft_region);
1029 }
1030
1031 /*
1032 * Create the metadata cache; magazines layer is determined by caller.
1033 */
1034 (void) snprintf(cname, sizeof(cname), "kmd.%s", name);
1035 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1036 pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
1037 pp_metadata_ctor_no_buflet, pp_metadata_dtor, NULL, pp,
1038 pp->pp_kmd_region, md_cflags);
1039 } else {
1040 pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
1041 pp_metadata_ctor_max_buflet, pp_metadata_dtor, NULL, pp,
1042 pp->pp_kmd_region, md_cflags);
1043 }
1044
1045 if (pp->pp_kmd_cache == NULL) {
1046 SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1047 pp->pp_name, SK_KVA(pp), cname);
1048 goto failed;
1049 }
1050
1051 /*
1052 * Create the buflet metadata cache
1053 */
1054 if (pp->pp_kbft_region != NULL) {
1055 (void) snprintf(cname, sizeof(cname), "kbft.%s", name);
1056 pp->pp_kbft_cache = skmem_cache_create(cname,
1057 kbft_srp->srp_c_obj_size, 0, pp_buflet_metadata_ctor,
1058 pp_buflet_metadata_dtor, NULL, pp, pp->pp_kbft_region,
1059 md_cflags);
1060
1061 if (pp->pp_kbft_cache == NULL) {
1062 SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1063 pp->pp_name, SK_KVA(pp), cname);
1064 goto failed;
1065 }
1066 }
1067
1068 if ((pp->pp_buf_region = skmem_region_create(name,
1069 buf_srp, pp_buf_seg_ctor, pp_buf_seg_dtor, pp)) == NULL) {
1070 SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1071 pp->pp_name, SK_KVA(pp), buf_srp->srp_name);
1072 goto failed;
1073 }
1074
1075 /*
1076 * Create the buffer object cache without the magazines layer.
1077 * We rely on caching the constructed metadata object instead.
1078 */
1079 (void) snprintf(cname, sizeof(cname), "buf.%s", name);
1080 if ((pp->pp_buf_cache = skmem_cache_create(cname, buf_size, 0,
1081 NULL, NULL, NULL, pp, pp->pp_buf_region, SKMEM_CR_NOMAGAZINES)) ==
1082 NULL) {
1083 SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1084 pp->pp_name, SK_KVA(pp), cname);
1085 goto failed;
1086 }
1087
1088 return pp;
1089
1090 failed:
1091 if (pp != NULL) {
1092 if (pp->pp_ctx != NULL) {
1093 pp->pp_ctx_release(pp->pp_ctx);
1094 pp->pp_ctx = NULL;
1095 }
1096 pp_close(pp);
1097 }
1098
1099 return NULL;
1100 }
1101
/*
 * Tear down a packet buffer pool: purge the user-packet and user-buflet
 * allocated-address hash tables, destroy the metadata and buflet caches,
 * release every backing skmem region, and finally drop the pool context.
 * Caller must hold the pool lock.
 */
void
pp_destroy(struct kern_pbufpool *pp)
{
	PP_LOCK_ASSERT_HELD(pp);

	/* may be called for built-in pp with outstanding reference */
	ASSERT(!(pp->pp_flags & PPF_EXTERNAL) || pp->pp_refcnt == 0);

	/* purge and free the user-packet hash table */
	pp_destroy_upp_locked(pp);

	/* purge and free the user-buflet hash table */
	pp_destroy_upp_bft_locked(pp);

	if (pp->pp_kmd_cache != NULL) {
		skmem_cache_destroy(pp->pp_kmd_cache);
		pp->pp_kmd_cache = NULL;
	}

	if (pp->pp_umd_region != NULL) {
		skmem_region_release(pp->pp_umd_region);
		pp->pp_umd_region = NULL;
	}

	if (pp->pp_kmd_region != NULL) {
		skmem_region_release(pp->pp_kmd_region);
		pp->pp_kmd_region = NULL;
	}

	if (pp->pp_kbft_cache != NULL) {
		skmem_cache_destroy(pp->pp_kbft_cache);
		pp->pp_kbft_cache = NULL;
	}

	if (pp->pp_ubft_region != NULL) {
		skmem_region_release(pp->pp_ubft_region);
		pp->pp_ubft_region = NULL;
	}

	if (pp->pp_kbft_region != NULL) {
		skmem_region_release(pp->pp_kbft_region);
		pp->pp_kbft_region = NULL;
	}

	/*
	 * The order is important here, since pp_metadata_dtor()
	 * called by freeing on the pp_kmd_cache will in turn
	 * free the attached buffer. Therefore destroy the
	 * buffer cache last.
	 */
	if (pp->pp_buf_cache != NULL) {
		skmem_cache_destroy(pp->pp_buf_cache);
		pp->pp_buf_cache = NULL;
	}
	if (pp->pp_buf_region != NULL) {
		skmem_region_release(pp->pp_buf_region);
		pp->pp_buf_region = NULL;
	}

	/* release the caller-supplied pool context, if any */
	if (pp->pp_ctx != NULL) {
		pp->pp_ctx_release(pp->pp_ctx);
		pp->pp_ctx = NULL;
	}
}
1164
1165 static int
pp_init_upp_locked(struct kern_pbufpool * pp,boolean_t can_block)1166 pp_init_upp_locked(struct kern_pbufpool *pp, boolean_t can_block)
1167 {
1168 int i, err = 0;
1169
1170 if (pp->pp_u_hash_table != NULL) {
1171 goto done;
1172 }
1173
1174 /* allocated-address hash table */
1175 pp->pp_u_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
1176 zalloc_noblock(pp_u_htbl_zone);
1177 if (pp->pp_u_hash_table == NULL) {
1178 SK_ERR("failed to zalloc packet buffer pool upp hash table");
1179 err = ENOMEM;
1180 goto done;
1181 }
1182
1183 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1184 SLIST_INIT(&pp->pp_u_hash_table[i].upp_head);
1185 }
1186 done:
1187 return err;
1188 }
1189
1190 static void
pp_destroy_upp_locked(struct kern_pbufpool * pp)1191 pp_destroy_upp_locked(struct kern_pbufpool *pp)
1192 {
1193 PP_LOCK_ASSERT_HELD(pp);
1194 if (pp->pp_u_hash_table != NULL) {
1195 /* purge anything that's left */
1196 pp_purge_upp_locked(pp, -1);
1197
1198 #if (DEBUG || DEVELOPMENT)
1199 for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1200 ASSERT(SLIST_EMPTY(&pp->pp_u_hash_table[i].upp_head));
1201 }
1202 #endif /* DEBUG || DEVELOPMENT */
1203
1204 zfree(pp_u_htbl_zone, pp->pp_u_hash_table);
1205 pp->pp_u_hash_table = NULL;
1206 }
1207 ASSERT(pp->pp_u_bufinuse == 0);
1208 }
1209
1210 int
pp_init_upp(struct kern_pbufpool * pp,boolean_t can_block)1211 pp_init_upp(struct kern_pbufpool *pp, boolean_t can_block)
1212 {
1213 int err = 0;
1214
1215 PP_LOCK(pp);
1216 err = pp_init_upp_locked(pp, can_block);
1217 if (err) {
1218 SK_ERR("packet UPP init failed (%d)", err);
1219 goto done;
1220 }
1221 err = pp_init_upp_bft_locked(pp, can_block);
1222 if (err) {
1223 SK_ERR("buflet UPP init failed (%d)", err);
1224 pp_destroy_upp_locked(pp);
1225 goto done;
1226 }
1227 pp_retain_locked(pp);
1228 done:
1229 PP_UNLOCK(pp);
1230 return err;
1231 }
1232
1233 __attribute__((always_inline))
1234 static void
pp_insert_upp_bft_locked(struct kern_pbufpool * pp,struct __kern_buflet * kbft,pid_t pid)1235 pp_insert_upp_bft_locked(struct kern_pbufpool *pp,
1236 struct __kern_buflet *kbft, pid_t pid)
1237 {
1238 struct kern_pbufpool_u_bft_bkt *bkt;
1239 struct __kern_buflet_ext *kbe = (struct __kern_buflet_ext *)kbft;
1240
1241 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1242 ASSERT(kbe->kbe_buf_pid == (pid_t)-1);
1243 kbe->kbe_buf_pid = pid;
1244 bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, kbft->buf_bft_idx_reg);
1245 SLIST_INSERT_HEAD(&bkt->upp_head, kbe, kbe_buf_upp_link);
1246 pp->pp_u_bftinuse++;
1247 }
1248
1249 __attribute__((always_inline))
1250 static void
pp_insert_upp_bft_chain_locked(struct kern_pbufpool * pp,struct __kern_buflet * kbft,pid_t pid)1251 pp_insert_upp_bft_chain_locked(struct kern_pbufpool *pp,
1252 struct __kern_buflet *kbft, pid_t pid)
1253 {
1254 while (kbft != NULL) {
1255 pp_insert_upp_bft_locked(pp, kbft, pid);
1256 kbft = __DECONST(kern_buflet_t, kbft->buf_nbft_addr);
1257 }
1258 }
1259
1260 /* Also inserts the attached chain of buflets */
1261 void static inline
pp_insert_upp_common(struct kern_pbufpool * pp,struct __kern_quantum * kqum,pid_t pid)1262 pp_insert_upp_common(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1263 pid_t pid)
1264 {
1265 struct kern_pbufpool_u_bkt *bkt;
1266 struct __kern_buflet *kbft;
1267
1268 ASSERT(kqum->qum_pid == (pid_t)-1);
1269 kqum->qum_pid = pid;
1270
1271 bkt = KERN_PBUFPOOL_U_HASH(pp, METADATA_IDX(kqum));
1272 SLIST_INSERT_HEAD(&bkt->upp_head, kqum, qum_upp_link);
1273 pp->pp_u_bufinuse++;
1274
1275 kbft = (kern_buflet_t)kqum->qum_buf[0].buf_nbft_addr;
1276 if (kbft != NULL) {
1277 ASSERT(((kern_buflet_t)kbft)->buf_flag & BUFLET_FLAG_EXTERNAL);
1278 ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
1279 pp_insert_upp_bft_chain_locked(pp, kbft, pid);
1280 }
1281 }
1282
/*
 * Insert a quantum (and any attached buflet chain) into the
 * allocated-address hash table.  Caller must hold the pool lock.
 */
void
pp_insert_upp_locked(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
    pid_t pid)
{
	pp_insert_upp_common(pp, kqum, pid);
}
1289
/*
 * Insert a quantum (and any attached buflet chain) into the
 * allocated-address hash table, taking the pool lock.
 */
void
pp_insert_upp(struct kern_pbufpool *pp, struct __kern_quantum *kqum, pid_t pid)
{
	PP_LOCK(pp);
	pp_insert_upp_common(pp, kqum, pid);
	PP_UNLOCK(pp);
}
1297
1298 void
pp_insert_upp_batch(struct kern_pbufpool * pp,pid_t pid,uint64_t * array,uint32_t num)1299 pp_insert_upp_batch(struct kern_pbufpool *pp, pid_t pid, uint64_t *array,
1300 uint32_t num)
1301 {
1302 uint32_t i = 0;
1303
1304 ASSERT(array != NULL && num > 0);
1305 PP_LOCK(pp);
1306 while (num != 0) {
1307 struct __kern_quantum *kqum = SK_PTR_ADDR_KQUM(array[i]);
1308
1309 ASSERT(kqum != NULL);
1310 pp_insert_upp_common(pp, kqum, pid);
1311 --num;
1312 ++i;
1313 }
1314 PP_UNLOCK(pp);
1315 }
1316
/*
 * Remove the buflet with index "bft_idx" from the buflet
 * allocated-address hash table and mark it unowned.  Returns the
 * buflet, or NULL if no buflet with that index is tracked — this
 * relies on SLIST_FOREACH_SAFE leaving the iterator NULL when the
 * list is exhausted without a break.  Caller must hold the pool lock.
 */
__attribute__((always_inline))
static struct __kern_buflet *
pp_remove_upp_bft_locked(struct kern_pbufpool *pp, obj_idx_t bft_idx)
{
	struct __kern_buflet_ext *kbft, *tbft;
	struct kern_pbufpool_u_bft_bkt *bkt;

	bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, bft_idx);
	SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link, tbft) {
		if (((kern_buflet_t)kbft)->buf_bft_idx_reg == bft_idx) {
			SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
			    kbe_buf_upp_link);
			/* no longer owned by any process */
			kbft->kbe_buf_pid = (pid_t)-1;
			kbft->kbe_buf_upp_link.sle_next = NULL;
			ASSERT(pp->pp_u_bftinuse != 0);
			pp->pp_u_bftinuse--;
			break;
		}
	}
	return (kern_buflet_t)kbft;
}
1338
1339 struct __kern_buflet *
pp_remove_upp_bft(struct kern_pbufpool * pp,obj_idx_t md_idx,int * err)1340 pp_remove_upp_bft(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1341 {
1342 struct __kern_buflet *kbft = pp_remove_upp_bft_locked(pp, md_idx);
1343
1344 *err = __improbable(kbft != NULL) ? 0 : EINVAL;
1345 return kbft;
1346 }
1347
/*
 * Detach the user-visible buflet chain of an externalized quantum from
 * the buflet allocated-address hash table, re-linking the kernel-side
 * chain as it goes and validating it against the user packet's claimed
 * buflet count.  Returns 0 on success or ERANGE if the user-provided
 * chain is malformed (too many buflets, a dangling index, or a count
 * mismatch); on error the chain is truncated at the last valid buflet.
 * Caller must hold the pool lock.
 */
__attribute__((always_inline))
static int
pp_remove_upp_bft_chain_locked(struct kern_pbufpool *pp,
    struct __kern_quantum *kqum)
{
	uint32_t max_frags = pp->pp_max_frags;
	struct __kern_buflet *kbft;
	uint16_t nbfts, upkt_nbfts;
	obj_idx_t bft_idx;

	ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
	/* first chained index comes from the user's view of the quantum */
	bft_idx = kqum->qum_user->qum_buf[0].buf_nbft_idx;
	kbft = &kqum->qum_buf[0];
	if (bft_idx == OBJ_IDX_NONE) {
		/* no external chain attached; nothing to do */
		return 0;
	}

	ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_PACKET);
	struct __kern_packet *kpkt = __DECONST(struct __kern_packet *, kqum);
	struct __user_packet *upkt = __DECONST(struct __user_packet *,
	    kpkt->pkt_qum.qum_user);

	/* the user-claimed buflet count is untrusted; bound it first */
	upkt_nbfts = upkt->pkt_bufs_cnt;
	if (__improbable(upkt_nbfts > max_frags)) {
		SK_ERR("bad bcnt in upkt (%d > %d)", upkt_nbfts, max_frags);
		BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
		BUF_NBFT_ADDR(kbft, 0);
		return ERANGE;
	}

	/* the embedded buflet counts only if it has a buffer attached */
	nbfts = (kbft->buf_addr != 0) ? 1 : 0;

	do {
		struct __kern_buflet *pbft = kbft;
		struct __kern_buflet_ext *kbe;

		kbft = pp_remove_upp_bft_locked(pp, bft_idx);
		if (__improbable(kbft == NULL)) {
			/* dangling index: terminate the chain here */
			BUF_NBFT_IDX(pbft, OBJ_IDX_NONE);
			BUF_NBFT_ADDR(pbft, 0);
			SK_ERR("unallocated next buflet (%d), %p", bft_idx,
			    SK_KVA(pbft));
			return ERANGE;
		}
		ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
		/* re-link the kernel-side chain to the verified buflet */
		BUF_NBFT_IDX(pbft, bft_idx);
		BUF_NBFT_ADDR(pbft, kbft);
		kbe = (struct __kern_buflet_ext *)kbft;
		bft_idx = kbe->kbe_buf_user->buf_nbft_idx;
		++nbfts;
	} while ((bft_idx != OBJ_IDX_NONE) && (nbfts < upkt_nbfts));

	/* terminate the kernel chain at the last verified buflet */
	ASSERT(kbft != NULL);
	BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
	BUF_NBFT_ADDR(kbft, 0);
	*__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = nbfts;

	/* chain length must match what the user packet claimed */
	if (__improbable((bft_idx != OBJ_IDX_NONE) || (nbfts != upkt_nbfts))) {
		SK_ERR("bad buflet in upkt (%d, %d)", nbfts, upkt_nbfts);
		return ERANGE;
	}
	return 0;
}
1411
/*
 * Remove the quantum with metadata index "md_idx" from the packet
 * allocated-address hash table, then detach and validate its buflet
 * chain.  Sets *err to 0, or ERANGE if the quantum is not tracked or
 * its chain is malformed.  Returns the quantum or NULL (NULL results
 * from SLIST_FOREACH_SAFE exhausting the bucket without a match).
 * Caller must hold the pool lock.
 */
struct __kern_quantum *
pp_remove_upp_locked(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
{
	struct __kern_quantum *kqum, *tqum;
	struct kern_pbufpool_u_bkt *bkt;

	bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
	SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
		if (METADATA_IDX(kqum) == md_idx) {
			SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
			    qum_upp_link);
			/* no longer owned by any process */
			kqum->qum_pid = (pid_t)-1;
			ASSERT(pp->pp_u_bufinuse != 0);
			pp->pp_u_bufinuse--;
			break;
		}
	}
	if (__probable(kqum != NULL)) {
		*err = pp_remove_upp_bft_chain_locked(pp, kqum);
	} else {
		*err = ERANGE;
	}
	return kqum;
}
1436
/*
 * Locked wrapper around pp_remove_upp_locked(): remove a quantum by
 * metadata index, taking the pool lock.  See pp_remove_upp_locked()
 * for the *err / return-value contract.
 */
struct __kern_quantum *
pp_remove_upp(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
{
	struct __kern_quantum *kqum;

	PP_LOCK(pp);
	kqum = pp_remove_upp_locked(pp, md_idx, err);
	PP_UNLOCK(pp);
	return kqum;
}
1447
1448 struct __kern_quantum *
pp_find_upp(struct kern_pbufpool * pp,obj_idx_t md_idx)1449 pp_find_upp(struct kern_pbufpool *pp, obj_idx_t md_idx)
1450 {
1451 struct __kern_quantum *kqum, *tqum;
1452 struct kern_pbufpool_u_bkt *bkt;
1453
1454 PP_LOCK(pp);
1455 bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1456 SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1457 if (METADATA_IDX(kqum) == md_idx) {
1458 break;
1459 }
1460 }
1461 PP_UNLOCK(pp);
1462
1463 return kqum;
1464 }
1465
/*
 * Free every quantum in the packet allocated-address hash table that
 * is owned by "pid" (or every quantum, when pid == -1, as done at pool
 * teardown).  Each quantum's buflet chain is detached first, then the
 * quantum is returned to its pool.  Caller must hold the pool lock.
 */
__attribute__((always_inline))
static void
pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid)
{
	struct __kern_quantum *kqum, *tqum;
	struct kern_pbufpool_u_bkt *bkt;
	int i;

	PP_LOCK_ASSERT_HELD(pp);

	/*
	 * TODO: Build a list of packets and batch-free them.
	 */
	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
		bkt = &pp->pp_u_hash_table[i];
		/* SAFE variant: entries are removed while iterating */
		SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
			ASSERT(kqum->qum_pid != (pid_t)-1);
			if (pid != (pid_t)-1 && kqum->qum_pid != pid) {
				continue;
			}
			SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
			    qum_upp_link);
			/* drop the attached buflet chain first */
			pp_remove_upp_bft_chain_locked(pp, kqum);
			/* reset ownership/state before returning to pool */
			kqum->qum_pid = (pid_t)-1;
			kqum->qum_qflags &= ~QUM_F_FINALIZED;
			kqum->qum_ksd = NULL;
			pp_free_packet(__DECONST(struct kern_pbufpool *,
			    kqum->qum_pp), (uint64_t)kqum);
			ASSERT(pp->pp_u_bufinuse != 0);
			pp->pp_u_bufinuse--;
		}
	}
}
1499
/*
 * Free every buflet in the buflet allocated-address hash table that is
 * owned by "pid" (or every buflet, when pid == -1).  Each buflet is
 * unlinked, marked unowned, and returned to the pool.  Caller must
 * hold the pool lock.
 */
__attribute__((always_inline))
static void
pp_purge_upp_bft_locked(struct kern_pbufpool *pp, pid_t pid)
{
	struct __kern_buflet_ext *kbft, *tbft;
	struct kern_pbufpool_u_bft_bkt *bkt;
	int i;

	PP_LOCK_ASSERT_HELD(pp);

	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
		bkt = &pp->pp_u_bft_hash_table[i];
		/* SAFE variant: entries are removed while iterating */
		SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link,
		    tbft) {
			ASSERT(kbft->kbe_buf_pid != (pid_t)-1);
			if (pid != (pid_t)-1 && kbft->kbe_buf_pid != pid) {
				continue;
			}
			SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
			    kbe_buf_upp_link);
			/* mark unowned before returning to the pool */
			kbft->kbe_buf_pid = (pid_t)-1;
			kbft->kbe_buf_upp_link.sle_next = NULL;
			pp_free_buflet(pp, (kern_buflet_t)kbft);
			ASSERT(pp->pp_u_bftinuse != 0);
			pp->pp_u_bftinuse--;
		}
	}
}
1528
/*
 * Free all packets and buflets owned by "pid" (or by anyone, when
 * pid == -1) from both allocated-address hash tables, taking the
 * pool lock.
 */
void
pp_purge_upp(struct kern_pbufpool *pp, pid_t pid)
{
	PP_LOCK(pp);
	pp_purge_upp_locked(pp, pid);
	pp_purge_upp_bft_locked(pp, pid);
	PP_UNLOCK(pp);
}
1537
1538 static int
pp_init_upp_bft_locked(struct kern_pbufpool * pp,boolean_t can_block)1539 pp_init_upp_bft_locked(struct kern_pbufpool *pp, boolean_t can_block)
1540 {
1541 int i, err = 0;
1542
1543 PP_LOCK_ASSERT_HELD(pp);
1544 if (pp->pp_u_bft_hash_table != NULL) {
1545 return 0;
1546 }
1547
1548 /* allocated-address hash table */
1549 pp->pp_u_bft_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
1550 zalloc_noblock(pp_u_htbl_zone);
1551 if (pp->pp_u_bft_hash_table == NULL) {
1552 SK_ERR("failed to zalloc packet buffer pool upp buflet hash table");
1553 err = ENOMEM;
1554 goto fail;
1555 }
1556
1557 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1558 SLIST_INIT(&pp->pp_u_bft_hash_table[i].upp_head);
1559 }
1560
1561 fail:
1562 return err;
1563 }
1564
/*
 * Free the user-buflet hash table, purging any buflets still tracked
 * in it first.  Safe to call when the table was never allocated.
 * Caller must hold the pool lock.
 */
static void
pp_destroy_upp_bft_locked(struct kern_pbufpool *pp)
{
	PP_LOCK_ASSERT_HELD(pp);
	if (pp->pp_u_bft_hash_table != NULL) {
		/* purge anything that's left */
		pp_purge_upp_bft_locked(pp, -1);

#if (DEBUG || DEVELOPMENT)
		for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
			ASSERT(SLIST_EMPTY(&pp->pp_u_bft_hash_table[i].upp_head));
		}
#endif /* DEBUG || DEVELOPMENT */

		zfree(pp_u_htbl_zone, pp->pp_u_bft_hash_table);
		pp->pp_u_bft_hash_table = NULL;
	}
	/* no buflet may remain outstanding after the purge */
	ASSERT(pp->pp_u_bftinuse == 0);
}
1584
/*
 * Record a single external buflet as owned by "pid" in the buflet
 * allocated-address hash table, taking the pool lock.
 */
void
pp_insert_upp_bft(struct kern_pbufpool *pp,
    struct __kern_buflet *kbft, pid_t pid)
{
	PP_LOCK(pp);
	pp_insert_upp_bft_locked(pp, kbft, pid);
	PP_UNLOCK(pp);
}
1593
1594 boolean_t
pp_isempty_upp(struct kern_pbufpool * pp)1595 pp_isempty_upp(struct kern_pbufpool *pp)
1596 {
1597 boolean_t isempty;
1598
1599 PP_LOCK(pp);
1600 isempty = (pp->pp_u_bufinuse == 0);
1601 PP_UNLOCK(pp);
1602
1603 return isempty;
1604 }
1605
/*
 * (Re)initialize a freshly-allocated metadata object: optionally attach
 * "bufcnt" buflets taken from *blist (buffer-on-demand pools), then
 * reset the kernel packet/quantum and its buflets to their initial
 * state.  Returns the initialized quantum, or NULL if buflet
 * construction fails (in which case the caller owns the unwind).
 */
__attribute__((always_inline))
static inline struct __kern_quantum *
pp_metadata_init(struct __metadata_preamble *mdp, struct kern_pbufpool *pp,
    uint16_t bufcnt, uint32_t skmflag, struct skmem_obj **blist)
{
	struct __kern_quantum *kqum;
	struct __user_quantum *uqum;

	/* the quantum lives immediately past the metadata preamble */
	kqum = SK_PTR_ADDR_KQUM((uintptr_t)mdp + METADATA_PREAMBLE_SZ);
	ASSERT(kqum->qum_pp == pp);
	if (__probable(!PP_KERNEL_ONLY(pp))) {
		/* user-visible pool: a mirrored user quantum must exist */
		ASSERT(!(kqum->qum_qflags & QUM_F_KERNEL_ONLY));
		uqum = __DECONST(struct __user_quantum *, kqum->qum_user);
		ASSERT(uqum != NULL);
	} else {
		ASSERT(kqum->qum_qflags & QUM_F_KERNEL_ONLY);
		ASSERT(kqum->qum_user == NULL);
		uqum = NULL;
	}

	/* buffer-on-demand: attach bufcnt buflets pulled from *blist */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 &&
	    pp_metadata_construct(kqum, uqum, METADATA_IDX(kqum), pp,
	    skmflag, bufcnt, FALSE, blist) != 0) {
		return NULL;
	}

	/* (re)construct {user,kernel} metadata */
	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
		struct __kern_buflet *kbuf = &kpkt->pkt_qum_buf;
		uint16_t i;

		/* sanitize flags */
		kpkt->pkt_pflags &= PKT_F_INIT_MASK;

		ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
		    kpkt->pkt_com_opt != NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
		    kpkt->pkt_flow != NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
		    kpkt->pkt_tx_compl != NULL);

		/*
		 * XXX: For now we always set PKT_F_FLOW_DATA;
		 * this is a no-op but done for consistency
		 * with the other PKT_F_*_DATA flags.
		 */
		kpkt->pkt_pflags |= PKT_F_FLOW_DATA;

		/* initialize kernel packet */
		KPKT_INIT(kpkt, QUM_F_INTERNALIZED);

		ASSERT(bufcnt || PP_HAS_BUFFER_ON_DEMAND(pp));
		if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
			/*
			 * On-demand pools keep the embedded buflet empty;
			 * skip to the first chained (external) buflet.
			 */
			ASSERT(kbuf->buf_ctl == NULL);
			ASSERT(kbuf->buf_addr == 0);
			kbuf = __DECONST(struct __kern_buflet *,
			    kbuf->buf_nbft_addr);
		}
		/* initialize kernel buflet */
		for (i = 0; i < bufcnt; i++) {
			ASSERT(kbuf != NULL);
			KBUF_INIT(kbuf);
			kbuf = __DECONST(struct __kern_buflet *,
			    kbuf->buf_nbft_addr);
		}
		/* the chain must end exactly at bufcnt buflets */
		ASSERT((kbuf == NULL) || (bufcnt == 0));
		break;
	}
	default:
		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
		/* kernel quantum */
		KQUM_INIT(kqum, QUM_F_INTERNALIZED);
		KBUF_INIT(&kqum->qum_buf[0]);
		break;
	}

	return kqum;
}
1686
1687 /*
1688 * When PPF_BUFFER_ON_DEMAND flag is set on packet pool creation, we create
1689 * packet descriptor cache with no buffer attached and a buflet cache with
1690 * cpu layer caching enabled. While operating in this mode, we can call
1691 * pp_alloc_packet_common() either with `bufcnt = 0` or `bufcnt = n`,
1692 * where n <= pp->pp_max_frags. If `bufcnt == 0` then we allocate packet
1693 * descriptor with no attached buffer from the metadata cache.
1694 * If `bufcnt != 0`, then this routine allocates packet descriptor and buflets
1695 * from their respective caches and constructs the packet on behalf of the
1696 * caller.
1697 */
/*
 * Batch-allocate up to "num" constructed packets, storing their 64-bit
 * handles (SK_PTR_ENCODE-tagged when "tagged") into "array".  With
 * buffer-on-demand pools, "bufcnt" buflets per packet are allocated
 * separately and attached during pp_metadata_init().  The optional
 * callback "cb" is invoked once per packet with its array position.
 * Returns the number of packets actually produced (<= num); on a
 * mid-batch init failure, all unconsumed metadata and buflets are
 * returned to their caches.
 */
__attribute__((always_inline))
static inline uint32_t
pp_alloc_packet_common(struct kern_pbufpool *pp, uint16_t bufcnt,
    uint64_t *array, uint32_t num, boolean_t tagged, alloc_cb_func_t cb,
    const void *ctx, uint32_t skmflag)
{
	struct __metadata_preamble *mdp;
	struct __kern_quantum *kqum = NULL;
	uint32_t allocp, need = num;
	struct skmem_obj *plist, *blist = NULL;

	ASSERT(bufcnt <= pp->pp_max_frags);
	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));

	/* allocate (constructed) packet(s) with buffer(s) attached */
	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
	    skmflag);

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
		(void) skmem_cache_batch_alloc(pp->pp_kbft_cache, &blist,
		    (allocp * bufcnt), skmflag);
	}

	while (plist != NULL) {
		struct skmem_obj *plistn;

		/* detach head from the list before initializing it */
		plistn = plist->mo_next;
		plist->mo_next = NULL;

		mdp = (struct __metadata_preamble *)(void *)plist;
		kqum = pp_metadata_init(mdp, pp, bufcnt, skmflag, &blist);
		if (kqum == NULL) {
			/* init failed: return leftover buflets and packets */
			if (blist != NULL) {
				skmem_cache_batch_free(pp->pp_kbft_cache,
				    blist);
				blist = NULL;
			}
			plist->mo_next = plistn;
			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
			plist = NULL;
			break;
		}

		if (tagged) {
			*array = SK_PTR_ENCODE(kqum, METADATA_TYPE(kqum),
			    METADATA_SUBTYPE(kqum));
		} else {
			*array = (uint64_t)kqum;
		}

		if (cb != NULL) {
			/* (num - need) is this packet's position in array */
			(cb)(*array, (num - need), ctx);
		}

		++array;
		plist = plistn;

		ASSERT(need > 0);
		--need;
	}
	/* all on-demand buflets must have been consumed or freed */
	ASSERT(blist == NULL);
	ASSERT((num - need) == allocp || kqum == NULL);

	return num - need;
}
1765
1766 uint64_t
pp_alloc_packet(struct kern_pbufpool * pp,uint16_t bufcnt,uint32_t skmflag)1767 pp_alloc_packet(struct kern_pbufpool *pp, uint16_t bufcnt, uint32_t skmflag)
1768 {
1769 uint64_t kpkt = 0;
1770
1771 (void) pp_alloc_packet_common(pp, bufcnt, &kpkt, 1, FALSE,
1772 NULL, NULL, skmflag);
1773
1774 return kpkt;
1775 }
1776
1777 int
pp_alloc_packet_batch(struct kern_pbufpool * pp,uint16_t bufcnt,uint64_t * array,uint32_t * size,boolean_t tagged,alloc_cb_func_t cb,const void * ctx,uint32_t skmflag)1778 pp_alloc_packet_batch(struct kern_pbufpool *pp, uint16_t bufcnt,
1779 uint64_t *array, uint32_t *size, boolean_t tagged, alloc_cb_func_t cb,
1780 const void *ctx, uint32_t skmflag)
1781 {
1782 uint32_t i, n;
1783 int err;
1784
1785 ASSERT(array != NULL && size > 0);
1786
1787 n = *size;
1788 *size = 0;
1789
1790 i = pp_alloc_packet_common(pp, bufcnt, array, n, tagged,
1791 cb, ctx, skmflag);
1792 *size = i;
1793
1794 if (__probable(i == n)) {
1795 err = 0;
1796 } else if (i != 0) {
1797 err = EAGAIN;
1798 } else {
1799 err = ENOMEM;
1800 }
1801
1802 return err;
1803 }
1804
/*
 * Batch-allocate "num" packets (each with "bufcnt" buflets) and enqueue
 * them onto "pktq"; packet-type pools only.  The optional callback "cb"
 * is invoked once per packet with its ordinal.  Returns 0 when the full
 * request was satisfied, EAGAIN on partial success (the packets already
 * enqueued remain on pktq), or ENOMEM when nothing could be allocated.
 */
int
pp_alloc_pktq(struct kern_pbufpool *pp, uint16_t bufcnt,
    struct pktq *pktq, uint32_t num, alloc_cb_func_t cb, const void *ctx,
    uint32_t skmflag)
{
	struct __metadata_preamble *mdp;
	struct __kern_packet *kpkt = NULL;
	uint32_t allocp, need = num;
	struct skmem_obj *plist, *blist = NULL;
	int err;

	ASSERT(pktq != NULL && num > 0);
	ASSERT(pp->pp_md_type == NEXUS_META_TYPE_PACKET);
	ASSERT(bufcnt <= pp->pp_max_frags);
	ASSERT(PP_BATCH_CAPABLE(pp));

	/* allocate (constructed) packet(s) with buffer(s) attached */
	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
	    skmflag);

	/* allocate (constructed) buflet(s) with buffer(s) attached */
	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
		(void) skmem_cache_batch_alloc(pp->pp_kbft_cache, &blist,
		    (allocp * bufcnt), skmflag);
	}

	while (plist != NULL) {
		struct skmem_obj *plistn;

		/* detach head from the list before initializing it */
		plistn = plist->mo_next;
		plist->mo_next = NULL;

		mdp = (struct __metadata_preamble *)(void *)plist;
		kpkt = (struct __kern_packet *)pp_metadata_init(mdp, pp,
		    bufcnt, skmflag, &blist);
		if (kpkt == NULL) {
			/* init failed: return leftover buflets and packets */
			if (blist != NULL) {
				skmem_cache_batch_free(pp->pp_kbft_cache,
				    blist);
				blist = NULL;
			}
			plist->mo_next = plistn;
			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
			plist = NULL;
			break;
		}

		KPKTQ_ENQUEUE(pktq, kpkt);

		if (cb != NULL) {
			/* (num - need) is this packet's ordinal */
			(cb)((uint64_t)kpkt, (num - need), ctx);
		}

		plist = plistn;

		ASSERT(need > 0);
		--need;
	}
	/* all on-demand buflets must have been consumed or freed */
	ASSERT(blist == NULL);
	ASSERT((num - need) == allocp || kpkt == NULL);

	if (__probable(need == 0)) {
		err = 0;
	} else if (need == num) {
		err = ENOMEM;
	} else {
		err = EAGAIN;
	}

	return err;
}
1876
1877 uint64_t
pp_alloc_packet_by_size(struct kern_pbufpool * pp,uint32_t size,uint32_t skmflag)1878 pp_alloc_packet_by_size(struct kern_pbufpool *pp, uint32_t size,
1879 uint32_t skmflag)
1880 {
1881 uint32_t bufcnt = pp->pp_max_frags;
1882 uint64_t kpkt = 0;
1883
1884 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1885 bufcnt =
1886 SK_ROUNDUP(size, pp->pp_buflet_size) / pp->pp_buflet_size;
1887 ASSERT(bufcnt <= UINT16_MAX);
1888 }
1889
1890 (void) pp_alloc_packet_common(pp, (uint16_t)bufcnt, &kpkt, 1, TRUE,
1891 NULL, NULL, skmflag);
1892
1893 return kpkt;
1894 }
1895
/*
 * Finalize a quantum on its way back to the cache: run any pending TX
 * completion callbacks, detach (or free) an attached mbuf or packet,
 * and — for buffer-on-demand pools — destruct the buflet chain onto
 * *blist.  If "mp"/"kpp" are non-NULL, ownership of the detached mbuf
 * or packet is transferred to the caller instead of freeing it here.
 * Returns the metadata preamble for the caller to hand to the cache.
 */
__attribute__((always_inline))
static inline struct __metadata_preamble *
pp_metadata_fini(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
    struct mbuf **mp, struct __kern_packet **kpp, struct skmem_obj **blist)
{
	struct __metadata_preamble *mdp = METADATA_PREAMBLE(kqum);

	/* handle must be untagged by the time it is freed */
	ASSERT(SK_PTR_TAG(kqum) == 0);

	switch (pp->pp_md_type) {
	case NEXUS_META_TYPE_PACKET: {
		struct __kern_packet *kpkt = SK_PTR_KPKT(kqum);

		if ((kpkt->pkt_pflags & PKT_F_TX_COMPL_TS_REQ) != 0) {
			__packet_perform_tx_completion_callbacks(
				SK_PKT2PH(kpkt), NULL);
		}
		if ((kpkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
			/* mbuf and packet attachments are exclusive */
			ASSERT((kpkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
			ASSERT(kpkt->pkt_mbuf != NULL);
			ASSERT(kpkt->pkt_mbuf->m_nextpkt == NULL);
			if (mp != NULL) {
				/* hand the mbuf off to the caller */
				ASSERT(*mp == NULL);
				*mp = kpkt->pkt_mbuf;
			} else {
				m_freem(kpkt->pkt_mbuf);
			}
			KPKT_CLEAR_MBUF_DATA(kpkt);
		} else if ((kpkt->pkt_pflags & PKT_F_PKT_DATA) != 0) {
			ASSERT(kpkt->pkt_pkt != NULL);
			ASSERT(kpkt->pkt_pkt->pkt_nextpkt == NULL);
			if (kpp != NULL) {
				/* hand the packet off to the caller */
				ASSERT(*kpp == NULL);
				*kpp = kpkt->pkt_pkt;
			} else {
				/* can only recurse once */
				ASSERT((kpkt->pkt_pkt->pkt_pflags &
				    PKT_F_PKT_DATA) == 0);
				pp_free_packet_single(kpkt->pkt_pkt);
			}
			KPKT_CLEAR_PKT_DATA(kpkt);
		}
		ASSERT(kpkt->pkt_nextpkt == NULL);
		ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
		ASSERT((kpkt->pkt_pflags & PKT_F_MBUF_MASK) == 0);
		ASSERT((kpkt->pkt_pflags & PKT_F_PKT_MASK) == 0);
		break;
	}
	default:
		break;
	}

	/* on-demand buflets are detached here, not by the cache dtor */
	if (__improbable(PP_HAS_BUFFER_ON_DEMAND(pp))) {
		pp_metadata_destruct_common(kqum, pp, FALSE, blist);
	}
	return mdp;
}
1953
/*
 * Free a chain of packets (linked via pkt_nextpkt) back to their pool in
 * one batched operation.  All packets must belong to the same batch-capable
 * pool as the head of the chain.  Attached mbufs/packets collected during
 * teardown are freed in bulk afterwards; an attached-packet chain is freed
 * with one level of recursion (see pp_metadata_fini()).  If npkt is
 * non-NULL it is set to the number of packets freed.
 */
void
pp_free_packet_chain(struct __kern_packet *pkt_chain, int *npkt)
{
	struct __metadata_preamble *mdp;
	struct skmem_obj *top = NULL;           /* metadata batch list */
	struct skmem_obj *blist = NULL;         /* detached buflet list */
	struct skmem_obj **list = &top;         /* metadata list tail */
	struct mbuf *mtop = NULL;               /* attached mbufs */
	struct mbuf **mp = &mtop;               /* mbuf chain tail */
	struct __kern_packet *kptop = NULL;     /* attached packets */
	struct __kern_packet **kpp = &kptop, *pkt, *next;
	struct kern_pbufpool *pp;
	int c = 0;

	pp = __DECONST(struct kern_pbufpool *, pkt_chain->pkt_qum.qum_pp);
	ASSERT(pp != NULL);
	ASSERT(PP_BATCH_CAPABLE(pp));

	for (pkt = pkt_chain; pkt != NULL; pkt = next) {
		next = pkt->pkt_nextpkt;
		pkt->pkt_nextpkt = NULL;

		/* every packet in the chain must come from the same pool */
		ASSERT(SK_PTR_ADDR_KQUM(pkt)->qum_pp == pp);
		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(pkt), pp,
		    mp, kpp, &blist);

		/* append the finalized metadata to the batch list */
		*list = (struct skmem_obj *)mdp;
		list = &(*list)->mo_next;
		c++;

		/* advance tail pointers if an attachment was handed back */
		if (*mp != NULL) {
			mp = &(*mp)->m_nextpkt;
			ASSERT(*mp == NULL);
		}
		if (*kpp != NULL) {
			kpp = &(*kpp)->pkt_nextpkt;
			ASSERT(*kpp == NULL);
		}
	}

	ASSERT(top != NULL);
	skmem_cache_batch_free(pp->pp_kmd_cache, top);
	if (blist != NULL) {
		skmem_cache_batch_free(pp->pp_kbft_cache, blist);
		blist = NULL;
	}
	if (mtop != NULL) {
		/* free all mbufs that were attached to the freed packets */
		DTRACE_SKYWALK(free__attached__mbuf);
		if (__probable(mtop->m_nextpkt != NULL)) {
			m_freem_list(mtop);
		} else {
			m_freem(mtop);
		}
	}
	if (kptop != NULL) {
		/* free attached packets; recursion is bounded to one level */
		int cnt = 0;
		pp_free_packet_chain(kptop, &cnt);
		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
	}
	if (npkt != NULL) {
		*npkt = c;
	}
}
2017
2018 void
pp_free_pktq(struct pktq * pktq)2019 pp_free_pktq(struct pktq *pktq)
2020 {
2021 if (__improbable(KPKTQ_EMPTY(pktq))) {
2022 return;
2023 }
2024 struct __kern_packet *pkt = KPKTQ_FIRST(pktq);
2025 pp_free_packet_chain(pkt, NULL);
2026 KPKTQ_DISPOSE(pktq);
2027 }
2028
/*
 * Free an array of num packet handles back to pool pp in one batched
 * operation; each consumed array slot is zeroed.  Attached mbufs and
 * packets collected during teardown are freed in bulk afterwards, with
 * attached-packet chains freed via pp_free_packet_chain() (one level of
 * recursion at most — see pp_metadata_fini()).
 */
__attribute__((always_inline))
static inline void
pp_free_packet_array(struct kern_pbufpool *pp, uint64_t *array, uint32_t num)
{
	struct __metadata_preamble *mdp;
	struct skmem_obj *top = NULL;           /* metadata batch list */
	struct skmem_obj *blist = NULL;         /* detached buflet list */
	struct skmem_obj **list = &top;         /* metadata list tail */
	struct mbuf *mtop = NULL;               /* attached mbufs */
	struct mbuf **mp = &mtop;               /* mbuf chain tail */
	struct __kern_packet *kptop = NULL;     /* attached packets */
	struct __kern_packet **kpp = &kptop;
	uint32_t i;

	ASSERT(pp != NULL);
	ASSERT(array != NULL && num > 0);
	ASSERT(PP_BATCH_CAPABLE(pp));

	for (i = 0; i < num; i++) {
		/* every entry must belong to this pool */
		ASSERT(SK_PTR_ADDR_KQUM(array[i])->qum_pp == pp);
		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(array[i]), pp,
		    mp, kpp, &blist);

		/* append the finalized metadata and clear the slot */
		*list = (struct skmem_obj *)mdp;
		list = &(*list)->mo_next;
		array[i] = 0;

		/* advance tail pointers if an attachment was handed back */
		if (*mp != NULL) {
			mp = &(*mp)->m_nextpkt;
			ASSERT(*mp == NULL);
		}
		if (*kpp != NULL) {
			kpp = &(*kpp)->pkt_nextpkt;
			ASSERT(*kpp == NULL);
		}
	}

	ASSERT(top != NULL);
	skmem_cache_batch_free(pp->pp_kmd_cache, top);
	if (blist != NULL) {
		skmem_cache_batch_free(pp->pp_kbft_cache, blist);
		blist = NULL;
	}

	if (mtop != NULL) {
		/* free all mbufs that were attached to the freed packets */
		DTRACE_SKYWALK(free__attached__mbuf);
		if (__probable(mtop->m_nextpkt != NULL)) {
			m_freem_list(mtop);
		} else {
			m_freem(mtop);
		}
	}
	if (kptop != NULL) {
		/* free attached packets; recursion is bounded to one level */
		int cnt = 0;
		pp_free_packet_chain(kptop, &cnt);
		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
	}
}
2087
2088 void
pp_free_packet(struct kern_pbufpool * pp,uint64_t kqum)2089 pp_free_packet(struct kern_pbufpool *pp, uint64_t kqum)
2090 {
2091 pp_free_packet_array(pp, &kqum, 1);
2092 }
2093
2094 void
pp_free_packet_batch(const kern_pbufpool_t pp,uint64_t * array,uint32_t size)2095 pp_free_packet_batch(const kern_pbufpool_t pp, uint64_t *array, uint32_t size)
2096 {
2097 pp_free_packet_array(pp, array, size);
2098 }
2099
2100 void
pp_free_packet_single(struct __kern_packet * pkt)2101 pp_free_packet_single(struct __kern_packet *pkt)
2102 {
2103 ASSERT(pkt->pkt_nextpkt == NULL);
2104 pp_free_packet(__DECONST(struct kern_pbufpool *,
2105 pkt->pkt_qum.qum_pp), SK_PTR_ADDR(pkt));
2106 }
2107
/*
 * Allocate one buffer object from the pool's buffer cache and fill in its
 * object info (*oi).  Returns the buffer address, or 0 on failure.  On
 * DEVELOPMENT/DEBUG kernels a non-zero MTBF setting injects failures into
 * non-blocking allocations for fault testing.
 */
static mach_vm_address_t
pp_alloc_buffer_common(const kern_pbufpool_t pp, struct skmem_obj_info *oi,
    uint32_t skmflag)
{
	mach_vm_address_t baddr;

	/* allocate a cached buffer */
	baddr = (mach_vm_address_t)skmem_cache_alloc(pp->pp_buf_cache,
	    skmflag);

#if (DEVELOPMENT || DEBUG)
	uint64_t mtbf = skmem_region_get_mtbf();
	/*
	 * MTBF is applicable only for non-blocking allocations here.
	 */
	if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
	    (skmflag & SKMEM_NOSLEEP))) {
		SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
		net_update_uptime();
		if (baddr != 0) {
			/* simulate failure: release the buffer just obtained */
			skmem_cache_free(pp->pp_buf_cache, (void *)baddr);
			baddr = 0;
		}
	}
#endif /* (DEVELOPMENT || DEBUG) */

	if (__improbable(baddr == 0)) {
		SK_DF(SK_VERB_MEM, "failed to alloc buffer, pp 0x%llx",
		    SK_KVA(pp));
		return 0;
	}
	/* populate *oi and sanity-check it matches the allocated address */
	skmem_cache_get_obj_info(pp->pp_buf_cache, (void *)baddr, oi, NULL);
	ASSERT(SKMEM_OBJ_BUFCTL(oi) != NULL);
	ASSERT((mach_vm_address_t)SKMEM_OBJ_ADDR(oi) == baddr);
	return baddr;
}
2144
2145 errno_t
pp_alloc_buffer(const kern_pbufpool_t pp,mach_vm_address_t * baddr,kern_segment_t * seg,kern_obj_idx_seg_t * idx,uint32_t skmflag)2146 pp_alloc_buffer(const kern_pbufpool_t pp, mach_vm_address_t *baddr,
2147 kern_segment_t *seg, kern_obj_idx_seg_t *idx, uint32_t skmflag)
2148 {
2149 struct skmem_obj_info oib;
2150
2151 VERIFY(pp != NULL && baddr != NULL);
2152 VERIFY((seg != NULL) == (idx != NULL));
2153
2154 if (__improbable(!PP_HAS_BUFFER_ON_DEMAND(pp))) {
2155 return ENOTSUP;
2156 }
2157
2158 *baddr = pp_alloc_buffer_common(pp, &oib, skmflag);
2159 if (__improbable(*baddr == 0)) {
2160 return ENOMEM;
2161 }
2162
2163 if (seg != NULL) {
2164 ASSERT(SKMEM_OBJ_SEG(&oib) != NULL);
2165 *seg = SKMEM_OBJ_SEG(&oib);
2166 *idx = SKMEM_OBJ_IDX_SEG(&oib);
2167 }
2168 return 0;
2169 }
2170
2171 void
pp_free_buffer(const kern_pbufpool_t pp,mach_vm_address_t addr)2172 pp_free_buffer(const kern_pbufpool_t pp, mach_vm_address_t addr)
2173 {
2174 ASSERT(pp != NULL && addr != 0);
2175 skmem_cache_free(pp->pp_buf_cache, (void *)addr);
2176 }
2177
2178 __attribute__((always_inline))
2179 static inline uint32_t
pp_alloc_buflet_common(struct kern_pbufpool * pp,uint64_t * array,uint32_t num,uint32_t skmflag)2180 pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
2181 uint32_t num, uint32_t skmflag)
2182 {
2183 struct __kern_buflet *kbft = NULL;
2184 uint32_t allocd, need = num;
2185 struct skmem_obj *list;
2186
2187 ASSERT(array != NULL && num > 0);
2188 ASSERT(PP_BATCH_CAPABLE(pp));
2189 ASSERT(pp->pp_kbft_cache != NULL);
2190
2191 allocd = skmem_cache_batch_alloc(pp->pp_kbft_cache, &list, num,
2192 skmflag);
2193
2194 while (list != NULL) {
2195 struct skmem_obj *listn;
2196
2197 listn = list->mo_next;
2198 list->mo_next = NULL;
2199 kbft = (kern_buflet_t)(void *)list;
2200 KBUF_EXT_INIT(kbft, pp);
2201 *array = (uint64_t)kbft;
2202 ++array;
2203 list = listn;
2204 ASSERT(need > 0);
2205 --need;
2206 }
2207 ASSERT((num - need) == allocd || kbft == NULL);
2208 return num - need;
2209 }
2210
2211 errno_t
pp_alloc_buflet(struct kern_pbufpool * pp,kern_buflet_t * kbft,uint32_t skmflag)2212 pp_alloc_buflet(struct kern_pbufpool *pp, kern_buflet_t *kbft, uint32_t skmflag)
2213 {
2214 uint64_t bft;
2215
2216 if (__improbable(!pp_alloc_buflet_common(pp, &bft, 1, skmflag))) {
2217 return ENOMEM;
2218 }
2219 *kbft = (kern_buflet_t)bft;
2220 return 0;
2221 }
2222
2223 errno_t
pp_alloc_buflet_batch(struct kern_pbufpool * pp,uint64_t * array,uint32_t * size,uint32_t skmflag)2224 pp_alloc_buflet_batch(struct kern_pbufpool *pp, uint64_t *array,
2225 uint32_t *size, uint32_t skmflag)
2226 {
2227 uint32_t i, n;
2228 int err;
2229
2230 ASSERT(array != NULL && size > 0);
2231
2232 n = *size;
2233 *size = 0;
2234
2235 i = pp_alloc_buflet_common(pp, array, n, skmflag);
2236 *size = i;
2237
2238 if (__probable(i == n)) {
2239 err = 0;
2240 } else if (i != 0) {
2241 err = EAGAIN;
2242 } else {
2243 err = ENOMEM;
2244 }
2245
2246 return err;
2247 }
2248
/*
 * Release a buflet.  External buflets (BUFLET_FLAG_EXTERNAL) keep their
 * buffer attached for the buflet object's lifetime, so only the buflet
 * itself goes back to the buflet cache.  For an embedded buflet with a
 * buffer attached, the buffer's use count is dropped and the buffer is
 * returned to the buffer cache once no user remains.
 */
__attribute__((always_inline))
static void
pp_free_buflet_common(const kern_pbufpool_t pp, kern_buflet_t kbft)
{
	/* must already be unlinked from any buflet chain */
	ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
	ASSERT(kbft->buf_nbft_addr == 0);

	if (kbft->buf_flag & BUFLET_FLAG_EXTERNAL) {
		ASSERT(kbft->buf_addr != 0);
		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
		ASSERT(kbft->buf_bft_idx_reg != OBJ_IDX_NONE);
		ASSERT(kbft->buf_ctl != NULL);
		/* must not still be linked on a user-packet-pool list */
		ASSERT(((struct __kern_buflet_ext *)kbft)->
		    kbe_buf_upp_link.sle_next == NULL);
		/*
		 * external buflet has buffer attached at construction,
		 * so we don't free the buffer here.
		 */
		skmem_cache_free(pp->pp_kbft_cache, (void *)kbft);
	} else if (__probable(kbft->buf_addr != 0)) {
		void *objaddr = kbft->buf_objaddr;
		uint32_t usecnt = 0;

		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
		ASSERT(kbft->buf_ctl != NULL);
		/* drop our reference; usecnt is the remaining user count */
		KBUF_DTOR(kbft, usecnt);
		SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u",
		    SK_KVA(pp), SK_KVA(objaddr), usecnt);
		if (__probable(usecnt == 0)) {
			/* last user gone; return the buffer to its cache */
			skmem_cache_free(pp->pp_buf_cache, objaddr);
		}
	}
}
2282
2283 void
pp_free_buflet(const kern_pbufpool_t pp,kern_buflet_t kbft)2284 pp_free_buflet(const kern_pbufpool_t pp, kern_buflet_t kbft)
2285 {
2286 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
2287 ASSERT(pp != NULL && kbft != NULL);
2288 pp_free_buflet_common(pp, kbft);
2289 }
2290
/*
 * Reclaim memory from the global packet-option, flow, and completion
 * metadata caches; when purge is TRUE the caches are reaped immediately
 * rather than opportunistically.
 */
void
pp_reap_caches(boolean_t purge)
{
	skmem_cache_reap_now(pp_opt_cache, purge);
	skmem_cache_reap_now(pp_flow_cache, purge);
	skmem_cache_reap_now(pp_compl_cache, purge);
}
2298