xref: /xnu-10063.121.3/bsd/skywalk/packet/pbufpool.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2016-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/packet/pbufpool_var.h>
31 #include <sys/sdt.h>
32 
33 static struct kern_pbufpool *pp_alloc(zalloc_flags_t);
34 static void pp_free(struct kern_pbufpool *);
35 static uint32_t pp_alloc_packet_common(struct kern_pbufpool *, uint16_t,
36     uint64_t *, uint32_t, boolean_t, alloc_cb_func_t, const void *, uint32_t);
37 static void pp_free_packet_array(struct kern_pbufpool *, uint64_t *, uint32_t);
38 static int pp_metadata_ctor_no_buflet(struct skmem_obj_info *,
39     struct skmem_obj_info *, void *, uint32_t);
40 static int pp_metadata_ctor_max_buflet(struct skmem_obj_info *,
41     struct skmem_obj_info *, void *, uint32_t);
42 static void pp_metadata_dtor(void *, void *);
43 static int pp_metadata_construct(struct __kern_quantum *,
44     struct __user_quantum *, obj_idx_t, struct kern_pbufpool *, uint32_t,
45     uint16_t, bool, struct skmem_obj **);
46 static void pp_metadata_destruct(struct __kern_quantum *,
47     struct kern_pbufpool *, bool);
48 static struct __kern_quantum *pp_metadata_init(struct __metadata_preamble *,
49     struct kern_pbufpool *, uint16_t, uint32_t, struct skmem_obj **);
50 static struct __metadata_preamble *pp_metadata_fini(struct __kern_quantum *,
51     struct kern_pbufpool *, struct mbuf **, struct __kern_packet **,
52     struct skmem_obj **, struct skmem_obj **);
53 static void pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid);
54 static void pp_buf_seg_ctor(struct sksegment *, IOSKMemoryBufferRef, void *);
55 static void pp_buf_seg_dtor(struct sksegment *, IOSKMemoryBufferRef, void *);
56 static void pp_destroy_upp_locked(struct kern_pbufpool *);
57 static void pp_destroy_upp_bft_locked(struct kern_pbufpool *);
58 static int pp_init_upp_bft_locked(struct kern_pbufpool *, boolean_t);
59 static void pp_free_buflet_common(const kern_pbufpool_t, kern_buflet_t);
60 static mach_vm_address_t pp_alloc_buffer_common(const kern_pbufpool_t pp,
61     struct skmem_obj_info *oi, uint32_t skmflag, bool large);
62 static inline uint32_t
63 pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
64     uint32_t num, uint32_t skmflag, bool large);
65 
66 #define KERN_PBUFPOOL_U_HASH_SIZE       64      /* hash table size */
67 
68 /*
69  * Since the inputs are small (indices to the metadata region), we can use
 70  * Knuth's multiplicative hash method, which is fast and good enough.  Here
 71  * we multiply the input by 2^32 divided by the golden ratio.  See "The Art of
72  * Computer Programming", section 6.4.
73  */
74 #define KERN_PBUFPOOL_U_HASH_INDEX(_i, _m)                      \
75 	(((_i) * 2654435761U) & (_m))
76 #define KERN_PBUFPOOL_U_HASH(_pp, _i)                           \
77 	(&(_pp)->pp_u_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
78 	KERN_PBUFPOOL_U_HASH_SIZE - 1)])
79 #define KERN_PBUFPOOL_U_BFT_HASH(_pp, _i)                           \
80 	(&(_pp)->pp_u_bft_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
81 	KERN_PBUFPOOL_U_HASH_SIZE - 1)])
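
/*
 * Illustrative sketch (editor's example, not part of this file): how the
 * multiplicative hash above maps a metadata index to a bucket.  The
 * constant 2654435761 is a prime close to 2^32 divided by the golden
 * ratio; masking with (size - 1) acts as a modulo because the table size
 * (KERN_PBUFPOOL_U_HASH_SIZE) is a power of 2.
 */
static inline uint32_t
example_hash_index(uint32_t idx, uint32_t table_size)
{
	/* table_size must be a power of 2 for the mask to act as a modulo */
	return (idx * 2654435761U) & (table_size - 1);
}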
82 
83 static SKMEM_TYPE_DEFINE(pp_zone, struct kern_pbufpool);
84 
85 struct kern_pbufpool_u_htbl {
86 	struct kern_pbufpool_u_bkt upp_hash[KERN_PBUFPOOL_U_HASH_SIZE];
87 };
88 
89 #define PP_U_HTBL_SIZE  sizeof(struct kern_pbufpool_u_htbl)
90 static SKMEM_TYPE_DEFINE(pp_u_htbl_zone, struct kern_pbufpool_u_htbl);
91 
92 static struct skmem_cache *pp_opt_cache;        /* cache for __packet_opt */
93 static struct skmem_cache *pp_flow_cache;       /* cache for __flow */
94 static struct skmem_cache *pp_compl_cache;      /* cache for __packet_compl */
95 
96 static int __pp_inited = 0;
97 
98 int
99 pp_init(void)
100 {
101 	_CASSERT(KPKT_SC_UNSPEC == MBUF_SC_UNSPEC);
102 	_CASSERT(KPKT_SC_BK_SYS == MBUF_SC_BK_SYS);
103 	_CASSERT(KPKT_SC_BK == MBUF_SC_BK);
104 	_CASSERT(KPKT_SC_BE == MBUF_SC_BE);
105 	_CASSERT(KPKT_SC_RD == MBUF_SC_RD);
106 	_CASSERT(KPKT_SC_OAM == MBUF_SC_OAM);
107 	_CASSERT(KPKT_SC_AV == MBUF_SC_AV);
108 	_CASSERT(KPKT_SC_RV == MBUF_SC_RV);
109 	_CASSERT(KPKT_SC_VI == MBUF_SC_VI);
110 	_CASSERT(KPKT_SC_SIG == MBUF_SC_SIG);
111 	_CASSERT(KPKT_SC_VO == MBUF_SC_VO);
112 	_CASSERT(KPKT_SC_CTL == MBUF_SC_CTL);
113 
114 	_CASSERT(KPKT_SC_BK_SYS == PKT_SC_BK_SYS);
115 	_CASSERT(KPKT_SC_BK == PKT_SC_BK);
116 	_CASSERT(KPKT_SC_BE == PKT_SC_BE);
117 	_CASSERT(KPKT_SC_RD == PKT_SC_RD);
118 	_CASSERT(KPKT_SC_OAM == PKT_SC_OAM);
119 	_CASSERT(KPKT_SC_AV == PKT_SC_AV);
120 	_CASSERT(KPKT_SC_RV == PKT_SC_RV);
121 	_CASSERT(KPKT_SC_VI == PKT_SC_VI);
122 	_CASSERT(KPKT_SC_SIG == PKT_SC_SIG);
123 	_CASSERT(KPKT_SC_VO == PKT_SC_VO);
124 	_CASSERT(KPKT_SC_CTL == PKT_SC_CTL);
125 	_CASSERT(KPKT_SC_MAX_CLASSES == MBUF_SC_MAX_CLASSES);
126 
127 	_CASSERT(KPKT_TC_UNSPEC == MBUF_TC_UNSPEC);
128 	_CASSERT(KPKT_TC_BE == MBUF_TC_BE);
129 	_CASSERT(KPKT_TC_BK == MBUF_TC_BK);
130 	_CASSERT(KPKT_TC_VI == MBUF_TC_VI);
131 	_CASSERT(KPKT_TC_VO == MBUF_TC_VO);
132 	_CASSERT(KPKT_TC_MAX == MBUF_TC_MAX);
133 
134 	_CASSERT(KPKT_TC_BE == PKT_TC_BE);
135 	_CASSERT(KPKT_TC_BK == PKT_TC_BK);
136 	_CASSERT(KPKT_TC_VI == PKT_TC_VI);
137 	_CASSERT(KPKT_TC_VO == PKT_TC_VO);
138 
139 	_CASSERT(PKT_SCVAL_BK_SYS == SCVAL_BK_SYS);
140 	_CASSERT(PKT_SCVAL_BK == SCVAL_BK);
141 	_CASSERT(PKT_SCVAL_BE == SCVAL_BE);
142 	_CASSERT(PKT_SCVAL_RD == SCVAL_RD);
143 	_CASSERT(PKT_SCVAL_OAM == SCVAL_OAM);
144 	_CASSERT(PKT_SCVAL_AV == SCVAL_AV);
145 	_CASSERT(PKT_SCVAL_RV == SCVAL_RV);
146 	_CASSERT(PKT_SCVAL_VI == SCVAL_VI);
147 	_CASSERT(PKT_SCVAL_VO == SCVAL_VO);
148 	_CASSERT(PKT_SCVAL_CTL == SCVAL_CTL);
149 
150 	/*
 151 	 * Assert that the values of common packet flags between mbuf and
152 	 * skywalk packets match, and that they are in PKT_F_COMMON_MASK.
153 	 */
154 	_CASSERT(PKT_F_BACKGROUND == PKTF_SO_BACKGROUND);
155 	_CASSERT(PKT_F_REALTIME == PKTF_SO_REALTIME);
156 	_CASSERT(PKT_F_REXMT == PKTF_TCP_REXMT);
157 	_CASSERT(PKT_F_LAST_PKT == PKTF_LAST_PKT);
158 	_CASSERT(PKT_F_FLOW_ID == PKTF_FLOW_ID);
159 	_CASSERT(PKT_F_FLOW_ADV == PKTF_FLOW_ADV);
160 	_CASSERT(PKT_F_TX_COMPL_TS_REQ == PKTF_TX_COMPL_TS_REQ);
161 	_CASSERT(PKT_F_TS_VALID == PKTF_TS_VALID);
162 	_CASSERT(PKT_F_NEW_FLOW == PKTF_NEW_FLOW);
163 	_CASSERT(PKT_F_START_SEQ == PKTF_START_SEQ);
164 	_CASSERT(PKT_F_KEEPALIVE == PKTF_KEEPALIVE);
165 	_CASSERT(PKT_F_WAKE_PKT == PKTF_WAKE_PKT);
166 	_CASSERT(PKT_F_COMMON_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
167 	    PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_FLOW_ID | PKT_F_FLOW_ADV |
168 	    PKT_F_TX_COMPL_TS_REQ | PKT_F_TS_VALID | PKT_F_NEW_FLOW |
169 	    PKT_F_START_SEQ | PKT_F_KEEPALIVE | PKT_F_WAKE_PKT));
170 	/*
171 	 * Assert packet flags shared with userland.
172 	 */
173 	_CASSERT(PKT_F_USER_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME |
174 	    PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_OPT_DATA | PKT_F_PROMISC |
175 	    PKT_F_TRUNCATED | PKT_F_WAKE_PKT | PKT_F_L4S));
176 
177 	_CASSERT(offsetof(struct __kern_quantum, qum_len) ==
178 	    offsetof(struct __kern_packet, pkt_length));
179 
180 	/*
 181 	 * Due to the use of tagged pointers, the size of the metadata
 182 	 * preamble structure must be a multiple of 16.
183 	 * See SK_PTR_TAG() definition for details.
184 	 */
185 	_CASSERT(sizeof(struct __metadata_preamble) != 0 &&
186 	    (sizeof(struct __metadata_preamble) % 16) == 0);
187 
188 	_CASSERT(NX_PBUF_FRAGS_MIN == 1 &&
189 	    NX_PBUF_FRAGS_MIN == NX_PBUF_FRAGS_DEFAULT);
190 
191 	/*
192 	 * Batch alloc/free requires linking the objects together;
193 	 * make sure that the fields are at the same offset since
194 	 * we cast the object to struct skmem_obj.
195 	 */
196 	_CASSERT(offsetof(struct __metadata_preamble, _mdp_next) ==
197 	    offsetof(struct skmem_obj, mo_next));
198 	_CASSERT(offsetof(struct __buflet, __buflet_next) ==
199 	    offsetof(struct skmem_obj, mo_next));
200 
201 	SK_LOCK_ASSERT_HELD();
202 	ASSERT(!__pp_inited);
203 
204 	pp_opt_cache = skmem_cache_create("pkt.opt",
205 	    sizeof(struct __packet_opt), sizeof(uint64_t),
206 	    NULL, NULL, NULL, NULL, NULL, 0);
207 	pp_flow_cache = skmem_cache_create("pkt.flow",
208 	    sizeof(struct __flow), 16,  /* 16-bytes aligned */
209 	    NULL, NULL, NULL, NULL, NULL, 0);
210 	pp_compl_cache = skmem_cache_create("pkt.compl",
211 	    sizeof(struct __packet_compl), sizeof(uint64_t),
212 	    NULL, NULL, NULL, NULL, NULL, 0);
213 
214 	return 0;
215 }
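
/*
 * Illustrative sketch (editor's example, not part of this file): the
 * _CASSERT()s in pp_init() above are compile-time checks.  For instance,
 * the 16-byte preamble requirement could equivalently be written with
 * C11 _Static_assert; the message string here is hypothetical.
 */
_Static_assert((sizeof(struct __metadata_preamble) % 16) == 0,
    "metadata preamble size must be a multiple of 16 for SK_PTR_TAG()");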
216 
217 void
218 pp_fini(void)
219 {
220 	SK_LOCK_ASSERT_HELD();
221 
222 	if (__pp_inited) {
223 		if (pp_compl_cache != NULL) {
224 			skmem_cache_destroy(pp_compl_cache);
225 			pp_compl_cache = NULL;
226 		}
227 		if (pp_flow_cache != NULL) {
228 			skmem_cache_destroy(pp_flow_cache);
229 			pp_flow_cache = NULL;
230 		}
231 		if (pp_opt_cache != NULL) {
232 			skmem_cache_destroy(pp_opt_cache);
233 			pp_opt_cache = NULL;
234 		}
235 
236 		__pp_inited = 0;
237 	}
238 }
239 
240 static struct kern_pbufpool *
241 pp_alloc(zalloc_flags_t how)
242 {
243 	struct kern_pbufpool *pp = zalloc_flags(pp_zone, how | Z_ZERO);
244 
245 	if (pp) {
246 		lck_mtx_init(&pp->pp_lock, &skmem_lock_grp, &skmem_lock_attr);
247 	}
248 	return pp;
249 }
250 
251 static void
252 pp_free(struct kern_pbufpool *pp)
253 {
254 	PP_LOCK_ASSERT_HELD(pp);
255 
256 	pp_destroy(pp);
257 	PP_UNLOCK(pp);
258 
259 	SK_DF(SK_VERB_MEM, "pp 0x%llx FREE", SK_KVA(pp));
260 	lck_mtx_destroy(&pp->pp_lock, &skmem_lock_grp);
261 	zfree(pp_zone, pp);
262 }
263 
264 void
265 pp_retain_locked(struct kern_pbufpool *pp)
266 {
267 	PP_LOCK_ASSERT_HELD(pp);
268 
269 	pp->pp_refcnt++;
270 	ASSERT(pp->pp_refcnt != 0);
271 }
272 
273 void
274 pp_retain(struct kern_pbufpool *pp)
275 {
276 	PP_LOCK(pp);
277 	pp_retain_locked(pp);
278 	PP_UNLOCK(pp);
279 }
280 
281 boolean_t
282 pp_release_locked(struct kern_pbufpool *pp)
283 {
284 	uint32_t oldref = pp->pp_refcnt;
285 
286 	PP_LOCK_ASSERT_HELD(pp);
287 
288 	ASSERT(pp->pp_refcnt != 0);
289 	if (--pp->pp_refcnt == 0) {
290 		pp_free(pp);
291 	}
292 
293 	return oldref == 1;
294 }
295 
296 boolean_t
297 pp_release(struct kern_pbufpool *pp)
298 {
299 	boolean_t lastref;
300 
301 	PP_LOCK(pp);
302 	if (!(lastref = pp_release_locked(pp))) {
303 		PP_UNLOCK(pp);
304 	}
305 
306 	return lastref;
307 }
308 
309 void
310 pp_close(struct kern_pbufpool *pp)
311 {
312 	PP_LOCK(pp);
313 	ASSERT(pp->pp_refcnt > 0);
314 	ASSERT(!(pp->pp_flags & PPF_CLOSED));
315 	pp->pp_flags |= PPF_CLOSED;
316 	if (!pp_release_locked(pp)) {
317 		PP_UNLOCK(pp);
318 	}
319 }
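
/*
 * Illustrative sketch (editor's example, not part of this file): the
 * retain/release discipline implemented above.  pp_release() returns
 * TRUE only when the final reference is dropped, at which point
 * pp_free() has already run and the pool must not be touched again.
 */
static void
example_pp_ref_usage(struct kern_pbufpool *pp)
{
	pp_retain(pp);                  /* +1: co-own the pool */
	/* ... safely use pp here ... */
	if (pp_release(pp)) {           /* -1: TRUE means last reference */
		/* pp has been freed; do not dereference it */
	}
}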
320 
321 void
322 pp_regions_params_adjust(struct skmem_region_params *srp_array,
323     nexus_meta_type_t md_type, nexus_meta_subtype_t md_subtype, uint32_t md_cnt,
324     uint16_t max_frags, uint32_t buf_size, uint32_t large_buf_size,
325     uint32_t buf_cnt, uint32_t buf_seg_size, uint32_t flags)
326 {
327 	struct skmem_region_params *srp, *kmd_srp, *buf_srp, *kbft_srp,
328 	    *lbuf_srp;
329 	uint32_t md_size = 0;
330 	bool kernel_only = ((flags & PP_REGION_CONFIG_KERNEL_ONLY) != 0);
331 	bool md_persistent = ((flags & PP_REGION_CONFIG_MD_PERSISTENT) != 0);
332 	bool buf_persistent = ((flags & PP_REGION_CONFIG_BUF_PERSISTENT) != 0);
333 	bool config_buflet = ((flags & PP_REGION_CONFIG_BUFLET) != 0);
334 	bool md_magazine_enable = ((flags &
335 	    PP_REGION_CONFIG_MD_MAGAZINE_ENABLE) != 0);
336 
337 	ASSERT(max_frags != 0);
338 
339 	switch (md_type) {
340 	case NEXUS_META_TYPE_QUANTUM:
341 		md_size = NX_METADATA_QUANTUM_SZ;
342 		break;
343 	case NEXUS_META_TYPE_PACKET:
344 		md_size = NX_METADATA_PACKET_SZ(max_frags);
345 		break;
346 	default:
347 		VERIFY(0);
348 		/* NOTREACHED */
349 		__builtin_unreachable();
350 	}
351 
352 	switch (flags & PP_REGION_CONFIG_BUF_IODIR_BIDIR) {
353 	case PP_REGION_CONFIG_BUF_IODIR_IN:
354 		kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
355 		buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
356 		lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
357 		kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
358 		break;
359 	case PP_REGION_CONFIG_BUF_IODIR_OUT:
360 		kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
361 		buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
362 		lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
363 		kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
364 		break;
365 	case PP_REGION_CONFIG_BUF_IODIR_BIDIR:
366 	default:
367 		kmd_srp = &srp_array[SKMEM_REGION_KMD];
368 		buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
369 		lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
370 		kbft_srp = &srp_array[SKMEM_REGION_KBFT];
371 		break;
372 	}
373 
374 	/* add preamble size to metadata obj size */
375 	md_size += METADATA_PREAMBLE_SZ;
376 	ASSERT(md_size >= NX_METADATA_OBJ_MIN_SZ);
377 
378 	/* configure kernel metadata region */
379 	kmd_srp->srp_md_type = md_type;
380 	kmd_srp->srp_md_subtype = md_subtype;
381 	kmd_srp->srp_r_obj_cnt = md_cnt;
382 	kmd_srp->srp_r_obj_size = md_size;
383 	kmd_srp->srp_max_frags = max_frags;
384 	ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
385 	if (md_persistent) {
386 		kmd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
387 	}
388 	ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
389 	if (md_magazine_enable) {
390 		kmd_srp->srp_cflags &= ~SKMEM_REGION_CR_NOMAGAZINES;
391 	}
392 	skmem_region_params_config(kmd_srp);
393 
394 	/* configure user metadata region */
395 	srp = &srp_array[SKMEM_REGION_UMD];
396 	if (!kernel_only) {
397 		srp->srp_md_type = kmd_srp->srp_md_type;
398 		srp->srp_md_subtype = kmd_srp->srp_md_subtype;
399 		srp->srp_r_obj_cnt = kmd_srp->srp_c_obj_cnt;
400 		srp->srp_r_obj_size = kmd_srp->srp_c_obj_size;
401 		srp->srp_max_frags = kmd_srp->srp_max_frags;
402 		ASSERT((srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
403 		if (md_persistent) {
404 			srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
405 		}
406 		/*
407 		 * UMD is a mirrored region and object allocation operations
408 		 * are performed on the KMD objects.
409 		 */
410 		ASSERT((srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
411 		skmem_region_params_config(srp);
412 		ASSERT(srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
413 	} else {
414 		ASSERT(srp->srp_r_obj_cnt == 0);
415 		ASSERT(srp->srp_r_obj_size == 0);
416 	}
417 
418 	/* configure buffer region */
419 	buf_srp->srp_r_obj_cnt = MAX(buf_cnt, kmd_srp->srp_c_obj_cnt);
420 	buf_srp->srp_r_obj_size = buf_size;
421 	buf_srp->srp_cflags &= ~SKMEM_REGION_CR_MONOLITHIC;
422 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
423 	if (buf_persistent) {
424 		buf_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
425 	}
426 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
427 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_UREADONLY) == 0);
428 	if ((flags & PP_REGION_CONFIG_BUF_UREADONLY) != 0) {
429 		buf_srp->srp_cflags |= SKMEM_REGION_CR_UREADONLY;
430 	}
431 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_KREADONLY) == 0);
432 	if ((flags & PP_REGION_CONFIG_BUF_KREADONLY) != 0) {
433 		buf_srp->srp_cflags |= SKMEM_REGION_CR_KREADONLY;
434 	}
435 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_MONOLITHIC) == 0);
436 	if ((flags & PP_REGION_CONFIG_BUF_MONOLITHIC) != 0) {
437 		buf_srp->srp_cflags |= SKMEM_REGION_CR_MONOLITHIC;
438 	}
 439 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_SEGPHYSCONTIG) == 0);
440 	if ((flags & PP_REGION_CONFIG_BUF_SEGPHYSCONTIG) != 0) {
441 		buf_srp->srp_cflags |= SKMEM_REGION_CR_SEGPHYSCONTIG;
442 	}
443 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOCACHE) == 0);
444 	if ((flags & PP_REGION_CONFIG_BUF_NOCACHE) != 0) {
445 		buf_srp->srp_cflags |= SKMEM_REGION_CR_NOCACHE;
446 	}
447 	ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_THREADSAFE) == 0);
448 	if ((flags & PP_REGION_CONFIG_BUF_THREADSAFE) != 0) {
449 		buf_srp->srp_cflags |= SKMEM_REGION_CR_THREADSAFE;
450 	}
451 	if (buf_seg_size != 0) {
452 		buf_srp->srp_r_seg_size = buf_seg_size;
453 	}
454 	skmem_region_params_config(buf_srp);
455 
456 	/* configure large buffer region */
457 	if (large_buf_size != 0) {
458 		lbuf_srp->srp_r_obj_cnt = buf_srp->srp_r_obj_cnt;
459 		lbuf_srp->srp_r_obj_size = large_buf_size;
460 		lbuf_srp->srp_r_seg_size = buf_srp->srp_r_seg_size;
461 		lbuf_srp->srp_cflags = buf_srp->srp_cflags;
462 		skmem_region_params_config(lbuf_srp);
463 	}
464 
465 	/* configure kernel buflet region */
466 	if (config_buflet) {
467 		ASSERT(md_type == NEXUS_META_TYPE_PACKET);
468 		/*
469 		 * Ideally we want the number of buflets to be
470 		 * "kmd_srp->srp_c_obj_cnt * (kmd_srp->srp_max_frags - 1)",
 471 		 * so that we have enough buflets when multi-buflet and
 472 		 * shared buffer objects are used together.
 473 		 * Currently, multi-buflet is used only by the user pool,
 474 		 * which doesn't support shared buffer objects; hence, to
 475 		 * reduce the object count, we restrict the number of
 476 		 * buflets to the number of buffers.
477 		 */
478 		kbft_srp->srp_r_obj_cnt = buf_srp->srp_c_obj_cnt +
479 		    lbuf_srp->srp_c_obj_cnt;
480 		kbft_srp->srp_r_obj_size = MAX(sizeof(struct __kern_buflet_ext),
481 		    sizeof(struct __user_buflet));
482 		kbft_srp->srp_cflags = kmd_srp->srp_cflags;
483 		skmem_region_params_config(kbft_srp);
484 		ASSERT(kbft_srp->srp_c_obj_cnt >= buf_srp->srp_c_obj_cnt +
485 		    lbuf_srp->srp_c_obj_cnt);
486 	} else {
487 		ASSERT(kbft_srp->srp_r_obj_cnt == 0);
488 		ASSERT(kbft_srp->srp_r_obj_size == 0);
489 	}
490 
491 	/* configure user buflet region */
492 	srp = &srp_array[SKMEM_REGION_UBFT];
493 	if (config_buflet && !kernel_only) {
494 		srp->srp_r_obj_cnt = kbft_srp->srp_c_obj_cnt;
495 		srp->srp_r_obj_size = kbft_srp->srp_c_obj_size;
496 		srp->srp_cflags = srp_array[SKMEM_REGION_UMD].srp_cflags;
497 		skmem_region_params_config(srp);
498 		ASSERT(srp->srp_c_obj_cnt == kbft_srp->srp_c_obj_cnt);
499 	} else {
500 		ASSERT(srp->srp_r_obj_cnt == 0);
501 		ASSERT(srp->srp_r_obj_size == 0);
502 	}
503 
504 	/* make sure each metadata can be paired with a buffer */
505 	ASSERT(kmd_srp->srp_c_obj_cnt <= buf_srp->srp_c_obj_cnt);
506 }
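
/*
 * Illustrative sketch (editor's example, not part of this file): a
 * hypothetical nexus provider sizing its RX regions before creating a
 * pool.  The PP_REGION_CONFIG_* flags and NEXUS_META_TYPE_PACKET are
 * used above; NEXUS_META_SUBTYPE_RAW and every count and size below are
 * assumptions, made up purely for illustration.
 */
static void
example_adjust_rx_regions(struct skmem_region_params *srp_array)
{
	uint32_t flags = PP_REGION_CONFIG_BUF_IODIR_IN |
	    PP_REGION_CONFIG_BUFLET |
	    PP_REGION_CONFIG_MD_MAGAZINE_ENABLE;

	pp_regions_params_adjust(srp_array, NEXUS_META_TYPE_PACKET,
	    NEXUS_META_SUBTYPE_RAW, 1024 /* md_cnt */, 4 /* max_frags */,
	    2048 /* buf_size */, 16384 /* large_buf_size */,
	    1024 /* buf_cnt */, 0 /* buf_seg_size: use region default */,
	    flags);
}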
507 
508 SK_NO_INLINE_ATTRIBUTE
509 static int
510 pp_metadata_construct(struct __kern_quantum *kqum, struct __user_quantum *uqum,
511     obj_idx_t midx, struct kern_pbufpool *pp, uint32_t skmflag, uint16_t bufcnt,
512     bool raw, struct skmem_obj **blist)
513 {
514 	struct __kern_buflet *kbuf;
515 	mach_vm_address_t baddr = 0;
516 	uint16_t *pbufs_cnt, *pbufs_max;
517 	uint16_t i;
518 
519 	ASSERT(bufcnt == 1 || PP_HAS_BUFFER_ON_DEMAND(pp));
520 
521 	/* construct {user,kernel} metadata */
522 	switch (pp->pp_md_type) {
523 	case NEXUS_META_TYPE_PACKET: {
524 		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
525 		struct __user_packet *upkt = SK_PTR_ADDR_UPKT(uqum);
526 		struct __packet_opt *opt;
527 		struct __flow *flow;
528 		struct __packet_compl *compl;
529 		uint64_t pflags;
530 
531 		if (raw) {
532 			opt = skmem_cache_alloc(pp_opt_cache, SKMEM_SLEEP);
533 			flow = skmem_cache_alloc(pp_flow_cache, SKMEM_SLEEP);
534 			compl = skmem_cache_alloc(pp_compl_cache, SKMEM_SLEEP);
535 			pflags = (PKT_F_OPT_ALLOC | PKT_F_FLOW_ALLOC |
536 			    PKT_F_TX_COMPL_ALLOC);
537 		} else {
538 			ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
539 			    kpkt->pkt_com_opt != NULL);
540 			opt = kpkt->pkt_com_opt;
541 			ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
542 			    kpkt->pkt_flow != NULL);
543 			flow = kpkt->pkt_flow;
544 			ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
545 			    kpkt->pkt_tx_compl != NULL);
546 			compl = kpkt->pkt_tx_compl;
547 			pflags = kpkt->pkt_pflags;
548 		}
549 		/* will be adjusted below as part of allocating buffer(s) */
550 		_CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
551 		_CASSERT(sizeof(kpkt->pkt_bufs_max) == sizeof(uint16_t));
552 		pbufs_cnt = __DECONST(uint16_t *, &kpkt->pkt_bufs_cnt);
553 		pbufs_max = __DECONST(uint16_t *, &kpkt->pkt_bufs_max);
554 
555 		/* kernel (and user) packet */
556 		KPKT_CTOR(kpkt, pflags, opt, flow, compl, midx,
557 		    upkt, pp, 0, pp->pp_max_frags, 0);
558 		break;
559 	}
560 	default:
561 		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
562 		VERIFY(bufcnt == 1);
563 		/* TODO: point these to quantum's once they're defined */
564 		pbufs_cnt = pbufs_max = NULL;
565 		/* kernel quantum */
566 		KQUM_CTOR(kqum, midx, uqum, pp, 0);
567 		break;
568 	}
569 
570 	kbuf = kqum->qum_buf;
571 	for (i = 0; i < bufcnt; i++) {
572 		struct skmem_obj_info oib;
573 
574 		if (!PP_HAS_BUFFER_ON_DEMAND(pp)) {
575 			ASSERT(i == 0);
576 			ASSERT(*blist == NULL);
577 			/*
578 			 * quantum has a native buflet, so we only need a
579 			 * buffer to be allocated and attached to the buflet.
580 			 */
581 			baddr = pp_alloc_buffer_common(pp, &oib, skmflag,
582 			    false);
583 			if (__improbable(baddr == 0)) {
584 				goto fail;
585 			}
586 			KBUF_CTOR(kbuf, baddr, SKMEM_OBJ_IDX_REG(&oib),
587 			    SKMEM_OBJ_BUFCTL(&oib), pp, false);
588 			baddr = 0;
589 		} else {
590 			/*
591 			 * we use pre-constructed buflets with attached buffers.
592 			 */
593 			struct __kern_buflet *pkbuf = kbuf;
594 			struct skmem_obj *blistn;
595 
596 			ASSERT(pkbuf != NULL);
597 			kbuf = (kern_buflet_t)*blist;
598 			if (__improbable(kbuf == NULL)) {
599 				SK_DF(SK_VERB_MEM, "failed to get buflet,"
600 				    " pp 0x%llx", SK_KVA(pp));
601 				goto fail;
602 			}
603 
604 #if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
605 			/* Checking to ensure the object address is tagged */
606 			ASSERT((vm_offset_t)kbuf !=
607 			    vm_memtag_canonicalize_address((vm_offset_t)kbuf));
608 #endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */
609 
610 			blistn = (*blist)->mo_next;
611 			(*blist)->mo_next = NULL;
612 
613 			KBUF_EXT_INIT(kbuf, pp);
614 			KBUF_LINK(pkbuf, kbuf);
615 			*blist = blistn;
616 		}
617 
618 		/* adjust buffer count accordingly */
619 		if (__probable(pbufs_cnt != NULL)) {
620 			*pbufs_cnt += 1;
621 			ASSERT(*pbufs_cnt <= *pbufs_max);
622 		}
623 	}
624 
625 	ASSERT(!PP_KERNEL_ONLY(pp) || (kqum->qum_qflags & QUM_F_KERNEL_ONLY));
626 	ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
627 	SK_DF(SK_VERB_MEM, "pp 0x%llx pkt 0x%llx bufcnt %d buf 0x%llx",
628 	    SK_KVA(pp), SK_KVA(kqum), bufcnt, SK_KVA(baddr));
629 	return 0;
630 
631 fail:
632 	ASSERT(bufcnt != 0 && baddr == 0);
633 	pp_metadata_destruct(kqum, pp, raw);
634 	return ENOMEM;
635 }
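
/*
 * Illustrative sketch (editor's example, not part of this file): the two
 * construction modes above, reduced to a toy with hypothetical types.
 * Without buffer-on-demand, a buffer is allocated and attached to the
 * quantum's single native buflet (KBUF_CTOR); with it, pre-constructed
 * buflets are dequeued from a free list and chained (KBUF_LINK).
 */
struct toy_buflet {
	struct toy_buflet *next;        /* stands in for buf_nbft_addr */
	void *buf;                      /* stands in for buf_addr */
};

static void
example_chain_buflets(struct toy_buflet *native, struct toy_buflet **freelist,
    int bufcnt)
{
	struct toy_buflet *tail = native;
	int i;

	for (i = 0; i < bufcnt && *freelist != NULL; i++) {
		struct toy_buflet *bft = *freelist;     /* like *blist */
		*freelist = bft->next;                  /* advance the list */
		bft->next = NULL;
		tail->next = bft;                       /* like KBUF_LINK() */
		tail = bft;
	}
}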
636 
637 static int
638 pp_metadata_ctor_common(struct skmem_obj_info *oi0,
639     struct skmem_obj_info *oim0, struct kern_pbufpool *pp, uint32_t skmflag,
640     bool no_buflet)
641 {
642 	struct skmem_obj_info _oi, _oim;
643 	struct skmem_obj_info *oi, *oim;
644 	struct __kern_quantum *kqum;
645 	struct __user_quantum *uqum;
646 	uint16_t bufcnt = (no_buflet ? 0 : pp->pp_max_frags);
647 	struct skmem_obj *blist = NULL;
648 	int error;
649 
650 #if (DEVELOPMENT || DEBUG)
651 	uint64_t mtbf = skmem_region_get_mtbf();
652 	/*
653 	 * MTBF is applicable only for non-blocking allocations here.
654 	 */
655 	if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
656 	    (skmflag & SKMEM_NOSLEEP))) {
657 		SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
658 		net_update_uptime();
659 		return ENOMEM;
660 	}
661 #endif /* (DEVELOPMENT || DEBUG) */
662 
663 	/*
664 	 * Note that oi0 and oim0 may be stored inside the object itself;
665 	 * if so, copy them to local variables before constructing.  We
 666 	 * don't test PPF_BATCH here, as the allocator may allocate
667 	 * storage space differently depending on the number of objects.
668 	 */
669 	if (__probable((uintptr_t)oi0 >= (uintptr_t)SKMEM_OBJ_ADDR(oi0) &&
670 	    ((uintptr_t)oi0 + sizeof(*oi0)) <=
671 	    ((uintptr_t)SKMEM_OBJ_ADDR(oi0) + SKMEM_OBJ_SIZE(oi0)))) {
672 		oi = &_oi;
673 		*oi = *oi0;
674 		if (__probable(oim0 != NULL)) {
675 			oim = &_oim;
676 			*oim = *oim0;
677 		} else {
678 			oim = NULL;
679 		}
680 	} else {
681 		oi = oi0;
682 		oim = oim0;
683 	}
684 
685 	kqum = SK_PTR_ADDR_KQUM((uintptr_t)SKMEM_OBJ_ADDR(oi) +
686 	    METADATA_PREAMBLE_SZ);
687 
688 	if (__probable(!PP_KERNEL_ONLY(pp))) {
689 		ASSERT(oim != NULL && SKMEM_OBJ_ADDR(oim) != NULL);
690 		ASSERT(SKMEM_OBJ_SIZE(oi) == SKMEM_OBJ_SIZE(oim));
691 		uqum = SK_PTR_ADDR_UQUM((uintptr_t)SKMEM_OBJ_ADDR(oim) +
692 		    METADATA_PREAMBLE_SZ);
693 	} else {
694 		ASSERT(oim == NULL);
695 		uqum = NULL;
696 	}
697 
698 	if (oim != NULL) {
699 		/* initialize user metadata redzone */
700 		struct __metadata_preamble *mdp = SKMEM_OBJ_ADDR(oim);
701 		mdp->mdp_redzone =
702 		    (SKMEM_OBJ_ROFF(oim) + METADATA_PREAMBLE_SZ) ^
703 		    __ch_umd_redzone_cookie;
704 	}
705 
706 	/* allocate (constructed) buflet(s) with buffer(s) attached */
707 	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0) {
708 		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
709 		    bufcnt, skmflag);
710 	}
711 
712 	error = pp_metadata_construct(kqum, uqum, SKMEM_OBJ_IDX_REG(oi), pp,
713 	    skmflag, bufcnt, TRUE, &blist);
714 	if (__improbable(blist != NULL)) {
715 		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist);
716 		blist = NULL;
717 	}
718 	return error;
719 }
720 
721 static int
722 pp_metadata_ctor_no_buflet(struct skmem_obj_info *oi0,
723     struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
724 {
725 	return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, true);
726 }
727 
728 static int
729 pp_metadata_ctor_max_buflet(struct skmem_obj_info *oi0,
730     struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
731 {
732 	return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, false);
733 }
734 
735 __attribute__((always_inline))
736 static void
737 pp_metadata_destruct_common(struct __kern_quantum *kqum,
738     struct kern_pbufpool *pp, bool raw, struct skmem_obj **blist_def,
739     struct skmem_obj **blist_large)
740 {
741 	struct __kern_buflet *kbuf, *nbuf;
742 	struct skmem_obj *p_blist_def = NULL, *p_blist_large = NULL;
743 	struct skmem_obj **pp_blist_def = &p_blist_def;
744 	struct skmem_obj **pp_blist_large = &p_blist_large;
745 	uint16_t bufcnt, i = 0;
746 	bool first_buflet_empty;
747 
748 	ASSERT(blist_def != NULL);
749 	ASSERT(blist_large != NULL);
750 
751 	switch (pp->pp_md_type) {
752 	case NEXUS_META_TYPE_PACKET: {
753 		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
754 
755 		ASSERT(kpkt->pkt_user != NULL || PP_KERNEL_ONLY(pp));
756 		ASSERT(kpkt->pkt_qum.qum_pp == pp);
757 		ASSERT(METADATA_TYPE(kpkt) == pp->pp_md_type);
758 		ASSERT(METADATA_SUBTYPE(kpkt) == pp->pp_md_subtype);
759 		ASSERT(METADATA_IDX(kpkt) != OBJ_IDX_NONE);
760 		ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
761 		ASSERT(kpkt->pkt_bufs_cnt <= kpkt->pkt_bufs_max);
762 		ASSERT(kpkt->pkt_bufs_max == pp->pp_max_frags);
763 		_CASSERT(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
764 		bufcnt = kpkt->pkt_bufs_cnt;
765 		kbuf = &kqum->qum_buf[0];
766 		/*
767 		 * special handling for empty first buflet.
768 		 */
769 		first_buflet_empty = (kbuf->buf_addr == 0);
770 		*__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = 0;
771 		break;
772 	}
773 	default:
774 		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
775 		ASSERT(kqum->qum_user != NULL || PP_KERNEL_ONLY(pp));
776 		ASSERT(kqum->qum_pp == pp);
777 		ASSERT(METADATA_TYPE(kqum) == pp->pp_md_type);
778 		ASSERT(METADATA_SUBTYPE(kqum) == pp->pp_md_subtype);
779 		ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
780 		ASSERT(kqum->qum_ksd == NULL);
781 		kbuf = &kqum->qum_buf[0];
782 		/*
783 		 * XXX: Special handling for quantum as we don't currently
784 		 * define bufs_{cnt,max} there.  Given that we support at
785 		 * most only 1 buflet for now, check if buf_addr is non-NULL.
786 		 * See related code in pp_metadata_construct().
787 		 */
788 		first_buflet_empty = (kbuf->buf_addr == 0);
789 		bufcnt = first_buflet_empty ? 0 : 1;
790 		break;
791 	}
792 
793 	nbuf = __DECONST(struct __kern_buflet *, kbuf->buf_nbft_addr);
794 	BUF_NBFT_ADDR(kbuf, 0);
795 	BUF_NBFT_IDX(kbuf, OBJ_IDX_NONE);
796 	if (!first_buflet_empty) {
797 		pp_free_buflet_common(pp, kbuf);
798 		++i;
799 	}
800 
801 	while (nbuf != NULL) {
802 		if (BUFLET_HAS_LARGE_BUF(nbuf)) {
803 			*pp_blist_large = (struct skmem_obj *)(void *)nbuf;
804 			pp_blist_large =
805 			    &((struct skmem_obj *)(void *)nbuf)->mo_next;
806 		} else {
807 			*pp_blist_def = (struct skmem_obj *)(void *)nbuf;
808 			pp_blist_def =
809 			    &((struct skmem_obj *)(void *)nbuf)->mo_next;
810 		}
811 		BUF_NBFT_IDX(nbuf, OBJ_IDX_NONE);
812 		nbuf = __DECONST(struct __kern_buflet *, nbuf->buf_nbft_addr);
813 		++i;
814 	}
815 
816 	ASSERT(i == bufcnt);
817 
818 	if (p_blist_def != NULL) {
819 		*pp_blist_def = *blist_def;
820 		*blist_def = p_blist_def;
821 	}
822 	if (p_blist_large != NULL) {
823 		*pp_blist_large = *blist_large;
824 		*blist_large = p_blist_large;
825 	}
826 
827 	/* if we're about to return this object to the slab, clean it up */
828 	if (raw) {
829 		switch (pp->pp_md_type) {
830 		case NEXUS_META_TYPE_PACKET: {
831 			struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
832 
833 			ASSERT(kpkt->pkt_com_opt != NULL ||
834 			    !(kpkt->pkt_pflags & PKT_F_OPT_ALLOC));
835 			if (kpkt->pkt_com_opt != NULL) {
836 				ASSERT(kpkt->pkt_pflags & PKT_F_OPT_ALLOC);
837 				skmem_cache_free(pp_opt_cache,
838 				    kpkt->pkt_com_opt);
839 				kpkt->pkt_com_opt = NULL;
840 			}
841 			ASSERT(kpkt->pkt_flow != NULL ||
842 			    !(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC));
843 			if (kpkt->pkt_flow != NULL) {
844 				ASSERT(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC);
845 				skmem_cache_free(pp_flow_cache, kpkt->pkt_flow);
846 				kpkt->pkt_flow = NULL;
847 			}
848 			ASSERT(kpkt->pkt_tx_compl != NULL ||
849 			    !(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC));
850 			if (kpkt->pkt_tx_compl != NULL) {
851 				ASSERT(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC);
852 				skmem_cache_free(pp_compl_cache,
853 				    kpkt->pkt_tx_compl);
854 				kpkt->pkt_tx_compl = NULL;
855 			}
856 			kpkt->pkt_pflags = 0;
857 			break;
858 		}
859 		default:
860 			ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_QUANTUM);
861 			/* nothing to do for quantum (yet) */
862 			break;
863 		}
864 	}
865 }
866 
867 __attribute__((always_inline))
868 static void
869 pp_metadata_destruct(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
870     bool raw)
871 {
872 	struct skmem_obj *blist_def = NULL, *blist_large = NULL;
873 
874 	pp_metadata_destruct_common(kqum, pp, raw, &blist_def, &blist_large);
875 	if (blist_def != NULL) {
876 		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
877 	}
878 	if (blist_large != NULL) {
879 		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
880 	}
881 }
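
/*
 * Illustrative sketch (editor's example, not part of this file): the
 * free-list threading used by pp_metadata_destruct_common() above.
 * Because __buflet_next and skmem_obj.mo_next share the same offset
 * (asserted in pp_init()), buflets can be cast to struct skmem_obj,
 * chained in order, and returned to the cache in one batch call.
 */
static void
example_batch_free(struct kern_pbufpool *pp, struct skmem_obj **objs,
    uint32_t n)
{
	struct skmem_obj *blist = NULL, **tail = &blist;
	uint32_t i;

	for (i = 0; i < n; i++) {
		*tail = objs[i];        /* append, preserving order */
		tail = &objs[i]->mo_next;
	}
	*tail = NULL;
	if (blist != NULL) {
		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist);
	}
}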
882 
883 static void
884 pp_metadata_dtor(void *addr, void *arg)
885 {
886 	pp_metadata_destruct(SK_PTR_ADDR_KQUM((uintptr_t)addr +
887 	    METADATA_PREAMBLE_SZ), arg, TRUE);
888 }
889 
890 static void
891 pp_buf_seg_ctor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
892 {
893 	struct kern_pbufpool *pp = arg;
894 
895 	if (pp->pp_pbuf_seg_ctor != NULL) {
896 		pp->pp_pbuf_seg_ctor(pp, sg, md);
897 	}
898 }
899 
900 static void
901 pp_buf_seg_dtor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
902 {
903 	struct kern_pbufpool *pp = arg;
904 
905 	if (pp->pp_pbuf_seg_dtor != NULL) {
906 		pp->pp_pbuf_seg_dtor(pp, sg, md);
907 	}
908 }
909 
910 static int
911 pp_buflet_metadata_ctor_common(struct skmem_obj_info *oi0,
912     struct skmem_obj_info *oim0, void *arg, uint32_t skmflag, bool large)
913 {
913 
915 	struct kern_pbufpool *pp = (struct kern_pbufpool *)arg;
916 	struct __kern_buflet *kbft;
917 	struct __user_buflet *ubft;
918 	struct skmem_obj_info oib;
919 	mach_vm_address_t baddr;
920 	obj_idx_t oi_idx_reg;
921 
922 	baddr = pp_alloc_buffer_common(pp, &oib, skmflag, large);
923 	if (__improbable(baddr == 0)) {
924 		return ENOMEM;
925 	}
926 	/*
927 	 * Note that oi0 and oim0 may be stored inside the object itself;
928 	 * so copy what is required to local variables before constructing.
929 	 */
930 	oi_idx_reg = SKMEM_OBJ_IDX_REG(oi0);
931 	kbft = SKMEM_OBJ_ADDR(oi0);
932 
933 	if (__probable(!PP_KERNEL_ONLY(pp))) {
934 		ASSERT(oim0 != NULL && SKMEM_OBJ_ADDR(oim0) != NULL);
935 		ASSERT(SKMEM_OBJ_SIZE(oi0) == SKMEM_OBJ_SIZE(oim0));
936 		ASSERT(oi_idx_reg == SKMEM_OBJ_IDX_REG(oim0));
937 		ASSERT(SKMEM_OBJ_IDX_SEG(oi0) == SKMEM_OBJ_IDX_SEG(oim0));
938 		ubft = SKMEM_OBJ_ADDR(oim0);
939 	} else {
940 		ASSERT(oim0 == NULL);
941 		ubft = NULL;
942 	}
943 	KBUF_EXT_CTOR(kbft, ubft, baddr, SKMEM_OBJ_IDX_REG(&oib),
944 	    SKMEM_OBJ_BUFCTL(&oib), oi_idx_reg, pp, large);
945 	return 0;
946 }
947 
948 static int
949 pp_buflet_default_buffer_metadata_ctor(struct skmem_obj_info *oi0,
950     struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
951 {
952 	return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, false);
953 }
954 
955 static int
956 pp_buflet_large_buffer_metadata_ctor(struct skmem_obj_info *oi0,
957     struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
958 {
959 	return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, true);
960 }
961 
962 static void
963 pp_buflet_metadata_dtor(void *addr, void *arg)
964 {
965 	struct __kern_buflet *kbft = addr;
966 	void *objaddr = kbft->buf_objaddr;
967 	struct kern_pbufpool *pp = arg;
968 	uint32_t usecnt = 0;
969 	bool large = BUFLET_HAS_LARGE_BUF(kbft);
970 
971 	ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
972 	/*
 973 	 * don't assert for (buf_nbft_addr == 0) here, as a constructed
 974 	 * buflet may have this field non-zero.  This is because
 975 	 * buf_nbft_addr (__buflet_next) is used by skmem batch alloc
 976 	 * for chaining the buflets.
 977 	 * To ensure that the freed buflet was not part of a chain, we
978 	 * assert for (buf_nbft_idx == OBJ_IDX_NONE).
979 	 */
980 	ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
981 	ASSERT(((struct __kern_buflet_ext *)kbft)->kbe_buf_upp_link.sle_next ==
982 	    NULL);
983 	ASSERT(kbft->buf_addr != 0);
984 	ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
985 	ASSERT(kbft->buf_ctl != NULL);
986 
987 	KBUF_DTOR(kbft, usecnt);
988 	SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u", SK_KVA(pp),
989 	    SK_KVA(objaddr), usecnt);
990 	if (__probable(usecnt == 0)) {
991 		skmem_cache_free(large ? PP_BUF_CACHE_LARGE(pp) :
992 		    PP_BUF_CACHE_DEF(pp), objaddr);
993 	}
994 }
995 
996 struct kern_pbufpool *
997 pp_create(const char *name, struct skmem_region_params *srp_array,
998     pbuf_seg_ctor_fn_t buf_seg_ctor, pbuf_seg_dtor_fn_t buf_seg_dtor,
999     const void *ctx, pbuf_ctx_retain_fn_t ctx_retain,
1000     pbuf_ctx_release_fn_t ctx_release, uint32_t ppcreatef)
1001 {
1002 	struct kern_pbufpool *pp = NULL;
1003 	uint32_t md_size, def_buf_obj_size;
1004 	uint32_t def_buf_size, large_buf_size;
1005 	nexus_meta_type_t md_type;
1006 	nexus_meta_subtype_t md_subtype;
1007 	uint32_t md_cflags;
1008 	uint16_t max_frags;
1009 	char cname[64];
1010 	struct skmem_region_params *kmd_srp;
1011 	struct skmem_region_params *buf_srp;
1012 	struct skmem_region_params *kbft_srp;
1013 	struct skmem_region_params *umd_srp = NULL;
1014 	struct skmem_region_params *ubft_srp = NULL;
1015 	struct skmem_region_params *lbuf_srp = NULL;
1016 
 1017 	/* buf_seg_{ctor,dtor} must be both NULL or both non-NULL */
1018 	ASSERT(!(!(buf_seg_ctor == NULL && buf_seg_dtor == NULL) &&
1019 	    ((buf_seg_ctor == NULL) ^ (buf_seg_dtor == NULL))));
1020 
1021 	/* ctx{,_retain,_release} must be either ALL NULL or ALL non-NULL */
1022 	ASSERT((ctx == NULL && ctx_retain == NULL && ctx_release == NULL) ||
1023 	    (ctx != NULL && ctx_retain != NULL && ctx_release != NULL));
1024 
1025 	if (srp_array[SKMEM_REGION_KMD].srp_c_obj_cnt != 0) {
1026 		kmd_srp = &srp_array[SKMEM_REGION_KMD];
1027 		buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
1028 		lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
1029 		kbft_srp = &srp_array[SKMEM_REGION_KBFT];
1030 	} else if (srp_array[SKMEM_REGION_RXKMD].srp_c_obj_cnt != 0) {
1031 		kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
1032 		buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
1033 		lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
1034 		kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
1035 	} else {
1036 		VERIFY(srp_array[SKMEM_REGION_TXKMD].srp_c_obj_cnt != 0);
1037 		kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
1038 		buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
1039 		lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
1040 		kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
1041 	}
1042 
1043 	VERIFY(kmd_srp->srp_c_obj_size != 0);
1044 	VERIFY(buf_srp->srp_c_obj_cnt != 0);
1045 	VERIFY(buf_srp->srp_c_obj_size != 0);
1046 
1047 	if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
1048 		VERIFY(kbft_srp->srp_c_obj_cnt != 0);
1049 		VERIFY(kbft_srp->srp_c_obj_size != 0);
1050 	} else {
1051 		kbft_srp = NULL;
1052 	}
1053 
1054 	if ((ppcreatef & PPCREATEF_KERNEL_ONLY) == 0) {
1055 		umd_srp = &srp_array[SKMEM_REGION_UMD];
1056 		ASSERT(umd_srp->srp_c_obj_size == kmd_srp->srp_c_obj_size);
1057 		ASSERT(umd_srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
1058 		ASSERT(umd_srp->srp_c_seg_size == kmd_srp->srp_c_seg_size);
1059 		ASSERT(umd_srp->srp_seg_cnt == kmd_srp->srp_seg_cnt);
1060 		ASSERT(umd_srp->srp_md_type == kmd_srp->srp_md_type);
1061 		ASSERT(umd_srp->srp_md_subtype == kmd_srp->srp_md_subtype);
1062 		ASSERT(umd_srp->srp_max_frags == kmd_srp->srp_max_frags);
1063 		ASSERT((umd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
1064 		    (kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
1065 		if (kbft_srp != NULL) {
1066 			ubft_srp = &srp_array[SKMEM_REGION_UBFT];
1067 			ASSERT(ubft_srp->srp_c_obj_size ==
1068 			    kbft_srp->srp_c_obj_size);
1069 			ASSERT(ubft_srp->srp_c_obj_cnt ==
1070 			    kbft_srp->srp_c_obj_cnt);
1071 			ASSERT(ubft_srp->srp_c_seg_size ==
1072 			    kbft_srp->srp_c_seg_size);
1073 			ASSERT(ubft_srp->srp_seg_cnt == kbft_srp->srp_seg_cnt);
1074 		}
1075 	}
1076 
1077 	md_size = kmd_srp->srp_r_obj_size;
1078 	md_type = kmd_srp->srp_md_type;
1079 	md_subtype = kmd_srp->srp_md_subtype;
1080 	max_frags = kmd_srp->srp_max_frags;
1081 	def_buf_obj_size = buf_srp->srp_c_obj_size;
1082 	def_buf_size = def_buf_obj_size;
1083 	large_buf_size = lbuf_srp->srp_c_obj_size;
1084 
1085 #if (DEBUG || DEVELOPMENT)
1086 	ASSERT(def_buf_obj_size != 0);
1087 	ASSERT(md_type > NEXUS_META_TYPE_INVALID &&
1088 	    md_type <= NEXUS_META_TYPE_MAX);
1089 	if (md_type == NEXUS_META_TYPE_QUANTUM) {
1090 		ASSERT(max_frags == 1);
1091 		ASSERT(md_size >=
1092 		    (METADATA_PREAMBLE_SZ + NX_METADATA_QUANTUM_SZ));
1093 	} else {
1094 		ASSERT(max_frags >= 1);
1095 		ASSERT(md_type == NEXUS_META_TYPE_PACKET);
1096 		ASSERT(md_size >= (METADATA_PREAMBLE_SZ +
1097 		    NX_METADATA_PACKET_SZ(max_frags)));
1098 	}
1099 	ASSERT(md_subtype > NEXUS_META_SUBTYPE_INVALID &&
1100 	    md_subtype <= NEXUS_META_SUBTYPE_MAX);
1101 #endif /* DEBUG || DEVELOPMENT */
1102 
1103 	pp = pp_alloc(Z_WAITOK);
1104 
1105 	(void) snprintf((char *)pp->pp_name, sizeof(pp->pp_name),
1106 	    "skywalk.pp.%s", name);
1107 
1108 	pp->pp_ctx = __DECONST(void *, ctx);
1109 	pp->pp_ctx_retain = ctx_retain;
1110 	pp->pp_ctx_release = ctx_release;
1111 	if (pp->pp_ctx != NULL) {
1112 		pp->pp_ctx_retain(pp->pp_ctx);
1113 	}
1114 
1115 	pp->pp_pbuf_seg_ctor = buf_seg_ctor;
1116 	pp->pp_pbuf_seg_dtor = buf_seg_dtor;
1117 	PP_BUF_SIZE_DEF(pp) = def_buf_size;
1118 	PP_BUF_OBJ_SIZE_DEF(pp) = def_buf_obj_size;
1119 	PP_BUF_SIZE_LARGE(pp) = large_buf_size;
1120 	PP_BUF_OBJ_SIZE_LARGE(pp) = lbuf_srp->srp_c_obj_size;
1121 	pp->pp_md_type = md_type;
1122 	pp->pp_md_subtype = md_subtype;
1123 	pp->pp_max_frags = max_frags;
1124 	if (ppcreatef & PPCREATEF_EXTERNAL) {
1125 		pp->pp_flags |= PPF_EXTERNAL;
1126 	}
1127 	if (ppcreatef & PPCREATEF_TRUNCATED_BUF) {
1128 		pp->pp_flags |= PPF_TRUNCATED_BUF;
1129 	}
1130 	if (ppcreatef & PPCREATEF_KERNEL_ONLY) {
1131 		pp->pp_flags |= PPF_KERNEL;
1132 	}
1133 	if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
1134 		pp->pp_flags |= PPF_BUFFER_ON_DEMAND;
1135 	}
1136 	if (ppcreatef & PPCREATEF_DYNAMIC) {
1137 		pp->pp_flags |= PPF_DYNAMIC;
1138 	}
1139 	if (lbuf_srp->srp_c_obj_cnt > 0) {
1140 		ASSERT(lbuf_srp->srp_c_obj_size != 0);
1141 		pp->pp_flags |= PPF_LARGE_BUF;
1142 	}
1143 
1144 	pp_retain(pp);
1145 
1146 	md_cflags = ((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ?
1147 	    SKMEM_CR_NOMAGAZINES : 0);
1148 	md_cflags |= SKMEM_CR_BATCH;
1149 	pp->pp_flags |= PPF_BATCH;
1150 
1151 	if (pp->pp_flags & PPF_DYNAMIC) {
1152 		md_cflags |= SKMEM_CR_DYNAMIC;
1153 	}
1154 
1155 	if (umd_srp != NULL && (pp->pp_umd_region =
1156 	    skmem_region_create(name, umd_srp, NULL, NULL, NULL)) == NULL) {
1157 		SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1158 		    pp->pp_name, SK_KVA(pp), umd_srp->srp_name);
1159 		goto failed;
1160 	}
1161 
1162 	if ((pp->pp_kmd_region = skmem_region_create(name, kmd_srp, NULL, NULL,
1163 	    NULL)) == NULL) {
1164 		SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1165 		    pp->pp_name, SK_KVA(pp), kmd_srp->srp_name);
1166 		goto failed;
1167 	}
1168 
1169 	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1170 		VERIFY((kbft_srp != NULL) && (kbft_srp->srp_c_obj_cnt > 0));
1171 		if (!PP_KERNEL_ONLY(pp)) {
1172 			VERIFY((ubft_srp != NULL) &&
1173 			    (ubft_srp->srp_c_obj_cnt > 0));
1174 		}
1175 	}
1176 	/*
 1177 	 * The magazines layer and persistency attributes of the metadata
 1178 	 * regions {KMD,KBFT,UBFT} must match.
1179 	 */
1180 	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1181 		ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ==
1182 		    (kbft_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES));
1183 		ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
1184 		    (kbft_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
1185 	}
1186 
1187 	if (PP_HAS_BUFFER_ON_DEMAND(pp) && !PP_KERNEL_ONLY(pp)) {
1188 		if ((pp->pp_ubft_region = skmem_region_create(name, ubft_srp,
1189 		    NULL, NULL, NULL)) == NULL) {
1190 			SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1191 			    pp->pp_name, SK_KVA(pp), ubft_srp->srp_name);
1192 			goto failed;
1193 		}
1194 	}
1195 
1196 	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1197 		if ((pp->pp_kbft_region = skmem_region_create(name,
1198 		    kbft_srp, NULL, NULL, NULL)) == NULL) {
1199 			SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1200 			    pp->pp_name, SK_KVA(pp), kbft_srp->srp_name);
1201 			goto failed;
1202 		}
1203 	}
1204 
1205 	if (!PP_KERNEL_ONLY(pp)) {
1206 		skmem_region_mirror(pp->pp_kmd_region, pp->pp_umd_region);
1207 	}
1208 	if (!PP_KERNEL_ONLY(pp) && pp->pp_ubft_region != NULL) {
1209 		ASSERT(pp->pp_kbft_region != NULL);
1210 		skmem_region_mirror(pp->pp_kbft_region, pp->pp_ubft_region);
1211 	}
1212 
1213 	/*
1214 	 * Create the metadata cache; magazines layer is determined by caller.
1215 	 */
1216 	(void) snprintf(cname, sizeof(cname), "kmd.%s", name);
1217 	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1218 		pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
1219 		    pp_metadata_ctor_no_buflet, pp_metadata_dtor, NULL, pp,
1220 		    pp->pp_kmd_region, md_cflags);
1221 	} else {
1222 		pp->pp_kmd_cache = skmem_cache_create(cname, md_size, 0,
1223 		    pp_metadata_ctor_max_buflet, pp_metadata_dtor, NULL, pp,
1224 		    pp->pp_kmd_region, md_cflags);
1225 	}
1226 
1227 	if (pp->pp_kmd_cache == NULL) {
1228 		SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1229 		    pp->pp_name, SK_KVA(pp), cname);
1230 		goto failed;
1231 	}
1232 
1233 	/*
1234 	 * Create the buflet metadata cache
1235 	 */
1236 	if (pp->pp_kbft_region != NULL) {
1237 		(void) snprintf(cname, sizeof(cname), "kbft_def.%s", name);
1238 		PP_KBFT_CACHE_DEF(pp) = skmem_cache_create(cname,
1239 		    kbft_srp->srp_c_obj_size, 0,
1240 		    pp_buflet_default_buffer_metadata_ctor,
1241 		    pp_buflet_metadata_dtor, NULL, pp, pp->pp_kbft_region,
1242 		    md_cflags);
1243 
1244 		if (PP_KBFT_CACHE_DEF(pp) == NULL) {
1245 			SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1246 			    pp->pp_name, SK_KVA(pp), cname);
1247 			goto failed;
1248 		}
1249 
1250 		if (PP_HAS_LARGE_BUF(pp)) {
 1251 			/* for now, set the aggressive memory-reclaim flag on kbft_large */
1252 			md_cflags |= SKMEM_CR_RECLAIM;
1253 			(void) snprintf(cname, sizeof(cname), "kbft_large.%s",
1254 			    name);
1255 			PP_KBFT_CACHE_LARGE(pp) = skmem_cache_create(cname,
1256 			    kbft_srp->srp_c_obj_size, 0,
1257 			    pp_buflet_large_buffer_metadata_ctor,
1258 			    pp_buflet_metadata_dtor,
1259 			    NULL, pp, pp->pp_kbft_region, md_cflags);
1260 
1261 			if (PP_KBFT_CACHE_LARGE(pp) == NULL) {
1262 				SK_ERR("\"%s\" (0x%llx) failed to "
1263 				    "create \"%s\" cache", pp->pp_name,
1264 				    SK_KVA(pp), cname);
1265 				goto failed;
1266 			}
1267 		}
1268 	}
1269 
1270 	if ((PP_BUF_REGION_DEF(pp) = skmem_region_create(name,
1271 	    buf_srp, pp_buf_seg_ctor, pp_buf_seg_dtor, pp)) == NULL) {
1272 		SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1273 		    pp->pp_name, SK_KVA(pp), buf_srp->srp_name);
1274 		goto failed;
1275 	}
1276 
1277 	if (PP_HAS_LARGE_BUF(pp)) {
1278 		PP_BUF_REGION_LARGE(pp) = skmem_region_create(name, lbuf_srp,
1279 		    pp_buf_seg_ctor, pp_buf_seg_dtor, pp);
1280 		if (PP_BUF_REGION_LARGE(pp) == NULL) {
1281 			SK_ERR("\"%s\" (0x%llx) failed to create %s region",
1282 			    pp->pp_name, SK_KVA(pp), lbuf_srp->srp_name);
1283 			goto failed;
1284 		}
1285 	}
1286 
1287 	/*
1288 	 * Create the buffer object cache without the magazines layer.
1289 	 * We rely on caching the constructed metadata object instead.
1290 	 */
1291 	(void) snprintf(cname, sizeof(cname), "buf_def.%s", name);
1292 	if ((PP_BUF_CACHE_DEF(pp) = skmem_cache_create(cname, def_buf_obj_size,
1293 	    0, NULL, NULL, NULL, pp, PP_BUF_REGION_DEF(pp),
1294 	    SKMEM_CR_NOMAGAZINES)) == NULL) {
1295 		SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1296 		    pp->pp_name, SK_KVA(pp), cname);
1297 		goto failed;
1298 	}
1299 
1300 	if (PP_BUF_REGION_LARGE(pp) != NULL) {
1301 		(void) snprintf(cname, sizeof(cname), "buf_large.%s", name);
1302 		if ((PP_BUF_CACHE_LARGE(pp) = skmem_cache_create(cname,
1303 		    lbuf_srp->srp_c_obj_size, 0, NULL, NULL, NULL, pp,
1304 		    PP_BUF_REGION_LARGE(pp), SKMEM_CR_NOMAGAZINES)) == NULL) {
1305 			SK_ERR("\"%s\" (0x%llx) failed to create \"%s\" cache",
1306 			    pp->pp_name, SK_KVA(pp), cname);
1307 			goto failed;
1308 		}
1309 	}
1310 
1311 	return pp;
1312 
1313 failed:
1314 	if (pp != NULL) {
1315 		if (pp->pp_ctx != NULL) {
1316 			pp->pp_ctx_release(pp->pp_ctx);
1317 			pp->pp_ctx = NULL;
1318 		}
1319 		pp_close(pp);
1320 	}
1321 
1322 	return NULL;
1323 }
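
/*
 * Illustrative sketch (editor's example, not part of this file): a
 * minimal kernel-only pool lifecycle, assuming srp_array has already
 * been sized via pp_regions_params_adjust() as in the earlier sketch.
 */
static void
example_pool_lifecycle(struct skmem_region_params *srp_array)
{
	struct kern_pbufpool *pp;

	pp = pp_create("example", srp_array, NULL, NULL, NULL, NULL, NULL,
	    PPCREATEF_KERNEL_ONLY);
	if (pp == NULL) {
		return;         /* pp_create() already logged the failure */
	}
	/* ... allocate and free packets against pp ... */
	pp_close(pp);           /* sets PPF_CLOSED, drops the initial ref */
}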
1324 
1325 void
1326 pp_destroy(struct kern_pbufpool *pp)
1327 {
1328 	PP_LOCK_ASSERT_HELD(pp);
1329 
1330 	/* may be called for built-in pp with outstanding reference */
1331 	ASSERT(!(pp->pp_flags & PPF_EXTERNAL) || pp->pp_refcnt == 0);
1332 
1333 	pp_destroy_upp_locked(pp);
1334 
1335 	pp_destroy_upp_bft_locked(pp);
1336 
1337 	if (pp->pp_kmd_cache != NULL) {
1338 		skmem_cache_destroy(pp->pp_kmd_cache);
1339 		pp->pp_kmd_cache = NULL;
1340 	}
1341 
1342 	if (pp->pp_umd_region != NULL) {
1343 		skmem_region_release(pp->pp_umd_region);
1344 		pp->pp_umd_region = NULL;
1345 	}
1346 
1347 	if (pp->pp_kmd_region != NULL) {
1348 		skmem_region_release(pp->pp_kmd_region);
1349 		pp->pp_kmd_region = NULL;
1350 	}
1351 
1352 	if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1353 		skmem_cache_destroy(PP_KBFT_CACHE_DEF(pp));
1354 		PP_KBFT_CACHE_DEF(pp) = NULL;
1355 	}
1356 
1357 	if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1358 		skmem_cache_destroy(PP_KBFT_CACHE_LARGE(pp));
1359 		PP_KBFT_CACHE_LARGE(pp) = NULL;
1360 	}
1361 
1362 	if (pp->pp_ubft_region != NULL) {
1363 		skmem_region_release(pp->pp_ubft_region);
1364 		pp->pp_ubft_region = NULL;
1365 	}
1366 
1367 	if (pp->pp_kbft_region != NULL) {
1368 		skmem_region_release(pp->pp_kbft_region);
1369 		pp->pp_kbft_region = NULL;
1370 	}
1371 
1372 	/*
1373 	 * The order is important here, since pp_metadata_dtor()
1374 	 * called by freeing on the pp_kmd_cache will in turn
1375 	 * free the attached buffer.  Therefore destroy the
1376 	 * buffer cache last.
1377 	 */
1378 	if (PP_BUF_CACHE_DEF(pp) != NULL) {
1379 		skmem_cache_destroy(PP_BUF_CACHE_DEF(pp));
1380 		PP_BUF_CACHE_DEF(pp) = NULL;
1381 	}
1382 	if (PP_BUF_REGION_DEF(pp) != NULL) {
1383 		skmem_region_release(PP_BUF_REGION_DEF(pp));
1384 		PP_BUF_REGION_DEF(pp) = NULL;
1385 	}
1386 	if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1387 		skmem_cache_destroy(PP_BUF_CACHE_LARGE(pp));
1388 		PP_BUF_CACHE_LARGE(pp) = NULL;
1389 	}
1390 	if (PP_BUF_REGION_LARGE(pp) != NULL) {
1391 		skmem_region_release(PP_BUF_REGION_LARGE(pp));
1392 		PP_BUF_REGION_LARGE(pp) = NULL;
1393 	}
1394 
1395 	if (pp->pp_ctx != NULL) {
1396 		pp->pp_ctx_release(pp->pp_ctx);
1397 		pp->pp_ctx = NULL;
1398 	}
1399 }
1400 
1401 static int
1402 pp_init_upp_locked(struct kern_pbufpool *pp, boolean_t can_block)
1403 {
1404 	int i, err = 0;
1405 
1406 	if (pp->pp_u_hash_table != NULL) {
1407 		goto done;
1408 	}
1409 
1410 	/* allocated-address hash table */
1411 	pp->pp_u_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
1412 	    zalloc_noblock(pp_u_htbl_zone);
1413 	if (pp->pp_u_hash_table == NULL) {
1414 		SK_ERR("failed to zalloc packet buffer pool upp hash table");
1415 		err = ENOMEM;
1416 		goto done;
1417 	}
1418 
1419 	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1420 		SLIST_INIT(&pp->pp_u_hash_table[i].upp_head);
1421 	}
1422 done:
1423 	return err;
1424 }
1425 
1426 static void
1427 pp_destroy_upp_locked(struct kern_pbufpool *pp)
1428 {
1429 	PP_LOCK_ASSERT_HELD(pp);
1430 	if (pp->pp_u_hash_table != NULL) {
1431 		/* purge anything that's left */
1432 		pp_purge_upp_locked(pp, -1);
1433 
1434 #if (DEBUG || DEVELOPMENT)
1435 		for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1436 			ASSERT(SLIST_EMPTY(&pp->pp_u_hash_table[i].upp_head));
1437 		}
1438 #endif /* DEBUG || DEVELOPMENT */
1439 
1440 		zfree(pp_u_htbl_zone, pp->pp_u_hash_table);
1441 		pp->pp_u_hash_table = NULL;
1442 	}
1443 	ASSERT(pp->pp_u_bufinuse == 0);
1444 }
1445 
1446 int
1447 pp_init_upp(struct kern_pbufpool *pp, boolean_t can_block)
1448 {
1449 	int err = 0;
1450 
1451 	PP_LOCK(pp);
1452 	err = pp_init_upp_locked(pp, can_block);
1453 	if (err) {
1454 		SK_ERR("packet UPP init failed (%d)", err);
1455 		goto done;
1456 	}
1457 	err = pp_init_upp_bft_locked(pp, can_block);
1458 	if (err) {
1459 		SK_ERR("buflet UPP init failed (%d)", err);
1460 		pp_destroy_upp_locked(pp);
1461 		goto done;
1462 	}
1463 	pp_retain_locked(pp);
1464 done:
1465 	PP_UNLOCK(pp);
1466 	return err;
1467 }
1468 
1469 __attribute__((always_inline))
1470 static void
1471 pp_insert_upp_bft_locked(struct kern_pbufpool *pp,
1472     struct __kern_buflet *kbft, pid_t pid)
1473 {
1474 	struct kern_pbufpool_u_bft_bkt *bkt;
1475 	struct __kern_buflet_ext *kbe = (struct __kern_buflet_ext *)kbft;
1476 
1477 	ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1478 	ASSERT(kbe->kbe_buf_pid == (pid_t)-1);
1479 	kbe->kbe_buf_pid = pid;
1480 	bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, kbft->buf_bft_idx_reg);
1481 	SLIST_INSERT_HEAD(&bkt->upp_head, kbe, kbe_buf_upp_link);
1482 	pp->pp_u_bftinuse++;
1483 }
1484 
1485 __attribute__((always_inline))
1486 static void
1487 pp_insert_upp_bft_chain_locked(struct kern_pbufpool *pp,
1488     struct __kern_buflet *kbft, pid_t pid)
1489 {
1490 	while (kbft != NULL) {
1491 		pp_insert_upp_bft_locked(pp, kbft, pid);
1492 		kbft = __DECONST(kern_buflet_t, kbft->buf_nbft_addr);
1493 	}
1494 }
1495 
1496 /* Also inserts the attached chain of buflets */
1497 static inline void
1498 pp_insert_upp_common(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1499     pid_t pid)
1500 {
1501 	struct kern_pbufpool_u_bkt *bkt;
1502 	struct __kern_buflet *kbft;
1503 
1504 	ASSERT(kqum->qum_pid == (pid_t)-1);
1505 	kqum->qum_pid = pid;
1506 
1507 	bkt = KERN_PBUFPOOL_U_HASH(pp, METADATA_IDX(kqum));
1508 	SLIST_INSERT_HEAD(&bkt->upp_head, kqum, qum_upp_link);
1509 	pp->pp_u_bufinuse++;
1510 
1511 	kbft = (kern_buflet_t)kqum->qum_buf[0].buf_nbft_addr;
1512 	if (kbft != NULL) {
1513 		ASSERT(((kern_buflet_t)kbft)->buf_flag & BUFLET_FLAG_EXTERNAL);
1514 		ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
1515 		pp_insert_upp_bft_chain_locked(pp, kbft, pid);
1516 	}
1517 }
1518 
1519 void
1520 pp_insert_upp_locked(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1521     pid_t pid)
1522 {
1523 	pp_insert_upp_common(pp, kqum, pid);
1524 }
1525 
1526 void
1527 pp_insert_upp(struct kern_pbufpool *pp, struct __kern_quantum *kqum, pid_t pid)
1528 {
1529 	PP_LOCK(pp);
1530 	pp_insert_upp_common(pp, kqum, pid);
1531 	PP_UNLOCK(pp);
1532 }
1533 
1534 void
1535 pp_insert_upp_batch(struct kern_pbufpool *pp, pid_t pid, uint64_t *array,
1536     uint32_t num)
1537 {
1538 	uint32_t i = 0;
1539 
1540 	ASSERT(array != NULL && num > 0);
1541 	PP_LOCK(pp);
1542 	while (num != 0) {
1543 		struct __kern_quantum *kqum = SK_PTR_ADDR_KQUM(array[i]);
1544 
1545 		ASSERT(kqum != NULL);
1546 		pp_insert_upp_common(pp, kqum, pid);
1547 		--num;
1548 		++i;
1549 	}
1550 	PP_UNLOCK(pp);
1551 }
1552 
1553 __attribute__((always_inline))
1554 static struct __kern_buflet *
1555 pp_remove_upp_bft_locked(struct kern_pbufpool *pp, obj_idx_t bft_idx)
1556 {
1557 	struct __kern_buflet_ext *kbft, *tbft;
1558 	struct kern_pbufpool_u_bft_bkt *bkt;
1559 
1560 	bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, bft_idx);
1561 	SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link, tbft) {
1562 		if (((kern_buflet_t)kbft)->buf_bft_idx_reg == bft_idx) {
1563 			SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
1564 			    kbe_buf_upp_link);
1565 			kbft->kbe_buf_pid = (pid_t)-1;
1566 			kbft->kbe_buf_upp_link.sle_next = NULL;
1567 			ASSERT(pp->pp_u_bftinuse != 0);
1568 			pp->pp_u_bftinuse--;
1569 			break;
1570 		}
1571 	}
1572 	return (kern_buflet_t)kbft;
1573 }
1574 
1575 struct __kern_buflet *
1576 pp_remove_upp_bft(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1577 {
1578 	struct __kern_buflet *kbft = pp_remove_upp_bft_locked(pp, md_idx);
1579 
1580 	*err = __probable(kbft != NULL) ? 0 : EINVAL;
1581 	return kbft;
1582 }
1583 
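/*
 * Detaches the user-visible buflet chain of an externalized packet and
 * reclaims each external buflet from the UPP hash table, re-linking the
 * kernel-side chain as it walks.  The walk is bounded by the buflet
 * count advertised in the user packet (pkt_bufs_cnt) and capped at
 * pp_max_frags, so untrusted user metadata cannot induce an unbounded
 * traversal; any inconsistency truncates the chain and returns ERANGE.
 */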
1584 __attribute__((always_inline))
1585 static int
1586 pp_remove_upp_bft_chain_locked(struct kern_pbufpool *pp,
1587     struct __kern_quantum *kqum)
1588 {
1589 	uint32_t max_frags = pp->pp_max_frags;
1590 	struct __kern_buflet *kbft;
1591 	uint16_t nbfts, upkt_nbfts;
1592 	obj_idx_t bft_idx;
1593 
1594 	ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
1595 	bft_idx = kqum->qum_user->qum_buf[0].buf_nbft_idx;
1596 	kbft = &kqum->qum_buf[0];
1597 	if (bft_idx == OBJ_IDX_NONE) {
1598 		return 0;
1599 	}
1600 
1601 	ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_PACKET);
1602 	struct __kern_packet *kpkt = __DECONST(struct __kern_packet *, kqum);
1603 	struct __user_packet *upkt = __DECONST(struct __user_packet *,
1604 	    kpkt->pkt_qum.qum_user);
1605 
1606 	upkt_nbfts = upkt->pkt_bufs_cnt;
1607 	if (__improbable(upkt_nbfts > max_frags)) {
1608 		SK_ERR("bad bcnt in upkt (%d > %d)", upkt_nbfts, max_frags);
1609 		BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1610 		BUF_NBFT_ADDR(kbft, 0);
1611 		return ERANGE;
1612 	}
1613 
1614 	nbfts = (kbft->buf_addr != 0) ? 1 : 0;
1615 
1616 	do {
1617 		struct __kern_buflet *pbft = kbft;
1618 		struct __kern_buflet_ext *kbe;
1619 
1620 		kbft = pp_remove_upp_bft_locked(pp, bft_idx);
1621 		if (__improbable(kbft == NULL)) {
1622 			BUF_NBFT_IDX(pbft, OBJ_IDX_NONE);
1623 			BUF_NBFT_ADDR(pbft, 0);
1624 			SK_ERR("unallocated next buflet (%d), %p", bft_idx,
1625 			    SK_KVA(pbft));
1626 			return ERANGE;
1627 		}
1628 		ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1629 		BUF_NBFT_IDX(pbft, bft_idx);
1630 		BUF_NBFT_ADDR(pbft, kbft);
1631 		kbe = (struct __kern_buflet_ext *)kbft;
1632 		bft_idx = kbe->kbe_buf_user->buf_nbft_idx;
1633 		++nbfts;
1634 	} while ((bft_idx != OBJ_IDX_NONE) && (nbfts < upkt_nbfts));
1635 
1636 	ASSERT(kbft != NULL);
1637 	BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1638 	BUF_NBFT_ADDR(kbft, 0);
1639 	*__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = nbfts;
1640 
1641 	if (__improbable((bft_idx != OBJ_IDX_NONE) || (nbfts != upkt_nbfts))) {
1642 		SK_ERR("bad buflet in upkt (%d, %d)", nbfts, upkt_nbfts);
1643 		return ERANGE;
1644 	}
1645 	return 0;
1646 }
1647 
1648 struct __kern_quantum *
1649 pp_remove_upp_locked(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1650 {
1651 	struct __kern_quantum *kqum, *tqum;
1652 	struct kern_pbufpool_u_bkt *bkt;
1653 
1654 	bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1655 	SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1656 		if (METADATA_IDX(kqum) == md_idx) {
1657 			SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
1658 			    qum_upp_link);
1659 			kqum->qum_pid = (pid_t)-1;
1660 			ASSERT(pp->pp_u_bufinuse != 0);
1661 			pp->pp_u_bufinuse--;
1662 			break;
1663 		}
1664 	}
1665 	if (__probable(kqum != NULL)) {
1666 		*err = pp_remove_upp_bft_chain_locked(pp, kqum);
1667 	} else {
1668 		*err = ERANGE;
1669 	}
1670 	return kqum;
1671 }
1672 
1673 struct __kern_quantum *
1674 pp_remove_upp(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1675 {
1676 	struct __kern_quantum *kqum;
1677 
1678 	PP_LOCK(pp);
1679 	kqum = pp_remove_upp_locked(pp, md_idx, err);
1680 	PP_UNLOCK(pp);
1681 	return kqum;
1682 }
1683 
1684 struct __kern_quantum *
1685 pp_find_upp(struct kern_pbufpool *pp, obj_idx_t md_idx)
1686 {
1687 	struct __kern_quantum *kqum, *tqum;
1688 	struct kern_pbufpool_u_bkt *bkt;
1689 
1690 	PP_LOCK(pp);
1691 	bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1692 	SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1693 		if (METADATA_IDX(kqum) == md_idx) {
1694 			break;
1695 		}
1696 	}
1697 	PP_UNLOCK(pp);
1698 
1699 	return kqum;
1700 }
1701 
1702 __attribute__((always_inline))
1703 static void
1704 pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid)
1705 {
1706 	struct __kern_quantum *kqum, *tqum;
1707 	struct kern_pbufpool_u_bkt *bkt;
1708 	int i;
1709 
1710 	PP_LOCK_ASSERT_HELD(pp);
1711 
1712 	/*
1713 	 * TODO: Build a list of packets and batch-free them.
1714 	 */
1715 	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1716 		bkt = &pp->pp_u_hash_table[i];
1717 		SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1718 			ASSERT(kqum->qum_pid != (pid_t)-1);
1719 			if (pid != (pid_t)-1 && kqum->qum_pid != pid) {
1720 				continue;
1721 			}
1722 			SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
1723 			    qum_upp_link);
1724 			pp_remove_upp_bft_chain_locked(pp, kqum);
1725 			kqum->qum_pid = (pid_t)-1;
1726 			kqum->qum_qflags &= ~QUM_F_FINALIZED;
1727 			kqum->qum_ksd = NULL;
1728 			pp_free_packet(__DECONST(struct kern_pbufpool *,
1729 			    kqum->qum_pp), (uint64_t)kqum);
1730 			ASSERT(pp->pp_u_bufinuse != 0);
1731 			pp->pp_u_bufinuse--;
1732 		}
1733 	}
1734 }
1735 
1736 __attribute__((always_inline))
1737 static void
1738 pp_purge_upp_bft_locked(struct kern_pbufpool *pp, pid_t pid)
1739 {
1740 	struct __kern_buflet_ext *kbft, *tbft;
1741 	struct kern_pbufpool_u_bft_bkt *bkt;
1742 	int i;
1743 
1744 	PP_LOCK_ASSERT_HELD(pp);
1745 
1746 	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1747 		bkt = &pp->pp_u_bft_hash_table[i];
1748 		SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link,
1749 		    tbft) {
1750 			ASSERT(kbft->kbe_buf_pid != (pid_t)-1);
1751 			if (pid != (pid_t)-1 && kbft->kbe_buf_pid != pid) {
1752 				continue;
1753 			}
1754 			SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
1755 			    kbe_buf_upp_link);
1756 			kbft->kbe_buf_pid = (pid_t)-1;
1757 			kbft->kbe_buf_upp_link.sle_next = NULL;
1758 			pp_free_buflet(pp, (kern_buflet_t)kbft);
1759 			ASSERT(pp->pp_u_bftinuse != 0);
1760 			pp->pp_u_bftinuse--;
1761 		}
1762 	}
1763 }
1764 
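/*
 * Returns every packet and buflet registered to `pid' back to their
 * caches; a pid of -1 purges the objects of all processes (this is how
 * the destroy paths drain the hash tables before freeing them).
 */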
1765 void
1766 pp_purge_upp(struct kern_pbufpool *pp, pid_t pid)
1767 {
1768 	PP_LOCK(pp);
1769 	pp_purge_upp_locked(pp, pid);
1770 	pp_purge_upp_bft_locked(pp, pid);
1771 	PP_UNLOCK(pp);
1772 }
1773 
1774 static int
1775 pp_init_upp_bft_locked(struct kern_pbufpool *pp, boolean_t can_block)
1776 {
1777 	int i, err = 0;
1778 
1779 	PP_LOCK_ASSERT_HELD(pp);
1780 	if (pp->pp_u_bft_hash_table != NULL) {
1781 		return 0;
1782 	}
1783 
1784 	/* allocated-address hash table */
1785 	pp->pp_u_bft_hash_table = can_block ? zalloc(pp_u_htbl_zone) :
1786 	    zalloc_noblock(pp_u_htbl_zone);
1787 	if (pp->pp_u_bft_hash_table == NULL) {
1788 		SK_ERR("failed to zalloc packet buffer pool upp buflet hash table");
1789 		err = ENOMEM;
1790 		goto fail;
1791 	}
1792 
1793 	for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1794 		SLIST_INIT(&pp->pp_u_bft_hash_table[i].upp_head);
1795 	}
1796 
1797 fail:
1798 	return err;
1799 }
1800 
1801 static void
1802 pp_destroy_upp_bft_locked(struct kern_pbufpool *pp)
1803 {
1804 	PP_LOCK_ASSERT_HELD(pp);
1805 	if (pp->pp_u_bft_hash_table != NULL) {
1806 		/* purge anything that's left */
1807 		pp_purge_upp_bft_locked(pp, -1);
1808 
1809 #if (DEBUG || DEVELOPMENT)
1810 		for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1811 			ASSERT(SLIST_EMPTY(&pp->pp_u_bft_hash_table[i].upp_head));
1812 		}
1813 #endif /* DEBUG || DEVELOPMENT */
1814 
1815 		zfree(pp_u_htbl_zone, pp->pp_u_bft_hash_table);
1816 		pp->pp_u_bft_hash_table = NULL;
1817 	}
1818 	ASSERT(pp->pp_u_bftinuse == 0);
1819 }
1820 
1821 void
1822 pp_insert_upp_bft(struct kern_pbufpool *pp,
1823     struct __kern_buflet *kbft, pid_t pid)
1824 {
1825 	PP_LOCK(pp);
1826 	pp_insert_upp_bft_locked(pp, kbft, pid);
1827 	PP_UNLOCK(pp);
1828 }
1829 
1830 boolean_t
1831 pp_isempty_upp(struct kern_pbufpool *pp)
1832 {
1833 	boolean_t isempty;
1834 
1835 	PP_LOCK(pp);
1836 	isempty = (pp->pp_u_bufinuse == 0);
1837 	PP_UNLOCK(pp);
1838 
1839 	return isempty;
1840 }
1841 
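/*
 * (Re)constructs the kernel and user metadata of a freshly allocated
 * quantum/packet: when the pool uses buffer-on-demand, up to `bufcnt'
 * buflets are attached from `blist'; packet flags are then sanitized and
 * each kernel buflet in the chain is initialized.  Returns NULL if
 * buflet construction fails, in which case the caller frees the
 * metadata and any unused buflets.
 */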
1842 __attribute__((always_inline))
1843 static inline struct __kern_quantum *
1844 pp_metadata_init(struct __metadata_preamble *mdp, struct kern_pbufpool *pp,
1845     uint16_t bufcnt, uint32_t skmflag, struct skmem_obj **blist)
1846 {
1847 	struct __kern_quantum *kqum;
1848 	struct __user_quantum *uqum;
1849 
1850 	kqum = SK_PTR_ADDR_KQUM((uintptr_t)mdp + METADATA_PREAMBLE_SZ);
1851 	ASSERT(kqum->qum_pp == pp);
1852 	if (__probable(!PP_KERNEL_ONLY(pp))) {
1853 		ASSERT(!(kqum->qum_qflags & QUM_F_KERNEL_ONLY));
1854 		uqum = __DECONST(struct __user_quantum *, kqum->qum_user);
1855 		ASSERT(uqum != NULL);
1856 	} else {
1857 		ASSERT(kqum->qum_qflags & QUM_F_KERNEL_ONLY);
1858 		ASSERT(kqum->qum_user == NULL);
1859 		uqum = NULL;
1860 	}
1861 
1862 	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 &&
1863 	    pp_metadata_construct(kqum, uqum, METADATA_IDX(kqum), pp,
1864 	    skmflag, bufcnt, FALSE, blist) != 0) {
1865 		return NULL;
1866 	}
1867 
1868 	/* (re)construct {user,kernel} metadata */
1869 	switch (pp->pp_md_type) {
1870 	case NEXUS_META_TYPE_PACKET: {
1871 		struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
1872 		struct __kern_buflet *kbuf = &kpkt->pkt_qum_buf;
1873 		uint16_t i;
1874 
1875 		/* sanitize flags */
1876 		kpkt->pkt_pflags &= PKT_F_INIT_MASK;
1877 
1878 		ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
1879 		    kpkt->pkt_com_opt != NULL);
1880 		ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
1881 		    kpkt->pkt_flow != NULL);
1882 		ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
1883 		    kpkt->pkt_tx_compl != NULL);
1884 
1885 		/*
1886 		 * XXX: For now we always set PKT_F_FLOW_DATA;
1887 		 * this is a no-op but done for consistency
1888 		 * with the other PKT_F_*_DATA flags.
1889 		 */
1890 		kpkt->pkt_pflags |= PKT_F_FLOW_DATA;
1891 
1892 		/* initialize kernel packet */
1893 		KPKT_INIT(kpkt, QUM_F_INTERNALIZED);
1894 
1895 		ASSERT(bufcnt || PP_HAS_BUFFER_ON_DEMAND(pp));
1896 		if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1897 			ASSERT(kbuf->buf_ctl == NULL);
1898 			ASSERT(kbuf->buf_addr == 0);
1899 			kbuf = __DECONST(struct __kern_buflet *,
1900 			    kbuf->buf_nbft_addr);
1901 		}
1902 		/* initialize kernel buflet */
1903 		for (i = 0; i < bufcnt; i++) {
1904 			ASSERT(kbuf != NULL);
1905 			KBUF_INIT(kbuf);
1906 			kbuf = __DECONST(struct __kern_buflet *,
1907 			    kbuf->buf_nbft_addr);
1908 		}
1909 		ASSERT((kbuf == NULL) || (bufcnt == 0));
1910 		break;
1911 	}
1912 	default:
1913 		ASSERT(pp->pp_md_type == NEXUS_META_TYPE_QUANTUM);
1914 		/* kernel quantum */
1915 		KQUM_INIT(kqum, QUM_F_INTERNALIZED);
1916 		KBUF_INIT(&kqum->qum_buf[0]);
1917 		break;
1918 	}
1919 
1920 	return kqum;
1921 }
1922 
1923 /*
1924  * When the PPF_BUFFER_ON_DEMAND flag is set at packet pool creation, we
1925  * create a packet descriptor cache with no buffers attached and a buflet
1926  * cache with CPU-layer caching enabled. While operating in this mode,
1927  * pp_alloc_packet_common() may be called either with `bufcnt = 0` or
1928  * `bufcnt = n`, where n <= pp->pp_max_frags. If `bufcnt == 0`, we allocate
1929  * a packet descriptor with no attached buffer from the metadata cache.
1930  * If `bufcnt != 0`, this routine allocates the packet descriptor and the
1931  * buflets from their respective caches and constructs the packet on
1932  * behalf of the caller.
1933  */
1934 __attribute__((always_inline))
1935 static inline uint32_t
1936 pp_alloc_packet_common(struct kern_pbufpool *pp, uint16_t bufcnt,
1937     uint64_t *array, uint32_t num, boolean_t tagged, alloc_cb_func_t cb,
1938     const void *ctx, uint32_t skmflag)
1939 {
1940 	struct __metadata_preamble *mdp;
1941 	struct __kern_quantum *kqum = NULL;
1942 	uint32_t allocp, need = num;
1943 	struct skmem_obj *plist, *blist = NULL;
1944 
1945 	ASSERT(bufcnt <= pp->pp_max_frags);
1946 	ASSERT(array != NULL && num > 0);
1947 	ASSERT(PP_BATCH_CAPABLE(pp));
1948 
1949 	/* allocate (constructed) packet(s) with buffer(s) attached */
1950 	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
1951 	    skmflag);
1952 
1953 	/* allocate (constructed) buflet(s) with buffer(s) attached */
1954 	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
1955 		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
1956 		    (allocp * bufcnt), skmflag);
1957 	}
1958 
1959 	while (plist != NULL) {
1960 		struct skmem_obj *plistn;
1961 
1962 		plistn = plist->mo_next;
1963 		plist->mo_next = NULL;
1964 
1965 		mdp = (struct __metadata_preamble *)(void *)plist;
1966 		kqum = pp_metadata_init(mdp, pp, bufcnt, skmflag, &blist);
1967 		if (kqum == NULL) {
1968 			if (blist != NULL) {
1969 				skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
1970 				    blist);
1971 				blist = NULL;
1972 			}
1973 			plist->mo_next = plistn;
1974 			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
1975 			plist = NULL;
1976 			break;
1977 		}
1978 
1979 #if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
1980 		/* ensure the object address is tagged */
1981 		ASSERT((vm_offset_t)kqum !=
1982 		    vm_memtag_canonicalize_address((vm_offset_t)kqum));
1983 #endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */
1984 
1985 		if (tagged) {
1986 			*array = SK_PTR_ENCODE(kqum, METADATA_TYPE(kqum),
1987 			    METADATA_SUBTYPE(kqum));
1988 		} else {
1989 			*array = (uint64_t)kqum;
1990 		}
1991 
1992 		if (cb != NULL) {
1993 			(cb)(*array, (num - need), ctx);
1994 		}
1995 
1996 		++array;
1997 		plist = plistn;
1998 
1999 		ASSERT(need > 0);
2000 		--need;
2001 	}
2002 	ASSERT(blist == NULL);
2003 	ASSERT((num - need) == allocp || kqum == NULL);
2004 
2005 	return num - need;
2006 }
2007 
2008 uint64_t
2009 pp_alloc_packet(struct kern_pbufpool *pp, uint16_t bufcnt, uint32_t skmflag)
2010 {
2011 	uint64_t kpkt = 0;
2012 
2013 	(void) pp_alloc_packet_common(pp, bufcnt, &kpkt, 1, FALSE,
2014 	    NULL, NULL, skmflag);
2015 
2016 	return kpkt;
2017 }
2018 
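/*
 * Usage sketch (illustrative only; the array size and flag choice are
 * hypothetical): callers size the batch up front and must be prepared
 * for a partial result.
 *
 *	uint64_t pkts[32];
 *	uint32_t cnt = 32;
 *	int err = pp_alloc_packet_batch(pp, 1, pkts, &cnt, TRUE,
 *	    NULL, NULL, SKMEM_NOSLEEP);
 *
 * On return, cnt holds the number actually allocated: err is 0 when the
 * whole batch was satisfied, EAGAIN on a partial allocation, and ENOMEM
 * when nothing could be allocated.  Ownership of pkts[0..cnt-1] passes
 * to the caller, who eventually returns them via pp_free_packet_batch().
 */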
2019 int
2020 pp_alloc_packet_batch(struct kern_pbufpool *pp, uint16_t bufcnt,
2021     uint64_t *array, uint32_t *size, boolean_t tagged, alloc_cb_func_t cb,
2022     const void *ctx, uint32_t skmflag)
2023 {
2024 	uint32_t i, n;
2025 	int err;
2026 
2027 	ASSERT(array != NULL && size != NULL);
2028 
2029 	n = *size;
2030 	*size = 0;
2031 
2032 	i = pp_alloc_packet_common(pp, bufcnt, array, n, tagged,
2033 	    cb, ctx, skmflag);
2034 	*size = i;
2035 
2036 	if (__probable(i == n)) {
2037 		err = 0;
2038 	} else if (i != 0) {
2039 		err = EAGAIN;
2040 	} else {
2041 		err = ENOMEM;
2042 	}
2043 
2044 	return err;
2045 }
2046 
2047 int
2048 pp_alloc_pktq(struct kern_pbufpool *pp, uint16_t bufcnt,
2049     struct pktq *pktq, uint32_t num, alloc_cb_func_t cb, const void *ctx,
2050     uint32_t skmflag)
2051 {
2052 	struct __metadata_preamble *mdp;
2053 	struct __kern_packet *kpkt = NULL;
2054 	uint32_t allocp, need = num;
2055 	struct skmem_obj *plist, *blist = NULL;
2056 	int err;
2057 
2058 	ASSERT(pktq != NULL && num > 0);
2059 	ASSERT(pp->pp_md_type == NEXUS_META_TYPE_PACKET);
2060 	ASSERT(bufcnt <= pp->pp_max_frags);
2061 	ASSERT(PP_BATCH_CAPABLE(pp));
2062 
2063 	/* allocate (constructed) packet(s) with buffer(s) attached */
2064 	allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist, num,
2065 	    skmflag);
2066 
2067 	/* allocate (constructed) buflet(s) with buffer(s) attached */
2068 	if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
2069 		(void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
2070 		    (allocp * bufcnt), skmflag);
2071 	}
2072 
2073 	while (plist != NULL) {
2074 		struct skmem_obj *plistn;
2075 
2076 		plistn = plist->mo_next;
2077 		plist->mo_next = NULL;
2078 
2079 		mdp = (struct __metadata_preamble *)(void *)plist;
2080 		kpkt = (struct __kern_packet *)pp_metadata_init(mdp, pp,
2081 		    bufcnt, skmflag, &blist);
2082 		if (kpkt == NULL) {
2083 			if (blist != NULL) {
2084 				skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
2085 				    blist);
2086 				blist = NULL;
2087 			}
2088 			plist->mo_next = plistn;
2089 			skmem_cache_batch_free(pp->pp_kmd_cache, plist);
2090 			plist = NULL;
2091 			break;
2092 		}
2093 
2094 #if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
2095 		/* ensure the object address is tagged */
2096 		ASSERT((vm_offset_t)kpkt !=
2097 		    vm_memtag_canonicalize_address((vm_offset_t)kpkt));
2098 #endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */
2099 
2100 		KPKTQ_ENQUEUE(pktq, kpkt);
2101 
2102 		if (cb != NULL) {
2103 			(cb)((uint64_t)kpkt, (num - need), ctx);
2104 		}
2105 
2106 		plist = plistn;
2107 
2108 		ASSERT(need > 0);
2109 		--need;
2110 	}
2111 	ASSERT(blist == NULL);
2112 	ASSERT((num - need) == allocp || kpkt == NULL);
2113 
2114 	if (__probable(need == 0)) {
2115 		err = 0;
2116 	} else if (need == num) {
2117 		err = ENOMEM;
2118 	} else {
2119 		err = EAGAIN;
2120 	}
2121 
2122 	return err;
2123 }
2124 
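/*
 * Allocates a single tagged packet large enough to cover `size' bytes.
 * For buffer-on-demand pools the buflet count is the request rounded up
 * to whole default-sized buffers; e.g. with a (hypothetical) 2 KB
 * PP_BUF_SIZE_DEF, a size of 5000 gives SK_ROUNDUP(5000, 2048) / 2048
 * == 3 buflets.  Other pools always use pp_max_frags.
 */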
2125 uint64_t
2126 pp_alloc_packet_by_size(struct kern_pbufpool *pp, uint32_t size,
2127     uint32_t skmflag)
2128 {
2129 	uint32_t bufcnt = pp->pp_max_frags;
2130 	uint64_t kpkt = 0;
2131 
2132 	if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
2133 		bufcnt =
2134 		    SK_ROUNDUP(size, PP_BUF_SIZE_DEF(pp)) / PP_BUF_SIZE_DEF(pp);
2135 		ASSERT(bufcnt <= UINT16_MAX);
2136 	}
2137 
2138 	(void) pp_alloc_packet_common(pp, (uint16_t)bufcnt, &kpkt, 1, TRUE,
2139 	    NULL, NULL, skmflag);
2140 
2141 	return kpkt;
2142 }
2143 
2144 __attribute__((always_inline))
2145 static inline struct __metadata_preamble *
2146 pp_metadata_fini(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
2147     struct mbuf **mp, struct __kern_packet **kpp, struct skmem_obj **blist_def,
2148     struct skmem_obj **blist_large)
2149 {
2150 	struct __metadata_preamble *mdp = METADATA_PREAMBLE(kqum);
2151 
2152 	ASSERT(SK_PTR_TAG(kqum) == 0);
2153 
2154 	switch (pp->pp_md_type) {
2155 	case NEXUS_META_TYPE_PACKET: {
2156 		struct __kern_packet *kpkt = SK_PTR_KPKT(kqum);
2157 
2158 		if ((kpkt->pkt_pflags & PKT_F_TX_COMPL_TS_REQ) != 0) {
2159 			__packet_perform_tx_completion_callbacks(
2160 				SK_PKT2PH(kpkt), NULL);
2161 		}
2162 		if ((kpkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
2163 			ASSERT((kpkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
2164 			ASSERT(kpkt->pkt_mbuf != NULL);
2165 			ASSERT(kpkt->pkt_mbuf->m_nextpkt == NULL);
2166 			if (mp != NULL) {
2167 				ASSERT(*mp == NULL);
2168 				*mp = kpkt->pkt_mbuf;
2169 			} else {
2170 				m_freem(kpkt->pkt_mbuf);
2171 			}
2172 			KPKT_CLEAR_MBUF_DATA(kpkt);
2173 		} else if ((kpkt->pkt_pflags & PKT_F_PKT_DATA) != 0) {
2174 			ASSERT(kpkt->pkt_pkt != NULL);
2175 			ASSERT(kpkt->pkt_pkt->pkt_nextpkt == NULL);
2176 			if (kpp != NULL) {
2177 				ASSERT(*kpp == NULL);
2178 				*kpp = kpkt->pkt_pkt;
2179 			} else {
2180 				/* can only recurse once */
2181 				ASSERT((kpkt->pkt_pkt->pkt_pflags &
2182 				    PKT_F_PKT_DATA) == 0);
2183 				pp_free_packet_single(kpkt->pkt_pkt);
2184 			}
2185 			KPKT_CLEAR_PKT_DATA(kpkt);
2186 		}
2187 		kpkt->pkt_pflags &= ~PKT_F_TRUNCATED;
2188 		ASSERT(kpkt->pkt_nextpkt == NULL);
2189 		ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
2190 		ASSERT((kpkt->pkt_pflags & PKT_F_MBUF_MASK) == 0);
2191 		ASSERT((kpkt->pkt_pflags & PKT_F_PKT_MASK) == 0);
2192 		break;
2193 	}
2194 	default:
2195 		break;
2196 	}
2197 
2198 	if (__improbable(PP_HAS_BUFFER_ON_DEMAND(pp))) {
2199 		pp_metadata_destruct_common(kqum, pp, FALSE, blist_def,
2200 		    blist_large);
2201 	}
2202 	return mdp;
2203 }
2204 
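/*
 * Batch-frees a chain of packets that all belong to the same pool.
 * Rather than freeing one object at a time, the loop strings the
 * metadata preambles into a single skmem_obj list (with separate lists
 * for detached default and large buflets) so that each cache is visited
 * once.  Attached mbufs are linked through m_nextpkt and disposed of
 * with m_freem_list(); attached packets are collected and released with
 * one further pp_free_packet_chain() pass.
 */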
2205 void
2206 pp_free_packet_chain(struct __kern_packet *pkt_chain, int *npkt)
2207 {
2208 	struct __metadata_preamble *mdp;
2209 	struct skmem_obj *top = NULL;
2210 	struct skmem_obj *blist_def = NULL;
2211 	struct skmem_obj *blist_large = NULL;
2212 	struct skmem_obj **list = &top;
2213 	struct mbuf *mtop = NULL;
2214 	struct mbuf **mp = &mtop;
2215 	struct __kern_packet *kptop = NULL;
2216 	struct __kern_packet **kpp = &kptop, *pkt, *next;
2217 	struct kern_pbufpool *pp;
2218 	int c = 0;
2219 
2220 	pp = __DECONST(struct kern_pbufpool *, pkt_chain->pkt_qum.qum_pp);
2221 	ASSERT(pp != NULL);
2222 	ASSERT(PP_BATCH_CAPABLE(pp));
2223 
2224 	for (pkt = pkt_chain; pkt != NULL; pkt = next) {
2225 		next = pkt->pkt_nextpkt;
2226 		pkt->pkt_nextpkt = NULL;
2227 
2228 		ASSERT(SK_PTR_ADDR_KQUM(pkt)->qum_pp == pp);
2229 		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(pkt), pp,
2230 		    mp, kpp, &blist_def, &blist_large);
2231 
2232 		*list = (struct skmem_obj *)mdp;
2233 		list = &(*list)->mo_next;
2234 		c++;
2235 
2236 		if (*mp != NULL) {
2237 			mp = &(*mp)->m_nextpkt;
2238 			ASSERT(*mp == NULL);
2239 		}
2240 		if (*kpp != NULL) {
2241 			kpp = &(*kpp)->pkt_nextpkt;
2242 			ASSERT(*kpp == NULL);
2243 		}
2244 	}
2245 
2246 	ASSERT(top != NULL);
2247 	skmem_cache_batch_free(pp->pp_kmd_cache, top);
2248 	if (blist_def != NULL) {
2249 		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
2250 		blist_def = NULL;
2251 	}
2252 	if (blist_large != NULL) {
2253 		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
2254 		blist_large = NULL;
2255 	}
2256 	if (mtop != NULL) {
2257 		DTRACE_SKYWALK(free__attached__mbuf);
2258 		if (__probable(mtop->m_nextpkt != NULL)) {
2259 			m_freem_list(mtop);
2260 		} else {
2261 			m_freem(mtop);
2262 		}
2263 	}
2264 	if (kptop != NULL) {
2265 		int cnt = 0;
2266 		pp_free_packet_chain(kptop, &cnt);
2267 		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
2268 	}
2269 	if (npkt != NULL) {
2270 		*npkt = c;
2271 	}
2272 }
2273 
2274 void
2275 pp_free_pktq(struct pktq *pktq)
2276 {
2277 	if (__improbable(KPKTQ_EMPTY(pktq))) {
2278 		return;
2279 	}
2280 	struct __kern_packet *pkt = KPKTQ_FIRST(pktq);
2281 	pp_free_packet_chain(pkt, NULL);
2282 	KPKTQ_DISPOSE(pktq);
2283 }
2284 
2285 __attribute__((always_inline))
2286 static inline void
2287 pp_free_packet_array(struct kern_pbufpool *pp, uint64_t *array, uint32_t num)
2288 {
2289 	struct __metadata_preamble *mdp;
2290 	struct skmem_obj *top = NULL;
2291 	struct skmem_obj *blist_def = NULL;
2292 	struct skmem_obj *blist_large = NULL;
2293 	struct skmem_obj **list = &top;
2294 	struct mbuf *mtop = NULL;
2295 	struct mbuf **mp = &mtop;
2296 	struct __kern_packet *kptop = NULL;
2297 	struct __kern_packet **kpp = &kptop;
2298 	uint32_t i;
2299 
2300 	ASSERT(pp != NULL);
2301 	ASSERT(array != NULL && num > 0);
2302 	ASSERT(PP_BATCH_CAPABLE(pp));
2303 
2304 	for (i = 0; i < num; i++) {
2305 		ASSERT(SK_PTR_ADDR_KQUM(array[i])->qum_pp == pp);
2306 		mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(array[i]), pp,
2307 		    mp, kpp, &blist_def, &blist_large);
2308 
2309 		*list = (struct skmem_obj *)mdp;
2310 		list = &(*list)->mo_next;
2311 		array[i] = 0;
2312 
2313 		if (*mp != NULL) {
2314 			mp = &(*mp)->m_nextpkt;
2315 			ASSERT(*mp == NULL);
2316 		}
2317 		if (*kpp != NULL) {
2318 			kpp = &(*kpp)->pkt_nextpkt;
2319 			ASSERT(*kpp == NULL);
2320 		}
2321 	}
2322 
2323 	ASSERT(top != NULL);
2324 	skmem_cache_batch_free(pp->pp_kmd_cache, top);
2325 	if (blist_def != NULL) {
2326 		skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
2327 		blist_def = NULL;
2328 	}
2329 	if (blist_large != NULL) {
2330 		skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
2331 		blist_large = NULL;
2332 	}
2333 	if (mtop != NULL) {
2334 		DTRACE_SKYWALK(free__attached__mbuf);
2335 		if (__probable(mtop->m_nextpkt != NULL)) {
2336 			m_freem_list(mtop);
2337 		} else {
2338 			m_freem(mtop);
2339 		}
2340 	}
2341 	if (kptop != NULL) {
2342 		int cnt = 0;
2343 		pp_free_packet_chain(kptop, &cnt);
2344 		DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
2345 	}
2346 }
2347 
2348 void
2349 pp_free_packet(struct kern_pbufpool *pp, uint64_t kqum)
2350 {
2351 	pp_free_packet_array(pp, &kqum, 1);
2352 }
2353 
2354 void
2355 pp_free_packet_batch(const kern_pbufpool_t pp, uint64_t *array, uint32_t size)
2356 {
2357 	pp_free_packet_array(pp, array, size);
2358 }
2359 
2360 void
2361 pp_free_packet_single(struct __kern_packet *pkt)
2362 {
2363 	ASSERT(pkt->pkt_nextpkt == NULL);
2364 	pp_free_packet(__DECONST(struct kern_pbufpool *,
2365 	    pkt->pkt_qum.qum_pp), SK_PTR_ADDR(pkt));
2366 }
2367 
2368 static mach_vm_address_t
2369 pp_alloc_buffer_common(const kern_pbufpool_t pp, struct skmem_obj_info *oi,
2370     uint32_t skmflag, bool large)
2371 {
2372 	mach_vm_address_t baddr;
2373 	struct skmem_cache *skm = large ? PP_BUF_CACHE_LARGE(pp) :
2374 	    PP_BUF_CACHE_DEF(pp);
2375 
2376 	ASSERT(skm != NULL);
2377 	/* allocate a cached buffer */
2378 	baddr = (mach_vm_address_t)skmem_cache_alloc(skm, skmflag);
2379 
2380 #if (DEVELOPMENT || DEBUG)
2381 	uint64_t mtbf = skmem_region_get_mtbf();
2382 	/*
2383 	 * MTBF is applicable only for non-blocking allocations here.
2384 	 */
2385 	if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
2386 	    (skmflag & SKMEM_NOSLEEP))) {
2387 		SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
2388 		net_update_uptime();
2389 		if (baddr != 0) {
2390 			skmem_cache_free(skm, (void *)baddr);
2391 			baddr = 0;
2392 		}
2393 	}
2394 #endif /* (DEVELOPMENT || DEBUG) */
2395 
2396 	if (__improbable(baddr == 0)) {
2397 		SK_DF(SK_VERB_MEM, "failed to alloc buffer, pp 0x%llx",
2398 		    SK_KVA(pp));
2399 		return 0;
2400 	}
2401 	skmem_cache_get_obj_info(skm, (void *)baddr, oi, NULL);
2402 	ASSERT(SKMEM_OBJ_BUFCTL(oi) != NULL);
2403 	ASSERT((mach_vm_address_t)SKMEM_OBJ_ADDR(oi) == baddr);
2404 	return baddr;
2405 }
2406 
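/*
 * Usage sketch (illustrative only; variable names are hypothetical):
 * `seg' and `idx' must be supplied together or not at all.
 *
 *	mach_vm_address_t baddr;
 *	kern_segment_t seg;
 *	kern_obj_idx_seg_t idx;
 *	errno_t err = pp_alloc_buffer(pp, &baddr, &seg, &idx, SKMEM_NOSLEEP);
 *	if (err == 0) {
 *		// ... attach the buffer, or ...
 *		pp_free_buffer(pp, baddr);
 *	}
 *
 * ENOTSUP is returned for pools created without buffer-on-demand;
 * ENOMEM when the buffer cache is exhausted.
 */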
2407 errno_t
2408 pp_alloc_buffer(const kern_pbufpool_t pp, mach_vm_address_t *baddr,
2409     kern_segment_t *seg, kern_obj_idx_seg_t *idx, uint32_t skmflag)
2410 {
2411 	struct skmem_obj_info oib;
2412 
2413 	VERIFY(pp != NULL && baddr != NULL);
2414 	VERIFY((seg != NULL) == (idx != NULL));
2415 
2416 	if (__improbable(!PP_HAS_BUFFER_ON_DEMAND(pp))) {
2417 		return ENOTSUP;
2418 	}
2419 
2420 	*baddr = pp_alloc_buffer_common(pp, &oib, skmflag, false);
2421 	if (__improbable(*baddr == 0)) {
2422 		return ENOMEM;
2423 	}
2424 
2425 	if (seg != NULL) {
2426 		ASSERT(SKMEM_OBJ_SEG(&oib) != NULL);
2427 		*seg = SKMEM_OBJ_SEG(&oib);
2428 		*idx = SKMEM_OBJ_IDX_SEG(&oib);
2429 	}
2430 	return 0;
2431 }
2432 
2433 void
2434 pp_free_buffer(const kern_pbufpool_t pp, mach_vm_address_t addr)
2435 {
2436 	ASSERT(pp != NULL && addr != 0);
2437 	skmem_cache_free(PP_BUF_CACHE_DEF(pp), (void *)addr);
2438 }
2439 
2440 __attribute__((always_inline))
2441 static inline uint32_t
2442 pp_alloc_buflet_common(struct kern_pbufpool *pp, uint64_t *array,
2443     uint32_t num, uint32_t skmflag, bool large)
2444 {
2445 	struct __kern_buflet *kbft = NULL;
2446 	uint32_t allocd, need = num;
2447 	struct skmem_obj *list;
2448 
2449 	ASSERT(array != NULL && num > 0);
2450 	ASSERT(PP_BATCH_CAPABLE(pp));
2451 	ASSERT(PP_KBFT_CACHE_DEF(pp) != NULL);
2452 	ASSERT(PP_BUF_SIZE_LARGE(pp) != 0 || !large);
2453 
2454 	allocd = skmem_cache_batch_alloc(large ? PP_KBFT_CACHE_LARGE(pp) :
2455 	    PP_KBFT_CACHE_DEF(pp), &list, num, skmflag);
2456 
2457 	while (list != NULL) {
2458 		struct skmem_obj *listn;
2459 
2460 		listn = list->mo_next;
2461 		list->mo_next = NULL;
2462 		kbft = (kern_buflet_t)(void *)list;
2463 
2464 #if CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT)
2465 		/* ensure the object address is tagged */
2466 		ASSERT((vm_offset_t)kbft !=
2467 		    vm_memtag_canonicalize_address((vm_offset_t)kbft));
2468 #endif /* CONFIG_KERNEL_TAGGING && !defined(KASAN_LIGHT) */
2469 
2470 		KBUF_EXT_INIT(kbft, pp);
2471 		*array = (uint64_t)kbft;
2472 		++array;
2473 		list = listn;
2474 		ASSERT(need > 0);
2475 		--need;
2476 	}
2477 	ASSERT((num - need) == allocd || kbft == NULL);
2478 	return num - need;
2479 }
2480 
2481 errno_t
2482 pp_alloc_buflet(struct kern_pbufpool *pp, kern_buflet_t *kbft, uint32_t skmflag,
2483     bool large)
2484 {
2485 	uint64_t bft;
2486 
2487 	if (__improbable(!pp_alloc_buflet_common(pp, &bft, 1, skmflag, large))) {
2488 		return ENOMEM;
2489 	}
2490 	*kbft = (kern_buflet_t)bft;
2491 	return 0;
2492 }
2493 
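/*
 * Usage sketch (illustrative only; the batch size is hypothetical):
 * mirrors pp_alloc_packet_batch(), returning constructed external
 * buflets with buffers attached.
 *
 *	uint64_t bfts[16];
 *	uint32_t cnt = 16;
 *	int err = pp_alloc_buflet_batch(pp, bfts, &cnt, SKMEM_NOSLEEP, false);
 *
 * As with the packet variant, err distinguishes full (0), partial
 * (EAGAIN) and failed (ENOMEM) batches, and cnt reports how many array
 * entries were filled.  Each buflet is eventually returned via
 * pp_free_buflet().
 */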
2494 errno_t
2495 pp_alloc_buflet_batch(struct kern_pbufpool *pp, uint64_t *array,
2496     uint32_t *size, uint32_t skmflag, bool large)
2497 {
2498 	uint32_t i, n;
2499 	int err;
2500 
2501 	ASSERT(array != NULL && size != NULL);
2502 
2503 	n = *size;
2504 	*size = 0;
2505 
2506 	i = pp_alloc_buflet_common(pp, array, n, skmflag, large);
2507 	*size = i;
2508 
2509 	if (__probable(i == n)) {
2510 		err = 0;
2511 	} else if (i != 0) {
2512 		err = EAGAIN;
2513 	} else {
2514 		err = ENOMEM;
2515 	}
2516 
2517 	return err;
2518 }
2519 
2520 __attribute__((always_inline))
2521 static void
2522 pp_free_buflet_common(const kern_pbufpool_t pp, kern_buflet_t kbft)
2523 {
2524 	ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
2525 	ASSERT(kbft->buf_nbft_addr == 0);
2526 
2527 	if (kbft->buf_flag & BUFLET_FLAG_EXTERNAL) {
2528 		ASSERT(kbft->buf_addr != 0);
2529 		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
2530 		ASSERT(kbft->buf_bft_idx_reg != OBJ_IDX_NONE);
2531 		ASSERT(kbft->buf_ctl != NULL);
2532 		ASSERT(((struct __kern_buflet_ext *)kbft)->
2533 		    kbe_buf_upp_link.sle_next == NULL);
2534 		/*
2535 		 * external buflet has buffer attached at construction,
2536 		 * so we don't free the buffer here.
2537 		 */
2538 		skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
2539 		    PP_KBFT_CACHE_LARGE(pp) : PP_KBFT_CACHE_DEF(pp),
2540 		    (void *)kbft);
2541 	} else if (__probable(kbft->buf_addr != 0)) {
2542 		void *objaddr = kbft->buf_objaddr;
2543 		uint32_t usecnt = 0;
2544 
2545 		ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
2546 		ASSERT(kbft->buf_ctl != NULL);
2547 		KBUF_DTOR(kbft, usecnt);
2548 		SK_DF(SK_VERB_MEM, "pp 0x%llx buf 0x%llx usecnt %u",
2549 		    SK_KVA(pp), SK_KVA(objaddr), usecnt);
2550 		if (__probable(usecnt == 0)) {
2551 			skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
2552 			    PP_BUF_CACHE_LARGE(pp) : PP_BUF_CACHE_DEF(pp),
2553 			    objaddr);
2554 		}
2555 	}
2556 }
2557 
2558 void
2559 pp_free_buflet(const kern_pbufpool_t pp, kern_buflet_t kbft)
2560 {
2561 	ASSERT(pp != NULL && kbft != NULL);
2562 	ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
2563 	pp_free_buflet_common(pp, kbft);
2564 }
2565 
2566 void
2567 pp_reap_caches(boolean_t purge)
2568 {
2569 	skmem_cache_reap_now(pp_opt_cache, purge);
2570 	skmem_cache_reap_now(pp_flow_cache, purge);
2571 	skmem_cache_reap_now(pp_compl_cache, purge);
2572 }
2573