1 /*
2 * Copyright (c) 2016-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/packet/pbufpool_var.h>
31 #include <sys/sdt.h>
32 #include <net/droptap.h>
33 #include <kern/uipc_domain.h>
34
35 static struct kern_pbufpool *pp_alloc(zalloc_flags_t);
36 static void pp_free(struct kern_pbufpool *);
37 static uint32_t pp_alloc_packet_common(struct kern_pbufpool *, uint16_t,
38 uint64_t *__counted_by(num), uint32_t num, boolean_t, alloc_cb_func_t,
39 const void *, uint32_t);
40 static void pp_free_packet_array(struct kern_pbufpool *,
41 uint64_t *__counted_by(num)array, uint32_t num);
42 static int pp_metadata_ctor_no_buflet(struct skmem_obj_info *,
43 struct skmem_obj_info *, void *, uint32_t);
44 static int pp_metadata_ctor_max_buflet(struct skmem_obj_info *,
45 struct skmem_obj_info *, void *, uint32_t);
46 static void pp_metadata_dtor(void *, void *);
47 static int pp_metadata_construct(struct __kern_quantum *,
48 struct __user_quantum *, obj_idx_t, struct kern_pbufpool *, uint32_t,
49 uint16_t, bool, struct skmem_obj **);
50 static void pp_metadata_destruct(struct __kern_quantum *,
51 struct kern_pbufpool *, bool);
52 static struct __kern_quantum *pp_metadata_init(struct __metadata_preamble *,
53 struct kern_pbufpool *, uint16_t, uint32_t, struct skmem_obj **);
54 static struct __metadata_preamble *pp_metadata_fini(struct __kern_quantum *,
55 struct kern_pbufpool *, struct mbuf **, struct __kern_packet **,
56 struct skmem_obj **, struct skmem_obj **, struct skmem_obj **, struct skmem_obj **);
57 static void pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid);
58 static int pp_buf_seg_ctor(struct sksegment *, IOSKMemoryBufferRef, void *);
59 static void pp_buf_seg_dtor(struct sksegment *, IOSKMemoryBufferRef, void *);
60 static void pp_destroy_upp_locked(struct kern_pbufpool *);
61 static void pp_destroy_upp_bft_locked(struct kern_pbufpool *);
62 static int pp_init_upp_bft_locked(struct kern_pbufpool *, boolean_t);
63 static void pp_free_buflet_common(const kern_pbufpool_t, kern_buflet_t);
64 static mach_vm_address_t pp_alloc_buffer_common(const kern_pbufpool_t pp,
65 struct skmem_obj_info *oi, uint32_t skmflag, bool large);
66 static inline uint32_t
67 pp_alloc_buflet_common(struct kern_pbufpool *pp,
68 uint64_t *__counted_by(num)array, uint32_t num, uint32_t skmflag,
69 bool large);
70
71 #define KERN_PBUFPOOL_U_HASH_SIZE 64 /* hash table size */
72
73 #define KERN_BUF_MIN_STRIDING_SIZE (32 * 1024)
74 static uint32_t kern_buf_min_striding_size = KERN_BUF_MIN_STRIDING_SIZE;
75
76 /*
77  * Since the inputs are small (indices into the metadata region), we can use
78  * Knuth's multiplicative hash method, which is fast and good enough. Here
79  * we multiply the input by 2654435761, a constant close to 2^32 divided by
80  * the golden ratio. See "The Art of Computer Programming", section 6.4.
81  */
82 #define KERN_PBUFPOOL_U_HASH_INDEX(_i, _m) \
83 (((_i) * 2654435761U) & (_m))
84 #define KERN_PBUFPOOL_U_HASH(_pp, _i) \
85 (&(_pp)->pp_u_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
86 KERN_PBUFPOOL_U_HASH_SIZE - 1)])
87 #define KERN_PBUFPOOL_U_BFT_HASH(_pp, _i) \
88 (&(_pp)->pp_u_bft_hash_table[KERN_PBUFPOOL_U_HASH_INDEX(_i, \
89 KERN_PBUFPOOL_U_HASH_SIZE - 1)])
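/*
 * Illustrative note (not part of the original source): with the 64-bucket
 * table, the bucket index is simply (_i * 2654435761U) & 63. For example,
 * metadata index 1 maps to bucket 0x9E3779B1 & 0x3F == 49 and index 2 maps
 * to bucket 0x3C6EF362 & 0x3F == 34, so consecutive indices land in
 * well-separated buckets.
 */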
90
91 static SKMEM_TYPE_DEFINE(pp_zone, struct kern_pbufpool);
92
93 #define SKMEM_TAG_PBUFPOOL_HASH "com.apple.skywalk.pbufpool.hash"
94 static SKMEM_TAG_DEFINE(skmem_tag_pbufpool_hash, SKMEM_TAG_PBUFPOOL_HASH);
95
96 #define SKMEM_TAG_PBUFPOOL_BFT_HASH "com.apple.skywalk.pbufpool.bft.hash"
97 static SKMEM_TAG_DEFINE(skmem_tag_pbufpool_bft_hash, SKMEM_TAG_PBUFPOOL_BFT_HASH);
98
99 #if HAS_MTE
100 extern bool is_mte_enabled;
101 #endif /* HAS_MTE */
102
103 struct kern_pbufpool_u_htbl {
104 struct kern_pbufpool_u_bkt upp_hash[KERN_PBUFPOOL_U_HASH_SIZE];
105 };
106
107 #define PP_U_HTBL_SIZE sizeof(struct kern_pbufpool_u_htbl)
108 static SKMEM_TYPE_DEFINE(pp_u_htbl_zone, struct kern_pbufpool_u_htbl);
109
110 static struct skmem_cache *pp_opt_cache; /* cache for __packet_opt */
111 static struct skmem_cache *pp_flow_cache; /* cache for __flow */
112 static struct skmem_cache *pp_compl_cache; /* cache for __packet_compl */
113
114 static int __pp_inited = 0;
115
116 int
117 pp_init(void)
118 {
119 static_assert(KPKT_SC_UNSPEC == MBUF_SC_UNSPEC);
120 static_assert(KPKT_SC_BK_SYS == MBUF_SC_BK_SYS);
121 static_assert(KPKT_SC_BK == MBUF_SC_BK);
122 static_assert(KPKT_SC_BE == MBUF_SC_BE);
123 static_assert(KPKT_SC_RD == MBUF_SC_RD);
124 static_assert(KPKT_SC_OAM == MBUF_SC_OAM);
125 static_assert(KPKT_SC_AV == MBUF_SC_AV);
126 static_assert(KPKT_SC_RV == MBUF_SC_RV);
127 static_assert(KPKT_SC_VI == MBUF_SC_VI);
128 static_assert(KPKT_SC_SIG == MBUF_SC_SIG);
129 static_assert(KPKT_SC_VO == MBUF_SC_VO);
130 static_assert(KPKT_SC_CTL == MBUF_SC_CTL);
131
132 static_assert(KPKT_SC_BK_SYS == PKT_SC_BK_SYS);
133 static_assert(KPKT_SC_BK == PKT_SC_BK);
134 static_assert(KPKT_SC_BE == PKT_SC_BE);
135 static_assert(KPKT_SC_RD == PKT_SC_RD);
136 static_assert(KPKT_SC_OAM == PKT_SC_OAM);
137 static_assert(KPKT_SC_AV == PKT_SC_AV);
138 static_assert(KPKT_SC_RV == PKT_SC_RV);
139 static_assert(KPKT_SC_VI == PKT_SC_VI);
140 static_assert(KPKT_SC_SIG == PKT_SC_SIG);
141 static_assert(KPKT_SC_VO == PKT_SC_VO);
142 static_assert(KPKT_SC_CTL == PKT_SC_CTL);
143 static_assert(KPKT_SC_MAX_CLASSES == MBUF_SC_MAX_CLASSES);
144
145 static_assert(KPKT_TC_UNSPEC == MBUF_TC_UNSPEC);
146 static_assert(KPKT_TC_BE == MBUF_TC_BE);
147 static_assert(KPKT_TC_BK == MBUF_TC_BK);
148 static_assert(KPKT_TC_VI == MBUF_TC_VI);
149 static_assert(KPKT_TC_VO == MBUF_TC_VO);
150 static_assert(KPKT_TC_MAX == MBUF_TC_MAX);
151
152 static_assert(KPKT_TC_BE == PKT_TC_BE);
153 static_assert(KPKT_TC_BK == PKT_TC_BK);
154 static_assert(KPKT_TC_VI == PKT_TC_VI);
155 static_assert(KPKT_TC_VO == PKT_TC_VO);
156
157 static_assert(PKT_SCVAL_BK_SYS == SCVAL_BK_SYS);
158 static_assert(PKT_SCVAL_BK == SCVAL_BK);
159 static_assert(PKT_SCVAL_BE == SCVAL_BE);
160 static_assert(PKT_SCVAL_RD == SCVAL_RD);
161 static_assert(PKT_SCVAL_OAM == SCVAL_OAM);
162 static_assert(PKT_SCVAL_AV == SCVAL_AV);
163 static_assert(PKT_SCVAL_RV == SCVAL_RV);
164 static_assert(PKT_SCVAL_VI == SCVAL_VI);
165 static_assert(PKT_SCVAL_VO == SCVAL_VO);
166 static_assert(PKT_SCVAL_CTL == SCVAL_CTL);
167
168 /*
169 * Assert that the value of common packet flags between mbuf and
170 * skywalk packets match, and that they are in PKT_F_COMMON_MASK.
171 */
172 static_assert(PKT_F_BACKGROUND == PKTF_SO_BACKGROUND);
173 static_assert(PKT_F_REALTIME == PKTF_SO_REALTIME);
174 static_assert(PKT_F_REXMT == PKTF_TCP_REXMT);
175 static_assert(PKT_F_LAST_PKT == PKTF_LAST_PKT);
176 static_assert(PKT_F_FLOW_ID == PKTF_FLOW_ID);
177 static_assert(PKT_F_FLOW_ADV == PKTF_FLOW_ADV);
178 static_assert(PKT_F_TX_COMPL_TS_REQ == PKTF_TX_COMPL_TS_REQ);
179 static_assert(PKT_F_TS_VALID == PKTF_TS_VALID);
180 static_assert(PKT_F_NEW_FLOW == PKTF_NEW_FLOW);
181 static_assert(PKT_F_START_SEQ == PKTF_START_SEQ);
182 static_assert(PKT_F_KEEPALIVE == PKTF_KEEPALIVE);
183 static_assert(PKT_F_WAKE_PKT == PKTF_WAKE_PKT);
184 static_assert(PKT_F_COMMON_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME | PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_FLOW_ID | PKT_F_FLOW_ADV | PKT_F_TX_COMPL_TS_REQ | PKT_F_TS_VALID | PKT_F_NEW_FLOW | PKT_F_START_SEQ | PKT_F_KEEPALIVE | PKT_F_WAKE_PKT));
185 /*
186 * Assert packet flags shared with userland.
187 */
188 static_assert(PKT_F_USER_MASK == (PKT_F_BACKGROUND | PKT_F_REALTIME | PKT_F_REXMT | PKT_F_LAST_PKT | PKT_F_OPT_DATA | PKT_F_PROMISC | PKT_F_TRUNCATED | PKT_F_WAKE_PKT | PKT_F_L4S | PKT_F_ULPN));
189
190 static_assert(offsetof(struct __kern_quantum, qum_len) == offsetof(struct __kern_packet, pkt_length));
191
192 /*
193  * Due to the use of tagged pointers, the size of the
194  * metadata preamble structure needs to be a multiple of 16.
195  * See the SK_PTR_TAG() definition for details.
196  */
197 static_assert(sizeof(struct __metadata_preamble) != 0 && (sizeof(struct __metadata_preamble) % 16) == 0);
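/*
 * Illustrative note (not part of the original source): keeping the preamble
 * size a multiple of 16 means every metadata object address is 16-byte
 * aligned, so its low 4 bits are always zero and can carry a small tag.
 * For example, an object at 0x...9a0 can be stored as the tagged pointer
 * 0x...9a3 (tag 0x3) and recovered by masking off the low 4 bits; see
 * SK_PTR_TAG() for the actual encoding.
 */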
198
199 static_assert(NX_PBUF_FRAGS_MIN == 1 && NX_PBUF_FRAGS_MIN == NX_PBUF_FRAGS_DEFAULT);
200
201 /*
202 * Batch alloc/free requires linking the objects together;
203 * make sure that the fields are at the same offset since
204 * we cast the object to struct skmem_obj.
205 */
206 static_assert(offsetof(struct __metadata_preamble, _mdp_next) == offsetof(struct skmem_obj, mo_next));
207 static_assert(offsetof(struct __buflet, __buflet_next) == offsetof(struct skmem_obj, mo_next));
208
209 SK_LOCK_ASSERT_HELD();
210 ASSERT(!__pp_inited);
211
212 pp_opt_cache = skmem_cache_create("pkt.opt",
213 sizeof(struct __packet_opt), sizeof(uint64_t),
214 NULL, NULL, NULL, NULL, NULL, 0);
215 pp_flow_cache = skmem_cache_create("pkt.flow",
216 sizeof(struct __flow), 16, /* 16-bytes aligned */
217 NULL, NULL, NULL, NULL, NULL, 0);
218 pp_compl_cache = skmem_cache_create("pkt.compl",
219 sizeof(struct __packet_compl), sizeof(uint64_t),
220 NULL, NULL, NULL, NULL, NULL, 0);
221
222 PE_parse_boot_argn("sk_pp_min_striding_size", &kern_buf_min_striding_size,
223 sizeof(kern_buf_min_striding_size));
224
225 return 0;
226 }
227
228 void
229 pp_fini(void)
230 {
231 SK_LOCK_ASSERT_HELD();
232
233 if (__pp_inited) {
234 if (pp_compl_cache != NULL) {
235 skmem_cache_destroy(pp_compl_cache);
236 pp_compl_cache = NULL;
237 }
238 if (pp_flow_cache != NULL) {
239 skmem_cache_destroy(pp_flow_cache);
240 pp_flow_cache = NULL;
241 }
242 if (pp_opt_cache != NULL) {
243 skmem_cache_destroy(pp_opt_cache);
244 pp_opt_cache = NULL;
245 }
246
247 __pp_inited = 0;
248 }
249 }
250
251 static struct kern_pbufpool *
252 pp_alloc(zalloc_flags_t how)
253 {
254 struct kern_pbufpool *pp = zalloc_flags(pp_zone, how | Z_ZERO);
255
256 if (pp) {
257 lck_mtx_init(&pp->pp_lock, &skmem_lock_grp, &skmem_lock_attr);
258 }
259 return pp;
260 }
261
262 static void
263 pp_free(struct kern_pbufpool *pp)
264 {
265 PP_LOCK_ASSERT_HELD(pp);
266
267 pp_destroy(pp);
268 PP_UNLOCK(pp);
269
270 SK_DF(SK_VERB_MEM, "pp %p FREE", SK_KVA(pp));
271 lck_mtx_destroy(&pp->pp_lock, &skmem_lock_grp);
272 zfree(pp_zone, pp);
273 }
274
275 void
276 pp_retain_locked(struct kern_pbufpool *pp)
277 {
278 PP_LOCK_ASSERT_HELD(pp);
279
280 pp->pp_refcnt++;
281 ASSERT(pp->pp_refcnt != 0);
282 }
283
284 void
285 pp_retain(struct kern_pbufpool *pp)
286 {
287 PP_LOCK(pp);
288 pp_retain_locked(pp);
289 PP_UNLOCK(pp);
290 }
291
292 boolean_t
293 pp_release_locked(struct kern_pbufpool *pp)
294 {
295 uint32_t oldref = pp->pp_refcnt;
296
297 PP_LOCK_ASSERT_HELD(pp);
298
299 ASSERT(pp->pp_refcnt != 0);
300 if (--pp->pp_refcnt == 0) {
301 pp_free(pp);
302 }
303
304 return oldref == 1;
305 }
306
307 boolean_t
308 pp_release(struct kern_pbufpool *pp)
309 {
310 boolean_t lastref;
311
312 PP_LOCK(pp);
313 if (!(lastref = pp_release_locked(pp))) {
314 PP_UNLOCK(pp);
315 }
316
317 return lastref;
318 }
319
320 void
321 pp_close(struct kern_pbufpool *pp)
322 {
323 PP_LOCK(pp);
324 ASSERT(pp->pp_refcnt > 0);
325 ASSERT(!(pp->pp_flags & PPF_CLOSED));
326 pp->pp_flags |= PPF_CLOSED;
327 if (!pp_release_locked(pp)) {
328 PP_UNLOCK(pp);
329 }
330 }
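/*
 * Illustrative note (not part of the original source): a typical pool
 * lifetime, based on the retain/release/close routines above, looks like:
 *
 *     pp = pp_create(...);     // returns with one reference held
 *     pp_retain(pp);           // each additional user takes a reference
 *     ...
 *     pp_release(pp);          // users drop their references
 *     pp_close(pp);            // marks PPF_CLOSED and drops the creator's
 *                              // reference; pp_free() runs when refcnt hits 0
 */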
331
332 /*
333  * -fbounds-safety: All callers of pp_regions_params_adjust pass an srp_array
334  * of size SKMEM_REGIONS. This is the same as marking it __counted_by(SKMEM_REGIONS).
335  */
336 void
337 pp_regions_params_adjust(struct skmem_region_params srp_array[SKMEM_REGIONS],
338 nexus_meta_type_t md_type, nexus_meta_subtype_t md_subtype, uint32_t md_cnt,
339 uint16_t max_frags, uint32_t buf_size, uint32_t large_buf_size,
340 uint32_t buf_cnt, uint32_t buf_seg_size, uint32_t flags)
341 {
342 struct skmem_region_params *srp, *kmd_srp, *buf_srp, *kbft_srp,
343 *lbuf_srp;
344 uint32_t md_size = 0;
345 bool kernel_only = ((flags & PP_REGION_CONFIG_KERNEL_ONLY) != 0);
346 bool md_persistent = ((flags & PP_REGION_CONFIG_MD_PERSISTENT) != 0);
347 bool buf_persistent = ((flags & PP_REGION_CONFIG_BUF_PERSISTENT) != 0);
348 bool config_buflet = ((flags & PP_REGION_CONFIG_BUFLET) != 0);
349 bool md_magazine_enable = ((flags &
350 PP_REGION_CONFIG_MD_MAGAZINE_ENABLE) != 0);
351
352 ASSERT(max_frags != 0);
353
354 md_size = NX_METADATA_PACKET_SZ(max_frags);
355
356 switch (flags & PP_REGION_CONFIG_BUF_IODIR_BIDIR) {
357 case PP_REGION_CONFIG_BUF_IODIR_IN:
358 kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
359 buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
360 lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
361 kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
362 break;
363 case PP_REGION_CONFIG_BUF_IODIR_OUT:
364 kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
365 buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
366 lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
367 kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
368 break;
369 case PP_REGION_CONFIG_BUF_IODIR_BIDIR:
370 default:
371 kmd_srp = &srp_array[SKMEM_REGION_KMD];
372 buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
373 lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
374 kbft_srp = &srp_array[SKMEM_REGION_KBFT];
375 break;
376 }
377
378 /* add preamble size to metadata obj size */
379 md_size += METADATA_PREAMBLE_SZ;
380 ASSERT(md_size >= NX_METADATA_OBJ_MIN_SZ);
381
382 /* configure kernel metadata region */
383 kmd_srp->srp_md_type = md_type;
384 kmd_srp->srp_md_subtype = md_subtype;
385 kmd_srp->srp_r_obj_cnt = md_cnt;
386 kmd_srp->srp_r_obj_size = md_size;
387 kmd_srp->srp_max_frags = max_frags;
388 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
389 if (md_persistent) {
390 kmd_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
391 }
392 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
393 if (md_magazine_enable) {
394 kmd_srp->srp_cflags &= ~SKMEM_REGION_CR_NOMAGAZINES;
395 }
396 skmem_region_params_config(kmd_srp);
397
398 /* Sanity check for memtag */
399 ASSERT(kmd_srp->srp_c_seg_size == SKMEM_MD_SEG_SIZE);
400
401 /* configure user metadata region */
402 srp = &srp_array[SKMEM_REGION_UMD];
403 if (!kernel_only) {
404 srp->srp_md_type = kmd_srp->srp_md_type;
405 srp->srp_md_subtype = kmd_srp->srp_md_subtype;
406 srp->srp_r_obj_cnt = kmd_srp->srp_c_obj_cnt;
407 srp->srp_r_obj_size = kmd_srp->srp_c_obj_size;
408 srp->srp_max_frags = kmd_srp->srp_max_frags;
409 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
410 if (md_persistent) {
411 srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
412 }
413 /*
414 * UMD is a mirrored region and object allocation operations
415 * are performed on the KMD objects.
416 */
417 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
418 skmem_region_params_config(srp);
419 ASSERT(srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
420 } else {
421 ASSERT(srp->srp_r_obj_cnt == 0);
422 ASSERT(srp->srp_r_obj_size == 0);
423 }
424
425 /* configure buffer region */
426 buf_srp->srp_r_obj_cnt = MAX(buf_cnt, kmd_srp->srp_c_obj_cnt);
427 buf_srp->srp_r_obj_size = buf_size;
428 buf_srp->srp_cflags &= ~SKMEM_REGION_CR_MONOLITHIC;
429 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) == 0);
430 if (buf_persistent) {
431 buf_srp->srp_cflags |= SKMEM_REGION_CR_PERSISTENT;
432 }
433 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) != 0);
434 if (buf_srp->srp_r_obj_size >= kern_buf_min_striding_size) {
435 /*
436  * A buffer size of at least 32K indicates striding is in use, which
437  * means a buffer could be detached from a buflet. In this case, the
438  * magazine layer should be enabled.
439  */
440 buf_srp->srp_cflags &= ~SKMEM_REGION_CR_NOMAGAZINES;
441 }
442 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_UREADONLY) == 0);
443 if ((flags & PP_REGION_CONFIG_BUF_UREADONLY) != 0) {
444 buf_srp->srp_cflags |= SKMEM_REGION_CR_UREADONLY;
445 }
446 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_KREADONLY) == 0);
447 if ((flags & PP_REGION_CONFIG_BUF_KREADONLY) != 0) {
448 buf_srp->srp_cflags |= SKMEM_REGION_CR_KREADONLY;
449 }
450 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_MONOLITHIC) == 0);
451 if ((flags & PP_REGION_CONFIG_BUF_MONOLITHIC) != 0) {
452 buf_srp->srp_cflags |= SKMEM_REGION_CR_MONOLITHIC;
453 }
454 ASSERT((srp->srp_cflags & SKMEM_REGION_CR_SEGPHYSCONTIG) == 0);
455 if ((flags & PP_REGION_CONFIG_BUF_SEGPHYSCONTIG) != 0) {
456 buf_srp->srp_cflags |= SKMEM_REGION_CR_SEGPHYSCONTIG;
457 }
458 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_NOCACHE) == 0);
459 if ((flags & PP_REGION_CONFIG_BUF_NOCACHE) != 0) {
460 buf_srp->srp_cflags |= SKMEM_REGION_CR_NOCACHE;
461 }
462 ASSERT((buf_srp->srp_cflags & SKMEM_REGION_CR_THREADSAFE) == 0);
463 if ((flags & PP_REGION_CONFIG_BUF_THREADSAFE) != 0) {
464 buf_srp->srp_cflags |= SKMEM_REGION_CR_THREADSAFE;
465 }
466 if (buf_seg_size != 0) {
467 buf_srp->srp_r_seg_size = buf_seg_size;
468 }
469 skmem_region_params_config(buf_srp);
470
471 /* configure large buffer region */
472 if (large_buf_size != 0) {
473 lbuf_srp->srp_r_obj_cnt = buf_srp->srp_r_obj_cnt;
474 lbuf_srp->srp_r_obj_size = large_buf_size;
475 lbuf_srp->srp_r_seg_size = buf_srp->srp_r_seg_size;
476 lbuf_srp->srp_cflags = buf_srp->srp_cflags;
477 skmem_region_params_config(lbuf_srp);
478 }
479
480 /* configure kernel buflet region */
481 if (config_buflet) {
482 /*
483  * Ideally we want the number of buflets to be
484  * "kmd_srp->srp_c_obj_cnt * (kmd_srp->srp_max_frags - 1)",
485  * so that we have enough buflets when multi-buflet and
486  * shared buffer objects are used.
487  * Currently multi-buflet is used only by the user pool,
488  * which doesn't support shared buffer objects; hence, to reduce
489  * the number of objects, we restrict the number of
490  * buflets to the number of buffers.
491  */
492 kbft_srp->srp_r_obj_cnt = buf_srp->srp_c_obj_cnt +
493 lbuf_srp->srp_c_obj_cnt;
494 kbft_srp->srp_r_obj_size = MAX(sizeof(struct __kern_buflet_ext),
495 sizeof(struct __user_buflet));
496 kbft_srp->srp_cflags = kmd_srp->srp_cflags;
497 skmem_region_params_config(kbft_srp);
498 ASSERT(kbft_srp->srp_c_obj_cnt >= buf_srp->srp_c_obj_cnt +
499 lbuf_srp->srp_c_obj_cnt);
500 /* Sanity check for memtag */
501 ASSERT(kbft_srp->srp_c_seg_size == SKMEM_MD_SEG_SIZE);
502 } else {
503 ASSERT(kbft_srp->srp_r_obj_cnt == 0);
504 ASSERT(kbft_srp->srp_r_obj_size == 0);
505 }
506
507 /* configure user buflet region */
508 srp = &srp_array[SKMEM_REGION_UBFT];
509 if (config_buflet && !kernel_only) {
510 srp->srp_r_obj_cnt = kbft_srp->srp_c_obj_cnt;
511 srp->srp_r_obj_size = kbft_srp->srp_c_obj_size;
512 srp->srp_cflags = srp_array[SKMEM_REGION_UMD].srp_cflags;
513 skmem_region_params_config(srp);
514 ASSERT(srp->srp_c_obj_cnt == kbft_srp->srp_c_obj_cnt);
515 } else {
516 ASSERT(srp->srp_r_obj_cnt == 0);
517 ASSERT(srp->srp_r_obj_size == 0);
518 }
519
520 /* make sure each metadata can be paired with a buffer */
521 ASSERT(kmd_srp->srp_c_obj_cnt <= buf_srp->srp_c_obj_cnt);
522 }
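/*
 * Illustrative note (not part of the original source): the regions configured
 * above depend on the requested I/O direction. For example, a receive-only
 * pool (PP_REGION_CONFIG_BUF_IODIR_IN) configures SKMEM_REGION_RXKMD,
 * SKMEM_REGION_RXBUF_DEF, SKMEM_REGION_RXBUF_LARGE and SKMEM_REGION_RXKBFT,
 * plus the UMD/UBFT mirror regions when the pool is not kernel-only.
 */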
523
524 SK_NO_INLINE_ATTRIBUTE
525 static int
526 pp_metadata_construct(struct __kern_quantum *kqum, struct __user_quantum *uqum,
527 obj_idx_t midx, struct kern_pbufpool *pp, uint32_t skmflag, uint16_t bufcnt,
528 bool raw, struct skmem_obj **blist)
529 {
530 struct __kern_buflet *kbuf;
531 mach_vm_address_t baddr = 0;
532 uint16_t *pbufs_cnt, *pbufs_max;
533 uint16_t i;
534
535 ASSERT(bufcnt == 1 || PP_HAS_BUFFER_ON_DEMAND(pp));
536
537 /* construct {user,kernel} metadata */
538 struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
539 struct __user_packet *upkt = SK_PTR_ADDR_UPKT(uqum);
540 struct __packet_opt *__single opt;
541 struct __flow *__single flow;
542 struct __packet_compl *__single compl;
543 uint64_t pflags;
544
545 if (raw) {
546 opt = skmem_cache_alloc(pp_opt_cache, SKMEM_SLEEP);
547 flow = skmem_cache_alloc(pp_flow_cache, SKMEM_SLEEP);
548 compl = skmem_cache_alloc(pp_compl_cache, SKMEM_SLEEP);
549 pflags = (PKT_F_OPT_ALLOC | PKT_F_FLOW_ALLOC |
550 PKT_F_TX_COMPL_ALLOC);
551 } else {
552 ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
553 kpkt->pkt_com_opt != NULL);
554 opt = kpkt->pkt_com_opt;
555 ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
556 kpkt->pkt_flow != NULL);
557 flow = kpkt->pkt_flow;
558 ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
559 kpkt->pkt_tx_compl != NULL);
560 compl = kpkt->pkt_tx_compl;
561 pflags = kpkt->pkt_pflags;
562 }
563 /* will be adjusted below as part of allocating buffer(s) */
564 static_assert(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
565 static_assert(sizeof(kpkt->pkt_bufs_max) == sizeof(uint16_t));
566 pbufs_cnt = __DECONST(uint16_t *, &kpkt->pkt_bufs_cnt);
567 pbufs_max = __DECONST(uint16_t *, &kpkt->pkt_bufs_max);
568
569 /* kernel (and user) packet */
570 KPKT_CTOR(kpkt, pflags, opt, flow, compl, midx,
571 upkt, pp, 0, pp->pp_max_frags, 0);
572
573 kbuf = kqum->qum_buf;
574 for (i = 0; i < bufcnt; i++) {
575 struct skmem_obj_info oib;
576
577 if (!PP_HAS_BUFFER_ON_DEMAND(pp)) {
578 ASSERT(i == 0);
579 ASSERT(*blist == NULL);
580 /*
581  * The quantum has a native buflet, so we only need to
582  * allocate a buffer and attach it to the buflet.
583  */
584 baddr = pp_alloc_buffer_common(pp, &oib, skmflag,
585 false);
586 if (__improbable(baddr == 0)) {
587 goto fail;
588 }
589 KBUF_CTOR(kbuf, baddr, SKMEM_OBJ_IDX_REG(&oib),
590 SKMEM_OBJ_BUFCTL(&oib), pp, false);
591 baddr = 0;
592 } else {
593 /*
594 * we use pre-constructed buflets with attached buffers.
595 */
596 struct __kern_buflet *pkbuf = kbuf;
597 struct skmem_obj *blistn;
598
599 ASSERT(pkbuf != NULL);
600 kbuf = (kern_buflet_t)*blist;
601 if (__improbable(kbuf == NULL)) {
602 SK_DF(SK_VERB_MEM, "failed to get buflet,"
603 " pp %p", SK_KVA(pp));
604 goto fail;
605 }
606
607 #if HAS_MTE && CONFIG_KERNEL_TAGGING
608 if (__probable(is_mte_enabled)) {
609 /* Checking to ensure the object address is tagged */
610 ASSERT((vm_offset_t)kbuf !=
611 vm_memtag_canonicalize_kernel((vm_offset_t)kbuf));
612 }
613 #endif /* HAS_MTE && CONFIG_KERNEL_TAGGING */
614
615 blistn = (*blist)->mo_next;
616 (*blist)->mo_next = NULL;
617
618 KBUF_EXT_INIT(kbuf, pp);
619 KBUF_LINK(pkbuf, kbuf);
620 *blist = blistn;
621 }
622
623 /* adjust buffer count accordingly */
624 if (__probable(pbufs_cnt != NULL)) {
625 *pbufs_cnt += 1;
626 ASSERT(*pbufs_cnt <= *pbufs_max);
627 }
628 }
629
630 ASSERT(!PP_KERNEL_ONLY(pp) || (kqum->qum_qflags & QUM_F_KERNEL_ONLY));
631 ASSERT(METADATA_IDX(kqum) != OBJ_IDX_NONE);
632 SK_DF(SK_VERB_MEM, "pp %p pkt %p bufcnt %d buf %p",
633 SK_KVA(pp), SK_KVA(kqum), bufcnt, SK_KVA(baddr));
634 return 0;
635
636 fail:
637 ASSERT(bufcnt != 0 && baddr == 0);
638 pp_metadata_destruct(kqum, pp, raw);
639 return ENOMEM;
640 }
641
642 static int
643 pp_metadata_ctor_common(struct skmem_obj_info *oi0,
644 struct skmem_obj_info *oim0, struct kern_pbufpool *pp, uint32_t skmflag,
645 bool no_buflet)
646 {
647 struct skmem_obj_info _oi, _oim;
648 struct skmem_obj_info *oi, *oim;
649 struct __kern_quantum *kqum;
650 struct __user_quantum *uqum;
651 uint16_t bufcnt = (no_buflet ? 0 : pp->pp_max_frags);
652 struct skmem_obj *__single blist = NULL;
653 int error;
654
655 #if (DEVELOPMENT || DEBUG)
656 uint64_t mtbf = skmem_region_get_mtbf();
657 /*
658 * MTBF is applicable only for non-blocking allocations here.
659 */
660 if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
661 (skmflag & SKMEM_NOSLEEP))) {
662 SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
663 net_update_uptime();
664 return ENOMEM;
665 }
666 #endif /* (DEVELOPMENT || DEBUG) */
667
668 /*
669  * Note that oi0 and oim0 may be stored inside the object itself;
670  * if so, copy them to local variables before constructing. We
671  * don't test PPF_BATCH here, as the allocator may allocate
672  * storage space differently depending on the number of objects.
673  */
674 if (__probable((uintptr_t)oi0 >= (uintptr_t)SKMEM_OBJ_ADDR(oi0) &&
675 ((uintptr_t)oi0 + sizeof(*oi0)) <=
676 ((uintptr_t)SKMEM_OBJ_ADDR(oi0) + SKMEM_OBJ_SIZE(oi0)))) {
677 oi = &_oi;
678 *oi = *oi0;
679 if (__probable(oim0 != NULL)) {
680 oim = &_oim;
681 *oim = *oim0;
682 } else {
683 oim = NULL;
684 }
685 } else {
686 oi = oi0;
687 oim = oim0;
688 }
689
690 kqum = SK_PTR_ADDR_KQUM((uintptr_t)SKMEM_OBJ_ADDR(oi) +
691 METADATA_PREAMBLE_SZ);
692
693 if (__probable(!PP_KERNEL_ONLY(pp))) {
694 ASSERT(oim != NULL && SKMEM_OBJ_ADDR(oim) != NULL);
695 ASSERT(SKMEM_OBJ_SIZE(oi) == SKMEM_OBJ_SIZE(oim));
696 uqum = SK_PTR_ADDR_UQUM((uintptr_t)SKMEM_OBJ_ADDR(oim) +
697 METADATA_PREAMBLE_SZ);
698 } else {
699 ASSERT(oim == NULL);
700 uqum = NULL;
701 }
702
703 if (oim != NULL) {
704 /* initialize user metadata redzone */
705 struct __metadata_preamble *mdp = SKMEM_OBJ_ADDR(oim);
706 mdp->mdp_redzone =
707 (SKMEM_OBJ_ROFF(oim) + METADATA_PREAMBLE_SZ) ^
708 __ch_umd_redzone_cookie;
709 }
710
711 /* allocate (constructed) buflet(s) with buffer(s) attached */
712 if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0) {
713 (void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
714 PP_KBFT_CACHE_DEF(pp)->skm_objsize, bufcnt, skmflag);
715 }
716
717 error = pp_metadata_construct(kqum, uqum, SKMEM_OBJ_IDX_REG(oi), pp,
718 skmflag, bufcnt, TRUE, &blist);
719 if (__improbable(blist != NULL)) {
720 skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist);
721 blist = NULL;
722 }
723 return error;
724 }
725
726 static int
727 pp_metadata_ctor_no_buflet(struct skmem_obj_info *oi0,
728 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
729 {
730 return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, true);
731 }
732
733 static int
734 pp_metadata_ctor_max_buflet(struct skmem_obj_info *oi0,
735 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
736 {
737 return pp_metadata_ctor_common(oi0, oim0, arg, skmflag, false);
738 }
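/*
 * Illustrative note (not part of the original source): the two thin wrappers
 * above are installed as the skmem cache constructor by pp_create(); pools
 * with on-demand buffers (PPF_BUFFER_ON_DEMAND) use
 * pp_metadata_ctor_no_buflet(), while all others use
 * pp_metadata_ctor_max_buflet() so that each metadata object is constructed
 * with its full complement of buflets and buffers already attached.
 */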
739
740 __attribute__((always_inline))
741 static void
742 pp_metadata_destruct_common(struct __kern_quantum *kqum,
743 struct kern_pbufpool *pp, bool raw, struct skmem_obj **blist_def,
744 struct skmem_obj **blist_nocache_def, struct skmem_obj **blist_large,
745 struct skmem_obj **blist_nocache_large)
746 {
747 struct __kern_buflet *kbuf, *nbuf;
748 struct skmem_obj *__single p_blist_def = NULL, *__single p_blist_large = NULL;
749 struct skmem_obj *__single p_blist_nocache_def = NULL, *__single p_blist_nocache_large = NULL;
750 struct skmem_obj **pp_blist_def = &p_blist_def;
751 struct skmem_obj **pp_blist_large = &p_blist_large;
752 struct skmem_obj **pp_blist_nocache_def = &p_blist_nocache_def;
753 struct skmem_obj **pp_blist_nocache_large = &p_blist_nocache_large;
754 uint16_t bufcnt, i = 0;
755 bool first_buflet_empty;
756
757 ASSERT(blist_def != NULL);
758 ASSERT(blist_large != NULL);
759
760 struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
761
762 ASSERT(kpkt->pkt_user != NULL || PP_KERNEL_ONLY(pp));
763 ASSERT(kpkt->pkt_qum.qum_pp == pp);
764 ASSERT(METADATA_TYPE(kpkt) == pp->pp_md_type);
765 ASSERT(METADATA_SUBTYPE(kpkt) == pp->pp_md_subtype);
766 ASSERT(METADATA_IDX(kpkt) != OBJ_IDX_NONE);
767 ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
768 ASSERT(kpkt->pkt_bufs_cnt <= kpkt->pkt_bufs_max);
769 ASSERT(kpkt->pkt_bufs_max == pp->pp_max_frags);
770 static_assert(sizeof(kpkt->pkt_bufs_cnt) == sizeof(uint16_t));
771 bufcnt = kpkt->pkt_bufs_cnt;
772 kbuf = &kqum->qum_buf[0];
773 /*
774 * special handling for empty first buflet.
775 */
776 first_buflet_empty = (kbuf->buf_addr == 0);
777 *__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = 0;
778
779 /*
780 * -fbounds-safety: buf_nbft_addr is a mach_vm_address_t which is
781 * unsafe, so we forge it here.
782 */
783 nbuf = __unsafe_forge_single(struct __kern_buflet *,
784 __DECONST(struct __kern_buflet *, kbuf->buf_nbft_addr));
785 BUF_NBFT_ADDR(kbuf, 0);
786 BUF_NBFT_IDX(kbuf, OBJ_IDX_NONE);
787 if (!first_buflet_empty) {
788 pp_free_buflet_common(pp, kbuf);
789 ++i;
790 }
791
792 while (nbuf != NULL) {
793 ASSERT(nbuf->buf_ctl != NULL);
794 if (BUFLET_HAS_LARGE_BUF(nbuf)) {
795 /*
796  * A bc_usecnt larger than 1 means the buffer has been cloned and is
797  * still being used by other buflets. In this case, when we free
798  * this buflet we need to explicitly ask for it not to be cached again
799  * in the magazine layer, to prevent immediate reuse of the buffer and
800  * data corruption.
801  */
802 if (nbuf->buf_ctl->bc_usecnt > 1) {
803 *pp_blist_nocache_large = (struct skmem_obj *)(void *)nbuf;
804 pp_blist_nocache_large =
805 &((struct skmem_obj *)(void *)nbuf)->mo_next;
806 } else {
807 *pp_blist_large = (struct skmem_obj *)(void *)nbuf;
808 pp_blist_large =
809 &((struct skmem_obj *)(void *)nbuf)->mo_next;
810 }
811 } else {
812 if (nbuf->buf_ctl->bc_usecnt > 1) {
813 *pp_blist_nocache_def = (struct skmem_obj *)(void *)nbuf;
814 pp_blist_nocache_def =
815 &((struct skmem_obj *)(void *)nbuf)->mo_next;
816 } else {
817 *pp_blist_def = (struct skmem_obj *)(void *)nbuf;
818 pp_blist_def =
819 &((struct skmem_obj *)(void *)nbuf)->mo_next;
820 }
821 }
822 BUF_NBFT_IDX(nbuf, OBJ_IDX_NONE);
823 nbuf = __unsafe_forge_single(struct __kern_buflet *,
824 __DECONST(struct __kern_buflet *, nbuf->buf_nbft_addr));
825 ++i;
826 }
827
828 ASSERT(i == bufcnt);
829
830 if (p_blist_def != NULL) {
831 *pp_blist_def = *blist_def;
832 *blist_def = p_blist_def;
833 }
834 if (p_blist_large != NULL) {
835 *pp_blist_large = *blist_large;
836 *blist_large = p_blist_large;
837 }
838 if (p_blist_nocache_def != NULL) {
839 *pp_blist_nocache_def = *blist_nocache_def;
840 *blist_nocache_def = p_blist_nocache_def;
841 }
842 if (p_blist_nocache_large != NULL) {
843 *pp_blist_nocache_large = *blist_nocache_large;
844 *blist_nocache_large = p_blist_nocache_large;
845 }
846
847 /* if we're about to return this object to the slab, clean it up */
848 if (raw) {
849 ASSERT(kpkt->pkt_com_opt != NULL ||
850 !(kpkt->pkt_pflags & PKT_F_OPT_ALLOC));
851 if (kpkt->pkt_com_opt != NULL) {
852 ASSERT(kpkt->pkt_pflags & PKT_F_OPT_ALLOC);
853 skmem_cache_free(pp_opt_cache,
854 kpkt->pkt_com_opt);
855 kpkt->pkt_com_opt = NULL;
856 }
857 ASSERT(kpkt->pkt_flow != NULL ||
858 !(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC));
859 if (kpkt->pkt_flow != NULL) {
860 ASSERT(kpkt->pkt_pflags & PKT_F_FLOW_ALLOC);
861 skmem_cache_free(pp_flow_cache, kpkt->pkt_flow);
862 kpkt->pkt_flow = NULL;
863 }
864 ASSERT(kpkt->pkt_tx_compl != NULL ||
865 !(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC));
866 if (kpkt->pkt_tx_compl != NULL) {
867 ASSERT(kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC);
868 skmem_cache_free(pp_compl_cache,
869 kpkt->pkt_tx_compl);
870 kpkt->pkt_tx_compl = NULL;
871 }
872 kpkt->pkt_pflags = 0;
873 }
874 }
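/*
 * Illustrative note (not part of the original source): the destructor above
 * sorts the detached buflets into four lists (default vs. large buffers,
 * each further split into cacheable vs. nocache when bc_usecnt > 1), so that
 * pp_free_kbft_list() below can batch-free each group through the
 * appropriate cache with the appropriate caching policy.
 */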
875
876 __attribute__((always_inline))
877 static void
878 pp_free_kbft_list(struct kern_pbufpool *pp, struct skmem_obj *blist_def, struct skmem_obj *blist_nocache_def,
879 struct skmem_obj *blist_large, struct skmem_obj *blist_nocache_large)
880 {
881 if (blist_def != NULL) {
882 skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp), blist_def);
883 }
884 if (blist_large != NULL) {
885 skmem_cache_batch_free(PP_KBFT_CACHE_LARGE(pp), blist_large);
886 }
887 if (blist_nocache_def != NULL) {
888 skmem_cache_batch_free_nocache(PP_KBFT_CACHE_DEF(pp), blist_nocache_def);
889 }
890 if (blist_nocache_large != NULL) {
891 skmem_cache_batch_free_nocache(PP_KBFT_CACHE_LARGE(pp), blist_nocache_large);
892 }
893 }
894
895 __attribute__((always_inline))
896 static void
897 pp_metadata_destruct(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
898 bool raw)
899 {
900 struct skmem_obj *__single blist_def = NULL, *__single blist_large = NULL;
901 struct skmem_obj *__single blist_nocache_def = NULL, *__single blist_nocache_large = NULL;
902
903 pp_metadata_destruct_common(kqum, pp, raw, &blist_def, &blist_nocache_def,
904 &blist_large, &blist_nocache_large);
905 pp_free_kbft_list(pp, blist_def, blist_nocache_def, blist_large, blist_nocache_large);
906 }
907
908 static void
909 pp_metadata_dtor(void *addr, void *arg)
910 {
911 pp_metadata_destruct(SK_PTR_ADDR_KQUM((uintptr_t)addr +
912 METADATA_PREAMBLE_SZ), arg, TRUE);
913 }
914
915 static int
916 pp_buf_seg_ctor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
917 {
918 struct kern_pbufpool *__single pp = arg;
919 int ret;
920
921 ret = 0;
922 if (pp->pp_pbuf_seg_ctor != NULL) {
923 ret = pp->pp_pbuf_seg_ctor(pp, sg, md);
924 }
925 return ret;
926 }
927
928 static void
929 pp_buf_seg_dtor(struct sksegment *sg, IOSKMemoryBufferRef md, void *arg)
930 {
931 struct kern_pbufpool *__single pp = arg;
932
933 if (pp->pp_pbuf_seg_dtor != NULL) {
934 pp->pp_pbuf_seg_dtor(pp, sg, md);
935 }
936 }
937
938 static int
939 pp_buflet_metadata_ctor_common(struct skmem_obj_info *oi0,
940 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag, bool large)
941 {
942 #pragma unused (skmflag)
943 struct kern_pbufpool *pp = (struct kern_pbufpool *)arg;
944 struct __kern_buflet *kbft;
945 struct __user_buflet *ubft;
946 struct skmem_obj_info oib;
947 mach_vm_address_t baddr;
948 obj_idx_t oi_idx_reg;
949
950 baddr = pp_alloc_buffer_common(pp, &oib, skmflag, large);
951 if (__improbable(baddr == 0)) {
952 return ENOMEM;
953 }
954 /*
955 * Note that oi0 and oim0 may be stored inside the object itself;
956 * so copy what is required to local variables before constructing.
957 */
958 oi_idx_reg = SKMEM_OBJ_IDX_REG(oi0);
959 kbft = SKMEM_OBJ_ADDR(oi0);
960
961 if (__probable(!PP_KERNEL_ONLY(pp))) {
962 ASSERT(oim0 != NULL && SKMEM_OBJ_ADDR(oim0) != NULL);
963 ASSERT(SKMEM_OBJ_SIZE(oi0) == SKMEM_OBJ_SIZE(oim0));
964 ASSERT(oi_idx_reg == SKMEM_OBJ_IDX_REG(oim0));
965 ASSERT(SKMEM_OBJ_IDX_SEG(oi0) == SKMEM_OBJ_IDX_SEG(oim0));
966 ubft = SKMEM_OBJ_ADDR(oim0);
967 } else {
968 ASSERT(oim0 == NULL);
969 ubft = NULL;
970 }
971 KBUF_EXT_CTOR(kbft, ubft, baddr, SKMEM_OBJ_IDX_REG(&oib),
972 SKMEM_OBJ_BUFCTL(&oib), oi_idx_reg, pp, large);
973 return 0;
974 }
975
976 static int
977 pp_buflet_default_buffer_metadata_ctor(struct skmem_obj_info *oi0,
978 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
979 {
980 return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, false);
981 }
982
983 static int
984 pp_buflet_large_buffer_metadata_ctor(struct skmem_obj_info *oi0,
985 struct skmem_obj_info *oim0, void *arg, uint32_t skmflag)
986 {
987 return pp_buflet_metadata_ctor_common(oi0, oim0, arg, skmflag, true);
988 }
989
990 static void
991 pp_buflet_metadata_dtor(void *addr, void *arg)
992 {
993 struct __kern_buflet *__single kbft = addr;
994 void *objaddr = kbft->buf_objaddr;
995 struct kern_pbufpool *__single pp = arg;
996 uint32_t usecnt = 0;
997 bool large = BUFLET_HAS_LARGE_BUF(kbft);
998
999 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1000 /*
1001  * Don't assert (buf_nbft_addr == 0) here, as a constructed
1002  * buflet may have this field set to a non-zero value. This is
1003  * because buf_nbft_addr (__buflet_next) is used by skmem batch
1004  * alloc for chaining the buflets.
1005  * To ensure that the freed buflet was not part of a chain, we
1006  * assert (buf_nbft_idx == OBJ_IDX_NONE).
1007  */
1008 ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
1009 ASSERT(((struct __kern_buflet_ext *)kbft)->kbe_buf_upp_link.sle_next ==
1010 NULL);
1011 ASSERT(kbft->buf_addr != 0);
1012 ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
1013 ASSERT(kbft->buf_ctl != NULL);
1014
1015 KBUF_DTOR(kbft, usecnt);
1016 SK_DF(SK_VERB_MEM, "pp %p buf %p usecnt %u", SK_KVA(pp),
1017 SK_KVA(objaddr), usecnt);
1018 if (__probable(usecnt == 0)) {
1019 skmem_cache_free(large ? PP_BUF_CACHE_LARGE(pp) :
1020 PP_BUF_CACHE_DEF(pp), objaddr);
1021 }
1022 }
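/*
 * Illustrative note (not part of the original source): KBUF_DTOR() leaves
 * the buffer's remaining use count in usecnt; the underlying buffer object
 * is returned to its cache only when that count reaches zero, since a
 * cloned buffer may still be referenced by other buflets.
 */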
1023
1024 /*
1025  * -fbounds-safety: all callers of pp_create use srp_array with a known size:
1026  * SKMEM_REGIONS. This is the same as marking it __counted_by(SKMEM_REGIONS).
1027  */
1028 struct kern_pbufpool *
1029 pp_create(const char *name, struct skmem_region_params srp_array[SKMEM_REGIONS],
1030 pbuf_seg_ctor_fn_t buf_seg_ctor, pbuf_seg_dtor_fn_t buf_seg_dtor,
1031 const void *ctx, pbuf_ctx_retain_fn_t ctx_retain,
1032 pbuf_ctx_release_fn_t ctx_release, uint32_t ppcreatef)
1033 {
1034 struct kern_pbufpool *pp = NULL;
1035 uint32_t md_size, def_buf_obj_size;
1036 uint32_t def_buf_size, large_buf_size;
1037 nexus_meta_type_t md_type;
1038 nexus_meta_subtype_t md_subtype;
1039 uint32_t md_cflags;
1040 uint16_t max_frags;
1041 uint32_t buf_def_cflags;
1042 char cname[64];
1043 const char *__null_terminated cache_name = NULL;
1044 struct skmem_region_params *kmd_srp;
1045 struct skmem_region_params *buf_srp;
1046 struct skmem_region_params *kbft_srp;
1047 struct skmem_region_params *umd_srp = NULL;
1048 struct skmem_region_params *ubft_srp = NULL;
1049 struct skmem_region_params *lbuf_srp = NULL;
1050
1051 /* buf_seg_{ctor,dtor} pair must be either NULL or non-NULL */
1052 ASSERT(!(!(buf_seg_ctor == NULL && buf_seg_dtor == NULL) &&
1053 ((buf_seg_ctor == NULL) ^ (buf_seg_dtor == NULL))));
1054
1055 /* ctx{,_retain,_release} must be either ALL NULL or ALL non-NULL */
1056 ASSERT((ctx == NULL && ctx_retain == NULL && ctx_release == NULL) ||
1057 (ctx != NULL && ctx_retain != NULL && ctx_release != NULL));
1058
1059 if (srp_array[SKMEM_REGION_KMD].srp_c_obj_cnt != 0) {
1060 kmd_srp = &srp_array[SKMEM_REGION_KMD];
1061 buf_srp = &srp_array[SKMEM_REGION_BUF_DEF];
1062 lbuf_srp = &srp_array[SKMEM_REGION_BUF_LARGE];
1063 kbft_srp = &srp_array[SKMEM_REGION_KBFT];
1064 } else if (srp_array[SKMEM_REGION_RXKMD].srp_c_obj_cnt != 0) {
1065 kmd_srp = &srp_array[SKMEM_REGION_RXKMD];
1066 buf_srp = &srp_array[SKMEM_REGION_RXBUF_DEF];
1067 lbuf_srp = &srp_array[SKMEM_REGION_RXBUF_LARGE];
1068 kbft_srp = &srp_array[SKMEM_REGION_RXKBFT];
1069 } else {
1070 VERIFY(srp_array[SKMEM_REGION_TXKMD].srp_c_obj_cnt != 0);
1071 kmd_srp = &srp_array[SKMEM_REGION_TXKMD];
1072 buf_srp = &srp_array[SKMEM_REGION_TXBUF_DEF];
1073 lbuf_srp = &srp_array[SKMEM_REGION_TXBUF_LARGE];
1074 kbft_srp = &srp_array[SKMEM_REGION_TXKBFT];
1075 }
1076
1077 VERIFY(kmd_srp->srp_c_obj_size != 0);
1078 VERIFY(buf_srp->srp_c_obj_cnt != 0);
1079 VERIFY(buf_srp->srp_c_obj_size != 0);
1080
1081 if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
1082 VERIFY(kbft_srp->srp_c_obj_cnt != 0);
1083 VERIFY(kbft_srp->srp_c_obj_size != 0);
1084 } else {
1085 kbft_srp = NULL;
1086 }
1087
1088 if ((ppcreatef & PPCREATEF_KERNEL_ONLY) == 0) {
1089 umd_srp = &srp_array[SKMEM_REGION_UMD];
1090 ASSERT(umd_srp->srp_c_obj_size == kmd_srp->srp_c_obj_size);
1091 ASSERT(umd_srp->srp_c_obj_cnt == kmd_srp->srp_c_obj_cnt);
1092 ASSERT(umd_srp->srp_c_seg_size == kmd_srp->srp_c_seg_size);
1093 ASSERT(umd_srp->srp_seg_cnt == kmd_srp->srp_seg_cnt);
1094 ASSERT(umd_srp->srp_md_type == kmd_srp->srp_md_type);
1095 ASSERT(umd_srp->srp_md_subtype == kmd_srp->srp_md_subtype);
1096 ASSERT(umd_srp->srp_max_frags == kmd_srp->srp_max_frags);
1097 ASSERT((umd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
1098 (kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
1099 if (kbft_srp != NULL) {
1100 ubft_srp = &srp_array[SKMEM_REGION_UBFT];
1101 ASSERT(ubft_srp->srp_c_obj_size ==
1102 kbft_srp->srp_c_obj_size);
1103 ASSERT(ubft_srp->srp_c_obj_cnt ==
1104 kbft_srp->srp_c_obj_cnt);
1105 ASSERT(ubft_srp->srp_c_seg_size ==
1106 kbft_srp->srp_c_seg_size);
1107 ASSERT(ubft_srp->srp_seg_cnt == kbft_srp->srp_seg_cnt);
1108 }
1109 }
1110
1111 md_size = kmd_srp->srp_r_obj_size;
1112 md_type = kmd_srp->srp_md_type;
1113 md_subtype = kmd_srp->srp_md_subtype;
1114 max_frags = kmd_srp->srp_max_frags;
1115 def_buf_obj_size = buf_srp->srp_c_obj_size;
1116 def_buf_size = def_buf_obj_size;
1117 large_buf_size = lbuf_srp->srp_c_obj_size;
1118
1119 #if (DEBUG || DEVELOPMENT)
1120 ASSERT(def_buf_obj_size != 0);
1121 ASSERT(md_type > NEXUS_META_TYPE_INVALID &&
1122 md_type <= NEXUS_META_TYPE_MAX);
1123 ASSERT(max_frags >= 1);
1124 ASSERT(md_type == NEXUS_META_TYPE_PACKET);
1125 ASSERT(md_size >= (METADATA_PREAMBLE_SZ +
1126 NX_METADATA_PACKET_SZ(max_frags)));
1127 ASSERT(md_subtype > NEXUS_META_SUBTYPE_INVALID &&
1128 md_subtype <= NEXUS_META_SUBTYPE_MAX);
1129 #endif /* DEBUG || DEVELOPMENT */
1130
1131 pp = pp_alloc(Z_WAITOK);
1132
1133 (void) snprintf((char *)pp->pp_name, sizeof(pp->pp_name),
1134 "skywalk.pp.%s", name);
1135
1136 pp->pp_ctx = __DECONST(void *, ctx);
1137 pp->pp_ctx_retain = ctx_retain;
1138 pp->pp_ctx_release = ctx_release;
1139 if (pp->pp_ctx != NULL) {
1140 pp->pp_ctx_retain(pp->pp_ctx);
1141 }
1142
1143 pp->pp_pbuf_seg_ctor = buf_seg_ctor;
1144 pp->pp_pbuf_seg_dtor = buf_seg_dtor;
1145 PP_BUF_SIZE_DEF(pp) = def_buf_size;
1146 PP_BUF_OBJ_SIZE_DEF(pp) = def_buf_obj_size;
1147 PP_BUF_SIZE_LARGE(pp) = large_buf_size;
1148 PP_BUF_OBJ_SIZE_LARGE(pp) = lbuf_srp->srp_c_obj_size;
1149 pp->pp_md_type = md_type;
1150 pp->pp_md_subtype = md_subtype;
1151 pp->pp_max_frags = max_frags;
1152 if (ppcreatef & PPCREATEF_EXTERNAL) {
1153 pp->pp_flags |= PPF_EXTERNAL;
1154 }
1155 if (ppcreatef & PPCREATEF_TRUNCATED_BUF) {
1156 pp->pp_flags |= PPF_TRUNCATED_BUF;
1157 }
1158 if (ppcreatef & PPCREATEF_KERNEL_ONLY) {
1159 pp->pp_flags |= PPF_KERNEL;
1160 }
1161 if (ppcreatef & PPCREATEF_ONDEMAND_BUF) {
1162 pp->pp_flags |= PPF_BUFFER_ON_DEMAND;
1163 }
1164 if (ppcreatef & PPCREATEF_DYNAMIC) {
1165 pp->pp_flags |= PPF_DYNAMIC;
1166 }
1167 if (lbuf_srp->srp_c_obj_cnt > 0) {
1168 ASSERT(lbuf_srp->srp_c_obj_size != 0);
1169 pp->pp_flags |= PPF_LARGE_BUF;
1170 }
1171
1172 pp_retain(pp);
1173
1174 md_cflags = ((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ?
1175 SKMEM_CR_NOMAGAZINES : 0);
1176 md_cflags |= SKMEM_CR_BATCH;
1177 pp->pp_flags |= PPF_BATCH;
1178
1179 if (pp->pp_flags & PPF_DYNAMIC) {
1180 md_cflags |= SKMEM_CR_DYNAMIC;
1181 }
1182
1183 if (umd_srp != NULL && (pp->pp_umd_region =
1184 skmem_region_create(name, umd_srp, NULL, NULL, NULL)) == NULL) {
1185 SK_ERR("\"%s\" (%p) failed to create %s region",
1186 pp->pp_name, SK_KVA(pp), umd_srp->srp_name);
1187 goto failed;
1188 }
1189
1190 if ((pp->pp_kmd_region = skmem_region_create(name, kmd_srp, NULL, NULL,
1191 NULL)) == NULL) {
1192 SK_ERR("\"%s\" (%p) failed to create %s region",
1193 pp->pp_name, SK_KVA(pp), kmd_srp->srp_name);
1194 goto failed;
1195 }
1196
1197 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1198 VERIFY((kbft_srp != NULL) && (kbft_srp->srp_c_obj_cnt > 0));
1199 if (!PP_KERNEL_ONLY(pp)) {
1200 VERIFY((ubft_srp != NULL) &&
1201 (ubft_srp->srp_c_obj_cnt > 0));
1202 }
1203 }
1204 /*
1205  * The magazine-layer and persistency attributes of the metadata
1206  * regions {KMD,KBFT,UBFT} must match.
1207  */
1208 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1209 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES) ==
1210 (kbft_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES));
1211 ASSERT((kmd_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT) ==
1212 (kbft_srp->srp_cflags & SKMEM_REGION_CR_PERSISTENT));
1213 }
1214
1215 if (PP_HAS_BUFFER_ON_DEMAND(pp) && !PP_KERNEL_ONLY(pp)) {
1216 if ((pp->pp_ubft_region = skmem_region_create(name, ubft_srp,
1217 NULL, NULL, NULL)) == NULL) {
1218 SK_ERR("\"%s\" (%p) failed to create %s region",
1219 pp->pp_name, SK_KVA(pp), ubft_srp->srp_name);
1220 goto failed;
1221 }
1222 }
1223
1224 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1225 if ((pp->pp_kbft_region = skmem_region_create(name,
1226 kbft_srp, NULL, NULL, NULL)) == NULL) {
1227 SK_ERR("\"%s\" (%p) failed to create %s region",
1228 pp->pp_name, SK_KVA(pp), kbft_srp->srp_name);
1229 goto failed;
1230 }
1231 }
1232
1233 if (!PP_KERNEL_ONLY(pp)) {
1234 skmem_region_mirror(pp->pp_kmd_region, pp->pp_umd_region);
1235 }
1236 if (!PP_KERNEL_ONLY(pp) && pp->pp_ubft_region != NULL) {
1237 ASSERT(pp->pp_kbft_region != NULL);
1238 skmem_region_mirror(pp->pp_kbft_region, pp->pp_ubft_region);
1239 }
1240
1241 /*
1242 * Create the metadata cache; magazines layer is determined by caller.
1243 */
1244 cache_name = tsnprintf(cname, sizeof(cname), "kmd.%s", name);
1245 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1246 pp->pp_kmd_cache = skmem_cache_create(cache_name, md_size, 0,
1247 pp_metadata_ctor_no_buflet, pp_metadata_dtor, NULL, pp,
1248 pp->pp_kmd_region, md_cflags);
1249 } else {
1250 pp->pp_kmd_cache = skmem_cache_create(cache_name, md_size, 0,
1251 pp_metadata_ctor_max_buflet, pp_metadata_dtor, NULL, pp,
1252 pp->pp_kmd_region, md_cflags);
1253 }
1254
1255 if (pp->pp_kmd_cache == NULL) {
1256 SK_ERR("\"%s\" (%p) failed to create \"%s\" cache",
1257 pp->pp_name, SK_KVA(pp), cname);
1258 goto failed;
1259 }
1260
1261 /*
1262 * Create the buflet metadata cache
1263 */
1264 if (pp->pp_kbft_region != NULL) {
1265 cache_name = tsnprintf(cname, sizeof(cname), "kbft_def.%s", name);
1266 PP_KBFT_CACHE_DEF(pp) = skmem_cache_create(cache_name,
1267 kbft_srp->srp_c_obj_size, 0,
1268 pp_buflet_default_buffer_metadata_ctor,
1269 pp_buflet_metadata_dtor, NULL, pp, pp->pp_kbft_region,
1270 md_cflags);
1271
1272 if (PP_KBFT_CACHE_DEF(pp) == NULL) {
1273 SK_ERR("\"%s\" (%p) failed to create \"%s\" cache",
1274 pp->pp_name, SK_KVA(pp), cname);
1275 goto failed;
1276 }
1277
1278 if (PP_HAS_LARGE_BUF(pp)) {
1279 /* For now, set the aggressive memory reclaim flag for kbft_large */
1280 md_cflags |= SKMEM_CR_RECLAIM;
1281 cache_name = tsnprintf(cname, sizeof(cname),
1282 "kbft_large.%s", name);
1283 PP_KBFT_CACHE_LARGE(pp) = skmem_cache_create(cache_name,
1284 kbft_srp->srp_c_obj_size, 0,
1285 pp_buflet_large_buffer_metadata_ctor,
1286 pp_buflet_metadata_dtor,
1287 NULL, pp, pp->pp_kbft_region, md_cflags);
1288
1289 if (PP_KBFT_CACHE_LARGE(pp) == NULL) {
1290 SK_ERR("\"%s\" (%p) failed to "
1291 "create \"%s\" cache", pp->pp_name,
1292 SK_KVA(pp), cname);
1293 goto failed;
1294 }
1295 }
1296 }
1297
1298 if ((PP_BUF_REGION_DEF(pp) = skmem_region_create(name,
1299 buf_srp, pp_buf_seg_ctor, pp_buf_seg_dtor, pp)) == NULL) {
1300 SK_ERR("\"%s\" (%p) failed to create %s region",
1301 pp->pp_name, SK_KVA(pp), buf_srp->srp_name);
1302 goto failed;
1303 }
1304
1305 if (PP_HAS_LARGE_BUF(pp)) {
1306 PP_BUF_REGION_LARGE(pp) = skmem_region_create(name, lbuf_srp,
1307 pp_buf_seg_ctor, pp_buf_seg_dtor, pp);
1308 if (PP_BUF_REGION_LARGE(pp) == NULL) {
1309 SK_ERR("\"%s\" (%p) failed to create %s region",
1310 pp->pp_name, SK_KVA(pp), lbuf_srp->srp_name);
1311 goto failed;
1312 }
1313 }
1314
1315 /*
1316 * Create the buffer object cache without the magazines layer.
1317 * We rely on caching the constructed metadata object instead.
1318 */
1319 cache_name = tsnprintf(cname, sizeof(cname), "buf_def.%s", name);
1320 buf_def_cflags = buf_srp->srp_cflags & SKMEM_REGION_CR_NOMAGAZINES ? SKMEM_CR_NOMAGAZINES : 0;
1321 if ((PP_BUF_CACHE_DEF(pp) = skmem_cache_create(cache_name,
1322 def_buf_obj_size,
1323 0, NULL, NULL, NULL, pp, PP_BUF_REGION_DEF(pp),
1324 buf_def_cflags)) == NULL) {
1325 SK_ERR("\"%s\" (%p) failed to create \"%s\" cache",
1326 pp->pp_name, SK_KVA(pp), cname);
1327 goto failed;
1328 }
1329
1330 if (PP_BUF_REGION_LARGE(pp) != NULL) {
1331 cache_name = tsnprintf(cname, sizeof(cname), "buf_large.%s", name);
1332 if ((PP_BUF_CACHE_LARGE(pp) = skmem_cache_create(cache_name,
1333 lbuf_srp->srp_c_obj_size, 0, NULL, NULL, NULL, pp,
1334 PP_BUF_REGION_LARGE(pp), SKMEM_CR_NOMAGAZINES)) == NULL) {
1335 SK_ERR("\"%s\" (%p) failed to create \"%s\" cache",
1336 pp->pp_name, SK_KVA(pp), cname);
1337 goto failed;
1338 }
1339 }
1340
1341 return pp;
1342
1343 failed:
1344 if (pp != NULL) {
1345 if (pp->pp_ctx != NULL) {
1346 pp->pp_ctx_release(pp->pp_ctx);
1347 pp->pp_ctx = NULL;
1348 }
1349 pp_close(pp);
1350 }
1351
1352 return NULL;
1353 }
1354
1355 void
1356 pp_destroy(struct kern_pbufpool *pp)
1357 {
1358 PP_LOCK_ASSERT_HELD(pp);
1359
1360 /* may be called for built-in pp with outstanding reference */
1361 ASSERT(!(pp->pp_flags & PPF_EXTERNAL) || pp->pp_refcnt == 0);
1362
1363 pp_destroy_upp_locked(pp);
1364
1365 pp_destroy_upp_bft_locked(pp);
1366
1367 if (pp->pp_kmd_cache != NULL) {
1368 skmem_cache_destroy(pp->pp_kmd_cache);
1369 pp->pp_kmd_cache = NULL;
1370 }
1371
1372 if (pp->pp_umd_region != NULL) {
1373 skmem_region_release(pp->pp_umd_region);
1374 pp->pp_umd_region = NULL;
1375 }
1376
1377 if (pp->pp_kmd_region != NULL) {
1378 skmem_region_release(pp->pp_kmd_region);
1379 pp->pp_kmd_region = NULL;
1380 }
1381
1382 if (PP_KBFT_CACHE_DEF(pp) != NULL) {
1383 skmem_cache_destroy(PP_KBFT_CACHE_DEF(pp));
1384 PP_KBFT_CACHE_DEF(pp) = NULL;
1385 }
1386
1387 if (PP_KBFT_CACHE_LARGE(pp) != NULL) {
1388 skmem_cache_destroy(PP_KBFT_CACHE_LARGE(pp));
1389 PP_KBFT_CACHE_LARGE(pp) = NULL;
1390 }
1391
1392 if (pp->pp_ubft_region != NULL) {
1393 skmem_region_release(pp->pp_ubft_region);
1394 pp->pp_ubft_region = NULL;
1395 }
1396
1397 if (pp->pp_kbft_region != NULL) {
1398 skmem_region_release(pp->pp_kbft_region);
1399 pp->pp_kbft_region = NULL;
1400 }
1401
1402 /*
1403 * The order is important here, since pp_metadata_dtor()
1404 * called by freeing on the pp_kmd_cache will in turn
1405 * free the attached buffer. Therefore destroy the
1406 * buffer cache last.
1407 */
1408 if (PP_BUF_CACHE_DEF(pp) != NULL) {
1409 skmem_cache_destroy(PP_BUF_CACHE_DEF(pp));
1410 PP_BUF_CACHE_DEF(pp) = NULL;
1411 }
1412 if (PP_BUF_REGION_DEF(pp) != NULL) {
1413 skmem_region_release(PP_BUF_REGION_DEF(pp));
1414 PP_BUF_REGION_DEF(pp) = NULL;
1415 }
1416 if (PP_BUF_CACHE_LARGE(pp) != NULL) {
1417 skmem_cache_destroy(PP_BUF_CACHE_LARGE(pp));
1418 PP_BUF_CACHE_LARGE(pp) = NULL;
1419 }
1420 if (PP_BUF_REGION_LARGE(pp) != NULL) {
1421 skmem_region_release(PP_BUF_REGION_LARGE(pp));
1422 PP_BUF_REGION_LARGE(pp) = NULL;
1423 }
1424
1425 if (pp->pp_ctx != NULL) {
1426 pp->pp_ctx_release(pp->pp_ctx);
1427 pp->pp_ctx = NULL;
1428 }
1429 }
1430
1431 static int
1432 pp_init_upp_locked(struct kern_pbufpool *pp, boolean_t can_block)
1433 {
1434 int i, err = 0;
1435
1436 if (pp->pp_u_hash_table != NULL) {
1437 goto done;
1438 }
1439
1440 /* allocated-address hash table */
1441 /*
1442 * -fbounds-safety: We switched to sk_alloc (aka kalloc) from zalloc, so
1443 * if we see any performance hit, we can check if this caused it.
1444 */
1445 if (can_block) {
1446 pp->pp_u_hash_table = sk_alloc_type_array(
1447 struct kern_pbufpool_u_bkt, KERN_PBUFPOOL_U_HASH_SIZE,
1448 Z_WAITOK, skmem_tag_pbufpool_hash);
1449 pp->pp_u_hash_table_size = KERN_PBUFPOOL_U_HASH_SIZE;
1450 } else {
1451 pp->pp_u_hash_table = sk_alloc_type_array(
1452 struct kern_pbufpool_u_bkt, KERN_PBUFPOOL_U_HASH_SIZE,
1453 Z_NOWAIT, skmem_tag_pbufpool_hash);
1454 pp->pp_u_hash_table_size = KERN_PBUFPOOL_U_HASH_SIZE;
1455 }
1456 if (pp->pp_u_hash_table == NULL) {
1457 SK_ERR("failed to zalloc packet buffer pool upp hash table");
1458 err = ENOMEM;
1459 goto done;
1460 }
1461
1462 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1463 SLIST_INIT(&pp->pp_u_hash_table[i].upp_head);
1464 }
1465 done:
1466 return err;
1467 }
1468
1469 static void
1470 pp_destroy_upp_locked(struct kern_pbufpool *pp)
1471 {
1472 PP_LOCK_ASSERT_HELD(pp);
1473 if (pp->pp_u_hash_table != NULL) {
1474 /* purge anything that's left */
1475 pp_purge_upp_locked(pp, -1);
1476
1477 #if (DEBUG || DEVELOPMENT)
1478 for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1479 ASSERT(SLIST_EMPTY(&pp->pp_u_hash_table[i].upp_head));
1480 }
1481 #endif /* DEBUG || DEVELOPMENT */
1482
1483 kfree_type_counted_by(struct kern_pbufpool_u_bkt,
1484 pp->pp_u_hash_table_size,
1485 pp->pp_u_hash_table);
1486 }
1487 ASSERT(pp->pp_u_bufinuse == 0);
1488 }
1489
1490 int
1491 pp_init_upp(struct kern_pbufpool *pp, boolean_t can_block)
1492 {
1493 int err = 0;
1494
1495 PP_LOCK(pp);
1496 err = pp_init_upp_locked(pp, can_block);
1497 if (err) {
1498 SK_ERR("packet UPP init failed (%d)", err);
1499 goto done;
1500 }
1501 err = pp_init_upp_bft_locked(pp, can_block);
1502 if (err) {
1503 SK_ERR("buflet UPP init failed (%d)", err);
1504 pp_destroy_upp_locked(pp);
1505 goto done;
1506 }
1507 pp_retain_locked(pp);
1508 done:
1509 PP_UNLOCK(pp);
1510 return err;
1511 }
1512
1513 __attribute__((always_inline))
1514 static void
1515 pp_insert_upp_bft_locked(struct kern_pbufpool *pp,
1516 struct __kern_buflet *kbft, pid_t pid)
1517 {
1518 struct kern_pbufpool_u_bft_bkt *bkt;
1519 struct __kern_buflet_ext *kbe = (struct __kern_buflet_ext *)kbft;
1520
1521 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1522 ASSERT(kbe->kbe_buf_pid == (pid_t)-1);
1523 kbe->kbe_buf_pid = pid;
1524 bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, kbft->buf_bft_idx_reg);
1525 SLIST_INSERT_HEAD(&bkt->upp_head, kbe, kbe_buf_upp_link);
1526 pp->pp_u_bftinuse++;
1527 }
1528
1529 __attribute__((always_inline))
1530 static void
1531 pp_insert_upp_bft_chain_locked(struct kern_pbufpool *pp,
1532 struct __kern_buflet *kbft, pid_t pid)
1533 {
1534 while (kbft != NULL) {
1535 pp_insert_upp_bft_locked(pp, kbft, pid);
1536 kbft = __unsafe_forge_single(struct __kern_buflet *,
1537 __DECONST(kern_buflet_t, kbft->buf_nbft_addr));
1538 }
1539 }
1540
1541 /* Also inserts the attached chain of buflets */
1542 void static inline
1543 pp_insert_upp_common(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1544 pid_t pid)
1545 {
1546 struct kern_pbufpool_u_bkt *bkt;
1547 struct __kern_buflet *kbft;
1548
1549 ASSERT(kqum->qum_pid == (pid_t)-1);
1550 kqum->qum_pid = pid;
1551
1552 bkt = KERN_PBUFPOOL_U_HASH(pp, METADATA_IDX(kqum));
1553 SLIST_INSERT_HEAD(&bkt->upp_head, kqum, qum_upp_link);
1554 pp->pp_u_bufinuse++;
1555
1556 kbft = __unsafe_forge_single(struct __kern_buflet *, (kern_buflet_t)kqum->qum_buf[0].buf_nbft_addr);
1557 if (kbft != NULL) {
1558 ASSERT(((kern_buflet_t)kbft)->buf_flag & BUFLET_FLAG_EXTERNAL);
1559 ASSERT(kqum->qum_qflags & QUM_F_INTERNALIZED);
1560 pp_insert_upp_bft_chain_locked(pp, kbft, pid);
1561 }
1562 }
1563
1564 void
1565 pp_insert_upp_locked(struct kern_pbufpool *pp, struct __kern_quantum *kqum,
1566 pid_t pid)
1567 {
1568 pp_insert_upp_common(pp, kqum, pid);
1569 }
1570
1571 void
1572 pp_insert_upp(struct kern_pbufpool *pp, struct __kern_quantum *kqum, pid_t pid)
1573 {
1574 PP_LOCK(pp);
1575 pp_insert_upp_common(pp, kqum, pid);
1576 PP_UNLOCK(pp);
1577 }
1578
1579 void
1580 pp_insert_upp_batch(struct kern_pbufpool *pp, pid_t pid,
1581 uint64_t *__counted_by(num)array, uint32_t num)
1582 {
1583 uint32_t i = 0;
1584
1585 ASSERT(array != NULL && num > 0);
1586 PP_LOCK(pp);
1587 while (i < num) {
1588 struct __kern_quantum *kqum = SK_PTR_ADDR_KQUM(array[i]);
1589
1590 ASSERT(kqum != NULL);
1591 pp_insert_upp_common(pp, kqum, pid);
1592 ++i;
1593 }
1594 PP_UNLOCK(pp);
1595 }
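/*
 * Usage sketch (illustrative; `p' and the handle array are hypothetical):
 * packets handed out to a user process are tracked by inserting their
 * handles into the UPP hash table under a single pool lock, typically
 * right after a batch allocation on that process's behalf:
 *
 *	// phs[] holds n tagged handles from pp_alloc_packet_batch()
 *	pp_insert_upp_batch(pp, proc_pid(p), phs, n);
 */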
1596
1597 __attribute__((always_inline))
1598 static struct __kern_buflet *
1599 pp_remove_upp_bft_locked(struct kern_pbufpool *pp, obj_idx_t bft_idx)
1600 {
1601 struct __kern_buflet_ext *kbft, *tbft;
1602 struct kern_pbufpool_u_bft_bkt *bkt;
1603
1604 bkt = KERN_PBUFPOOL_U_BFT_HASH(pp, bft_idx);
1605 SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link, tbft) {
1606 if (((kern_buflet_t)kbft)->buf_bft_idx_reg == bft_idx) {
1607 SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
1608 kbe_buf_upp_link);
1609 kbft->kbe_buf_pid = (pid_t)-1;
1610 kbft->kbe_buf_upp_link.sle_next = NULL;
1611 ASSERT(pp->pp_u_bftinuse != 0);
1612 pp->pp_u_bftinuse--;
1613 break;
1614 }
1615 }
1616 return (kern_buflet_t)kbft;
1617 }
1618
1619 struct __kern_buflet *
1620 pp_remove_upp_bft(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1621 {
1622 struct __kern_buflet *kbft = pp_remove_upp_bft_locked(pp, md_idx);
1623
1624 	*err = __probable(kbft != NULL) ? 0 : EINVAL;
1625 return kbft;
1626 }
1627
1628 __attribute__((always_inline))
1629 static int
1630 pp_remove_upp_bft_chain_locked(struct kern_pbufpool *pp,
1631 struct __kern_quantum *kqum)
1632 {
1633 uint32_t max_frags = pp->pp_max_frags;
1634 struct __kern_buflet *kbft;
1635 uint16_t nbfts, upkt_nbfts;
1636 obj_idx_t bft_idx;
1637
1638 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
1639 bft_idx = kqum->qum_user->qum_buf[0].buf_nbft_idx;
1640 kbft = &kqum->qum_buf[0];
1641 if (bft_idx == OBJ_IDX_NONE) {
1642 return 0;
1643 }
1644
1645 ASSERT(METADATA_TYPE(kqum) == NEXUS_META_TYPE_PACKET);
1646 struct __kern_packet *kpkt = __DECONST(struct __kern_packet *, kqum);
1647 struct __user_packet *upkt = __DECONST(struct __user_packet *,
1648 kpkt->pkt_qum.qum_user);
1649
1650 upkt_nbfts = upkt->pkt_bufs_cnt;
1651 if (__improbable(upkt_nbfts > max_frags)) {
1652 SK_ERR("bad bcnt in upkt (%d > %d)", upkt_nbfts, max_frags);
1653 BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1654 BUF_NBFT_ADDR(kbft, 0);
1655 return ERANGE;
1656 }
1657
1658 nbfts = (kbft->buf_addr != 0) ? 1 : 0;
1659
1660 do {
1661 struct __kern_buflet *pbft = kbft;
1662 struct __kern_buflet_ext *kbe;
1663
1664 kbft = pp_remove_upp_bft_locked(pp, bft_idx);
1665 if (__improbable(kbft == NULL)) {
1666 BUF_NBFT_IDX(pbft, OBJ_IDX_NONE);
1667 BUF_NBFT_ADDR(pbft, 0);
1668 SK_ERR("unallocated next buflet (%d), %p", bft_idx,
1669 SK_KVA(pbft));
1670 return ERANGE;
1671 }
1672 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
1673 BUF_NBFT_IDX(pbft, bft_idx);
1674 BUF_NBFT_ADDR(pbft, kbft);
1675 kbe = __container_of(kbft, struct __kern_buflet_ext, kbe_overlay);
1676 bft_idx = kbe->kbe_buf_user->buf_nbft_idx;
1677 ++nbfts;
1678 } while ((bft_idx != OBJ_IDX_NONE) && (nbfts < upkt_nbfts));
1679
1680 ASSERT(kbft != NULL);
1681 BUF_NBFT_IDX(kbft, OBJ_IDX_NONE);
1682 BUF_NBFT_ADDR(kbft, 0);
1683 *__DECONST(uint16_t *, &kpkt->pkt_bufs_cnt) = nbfts;
1684
1685 if (__improbable((bft_idx != OBJ_IDX_NONE) || (nbfts != upkt_nbfts))) {
1686 SK_ERR("bad buflet in upkt (%d, %d)", nbfts, upkt_nbfts);
1687 return ERANGE;
1688 }
1689 return 0;
1690 }
1691
1692 struct __kern_quantum *
1693 pp_remove_upp_locked(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1694 {
1695 struct __kern_quantum *kqum, *tqum;
1696 struct kern_pbufpool_u_bkt *bkt;
1697
1698 bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1699 SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1700 if (METADATA_IDX(kqum) == md_idx) {
1701 SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
1702 qum_upp_link);
1703 kqum->qum_pid = (pid_t)-1;
1704 ASSERT(pp->pp_u_bufinuse != 0);
1705 pp->pp_u_bufinuse--;
1706 break;
1707 }
1708 }
1709 if (__probable(kqum != NULL)) {
1710 *err = pp_remove_upp_bft_chain_locked(pp, kqum);
1711 } else {
1712 *err = ERANGE;
1713 }
1714 return kqum;
1715 }
1716
1717 struct __kern_quantum *
1718 pp_remove_upp(struct kern_pbufpool *pp, obj_idx_t md_idx, int *err)
1719 {
1720 struct __kern_quantum *kqum;
1721
1722 PP_LOCK(pp);
1723 kqum = pp_remove_upp_locked(pp, md_idx, err);
1724 PP_UNLOCK(pp);
1725 return kqum;
1726 }
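/*
 * Usage sketch (illustrative): reclaiming a packet that a user process
 * returned, looked up by metadata index. A missing entry or an
 * inconsistent buflet chain is reported as ERANGE, so the caller should
 * treat the packet as invalid on error.
 *
 *	int err;
 *	struct __kern_quantum *kqum = pp_remove_upp(pp, md_idx, &err);
 *	if (kqum == NULL || err != 0) {
 *		// unknown index or corrupt chain; reject the request
 *	}
 */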
1727
1728 struct __kern_quantum *
1729 pp_find_upp(struct kern_pbufpool *pp, obj_idx_t md_idx)
1730 {
1731 struct __kern_quantum *kqum, *tqum;
1732 struct kern_pbufpool_u_bkt *bkt;
1733
1734 PP_LOCK(pp);
1735 bkt = KERN_PBUFPOOL_U_HASH(pp, md_idx);
1736 SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1737 if (METADATA_IDX(kqum) == md_idx) {
1738 break;
1739 }
1740 }
1741 PP_UNLOCK(pp);
1742
1743 return kqum;
1744 }
1745
1746 __attribute__((always_inline))
1747 static void
1748 pp_purge_upp_locked(struct kern_pbufpool *pp, pid_t pid)
1749 {
1750 struct __kern_quantum *kqum, *tqum;
1751 struct kern_pbufpool_u_bkt *bkt;
1752 int i;
1753
1754 PP_LOCK_ASSERT_HELD(pp);
1755
1756 /*
1757 * TODO: Build a list of packets and batch-free them.
1758 */
1759 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1760 bkt = &pp->pp_u_hash_table[i];
1761 SLIST_FOREACH_SAFE(kqum, &bkt->upp_head, qum_upp_link, tqum) {
1762 ASSERT(kqum->qum_pid != (pid_t)-1);
1763 if (pid != (pid_t)-1 && kqum->qum_pid != pid) {
1764 continue;
1765 }
1766 SLIST_REMOVE(&bkt->upp_head, kqum, __kern_quantum,
1767 qum_upp_link);
1768 pp_remove_upp_bft_chain_locked(pp, kqum);
1769 kqum->qum_pid = (pid_t)-1;
1770 kqum->qum_qflags &= ~QUM_F_FINALIZED;
1771 kqum->qum_ksd = NULL;
1772 pp_free_packet(__DECONST(struct kern_pbufpool *,
1773 kqum->qum_pp), (uint64_t)kqum);
1774 ASSERT(pp->pp_u_bufinuse != 0);
1775 pp->pp_u_bufinuse--;
1776 }
1777 }
1778 }
1779
1780 __attribute__((always_inline))
1781 static void
1782 pp_purge_upp_bft_locked(struct kern_pbufpool *pp, pid_t pid)
1783 {
1784 struct __kern_buflet_ext *kbft, *tbft;
1785 struct kern_pbufpool_u_bft_bkt *bkt;
1786 int i;
1787
1788 PP_LOCK_ASSERT_HELD(pp);
1789
1790 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1791 bkt = &pp->pp_u_bft_hash_table[i];
1792 SLIST_FOREACH_SAFE(kbft, &bkt->upp_head, kbe_buf_upp_link,
1793 tbft) {
1794 ASSERT(kbft->kbe_buf_pid != (pid_t)-1);
1795 if (pid != (pid_t)-1 && kbft->kbe_buf_pid != pid) {
1796 continue;
1797 }
1798 SLIST_REMOVE(&bkt->upp_head, kbft, __kern_buflet_ext,
1799 kbe_buf_upp_link);
1800 kbft->kbe_buf_pid = (pid_t)-1;
1801 kbft->kbe_buf_upp_link.sle_next = NULL;
1802 pp_free_buflet(pp, (kern_buflet_t)kbft);
1803 ASSERT(pp->pp_u_bftinuse != 0);
1804 pp->pp_u_bftinuse--;
1805 }
1806 }
1807 }
1808
1809 void
1810 pp_purge_upp(struct kern_pbufpool *pp, pid_t pid)
1811 {
1812 PP_LOCK(pp);
1813 pp_purge_upp_locked(pp, pid);
1814 pp_purge_upp_bft_locked(pp, pid);
1815 PP_UNLOCK(pp);
1816 }
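/*
 * Usage sketch (illustrative; `p' is hypothetical): pp_purge_upp() frees
 * everything still outstanding for one process, or for all processes when
 * pid is -1, which is how the locked purge is used at pool teardown.
 *
 *	pp_purge_upp(pp, proc_pid(p));	// process defunct or closing
 *	pp_purge_upp(pp, (pid_t)-1);	// purge everything
 */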
1817
1818 static int
1819 pp_init_upp_bft_locked(struct kern_pbufpool *pp, boolean_t can_block)
1820 {
1821 int i, err = 0;
1822
1823 PP_LOCK_ASSERT_HELD(pp);
1824 if (pp->pp_u_bft_hash_table != NULL) {
1825 return 0;
1826 }
1827
1828 /* allocated-address hash table */
1829 /*
1830 * -fbounds-safety: We switched to sk_alloc (aka kalloc) from zalloc, so
1831 * if we see any performance hit, we can check if this caused it.
1832 */
1833 if (can_block) {
1834 pp->pp_u_bft_hash_table = sk_alloc_type_array(
1835 struct kern_pbufpool_u_bft_bkt, KERN_PBUFPOOL_U_HASH_SIZE,
1836 Z_WAITOK, skmem_tag_pbufpool_bft_hash);
1837 pp->pp_u_bft_hash_table_size = KERN_PBUFPOOL_U_HASH_SIZE;
1838 } else {
1839 pp->pp_u_bft_hash_table = sk_alloc_type_array(
1840 struct kern_pbufpool_u_bft_bkt, KERN_PBUFPOOL_U_HASH_SIZE,
1841 Z_NOWAIT, skmem_tag_pbufpool_bft_hash);
1842 pp->pp_u_bft_hash_table_size = KERN_PBUFPOOL_U_HASH_SIZE;
1843 }
1844 if (pp->pp_u_bft_hash_table == NULL) {
1845 SK_ERR("failed to zalloc packet buffer pool upp buflet hash table");
1846 err = ENOMEM;
1847 goto fail;
1848 }
1849
1850 for (i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1851 SLIST_INIT(&pp->pp_u_bft_hash_table[i].upp_head);
1852 }
1853
1854 fail:
1855 return err;
1856 }
1857
1858 static void
1859 pp_destroy_upp_bft_locked(struct kern_pbufpool *pp)
1860 {
1861 PP_LOCK_ASSERT_HELD(pp);
1862 if (pp->pp_u_bft_hash_table != NULL) {
1863 /* purge anything that's left */
1864 pp_purge_upp_bft_locked(pp, -1);
1865
1866 #if (DEBUG || DEVELOPMENT)
1867 for (int i = 0; i < KERN_PBUFPOOL_U_HASH_SIZE; i++) {
1868 ASSERT(SLIST_EMPTY(&pp->pp_u_bft_hash_table[i].upp_head));
1869 }
1870 #endif /* DEBUG || DEVELOPMENT */
1871
1872 kfree_type_counted_by(struct kern_pbufpool_u_bft_bkt,
1873 pp->pp_u_bft_hash_table_size,
1874 pp->pp_u_bft_hash_table);
1875 }
1876 ASSERT(pp->pp_u_bftinuse == 0);
1877 }
1878
1879 void
1880 pp_insert_upp_bft(struct kern_pbufpool *pp,
1881 struct __kern_buflet *kbft, pid_t pid)
1882 {
1883 PP_LOCK(pp);
1884 pp_insert_upp_bft_locked(pp, kbft, pid);
1885 PP_UNLOCK(pp);
1886 }
1887
1888 boolean_t
1889 pp_isempty_upp(struct kern_pbufpool *pp)
1890 {
1891 boolean_t isempty;
1892
1893 PP_LOCK(pp);
1894 isempty = (pp->pp_u_bufinuse == 0);
1895 PP_UNLOCK(pp);
1896
1897 return isempty;
1898 }
1899
1900 __attribute__((always_inline))
1901 static inline struct __kern_quantum *
1902 pp_metadata_init(struct __metadata_preamble *mdp, struct kern_pbufpool *pp,
1903 uint16_t bufcnt, uint32_t skmflag, struct skmem_obj **blist)
1904 {
1905 struct __kern_quantum *kqum;
1906 struct __user_quantum *uqum;
1907
1908 kqum = SK_PTR_ADDR_KQUM((uintptr_t)mdp + METADATA_PREAMBLE_SZ);
1909 ASSERT(kqum->qum_pp == pp);
1910 if (__probable(!PP_KERNEL_ONLY(pp))) {
1911 ASSERT(!(kqum->qum_qflags & QUM_F_KERNEL_ONLY));
1912 uqum = __DECONST(struct __user_quantum *, kqum->qum_user);
1913 ASSERT(uqum != NULL);
1914 } else {
1915 ASSERT(kqum->qum_qflags & QUM_F_KERNEL_ONLY);
1916 ASSERT(kqum->qum_user == NULL);
1917 uqum = NULL;
1918 }
1919
1920 if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 &&
1921 pp_metadata_construct(kqum, uqum, METADATA_IDX(kqum), pp,
1922 skmflag, bufcnt, FALSE, blist) != 0) {
1923 return NULL;
1924 }
1925
1926 /* (re)construct {user,kernel} metadata */
1927 struct __kern_packet *kpkt = SK_PTR_ADDR_KPKT(kqum);
1928 struct __kern_buflet *kbuf = &kpkt->pkt_qum_buf;
1929 uint16_t i;
1930
1931 /* sanitize flags */
1932 kpkt->pkt_pflags &= PKT_F_INIT_MASK;
1933
1934 ASSERT((kpkt->pkt_pflags & PKT_F_OPT_ALLOC) &&
1935 kpkt->pkt_com_opt != NULL);
1936 ASSERT((kpkt->pkt_pflags & PKT_F_FLOW_ALLOC) &&
1937 kpkt->pkt_flow != NULL);
1938 ASSERT((kpkt->pkt_pflags & PKT_F_TX_COMPL_ALLOC) &&
1939 kpkt->pkt_tx_compl != NULL);
1940
1941 /*
1942 * XXX: For now we always set PKT_F_FLOW_DATA;
1943 * this is a no-op but done for consistency
1944 * with the other PKT_F_*_DATA flags.
1945 */
1946 kpkt->pkt_pflags |= PKT_F_FLOW_DATA;
1947
1948 /* initialize kernel packet */
1949 KPKT_INIT(kpkt, QUM_F_INTERNALIZED);
1950
1951 ASSERT(bufcnt || PP_HAS_BUFFER_ON_DEMAND(pp));
1952 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
1953 ASSERT(kbuf->buf_ctl == NULL);
1954 ASSERT(kbuf->buf_addr == 0);
1955 /*
1956 * -fbounds-safety: buf_nbft_addr is a mach_vm_address_t
1957 * which is unsafe, so we just forge it here.
1958 */
1959 kbuf = __unsafe_forge_single(struct __kern_buflet *,
1960 __DECONST(struct __kern_buflet *, kbuf->buf_nbft_addr));
1961 }
1962 /* initialize kernel buflet */
1963 for (i = 0; i < bufcnt; i++) {
1964 ASSERT(kbuf != NULL);
1965 KBUF_INIT(kbuf);
1966 kbuf = __unsafe_forge_single(struct __kern_buflet *,
1967 __DECONST(struct __kern_buflet *, kbuf->buf_nbft_addr));
1968 }
1969 ASSERT((kbuf == NULL) || (bufcnt == 0));
1970
1971 return kqum;
1972 }
1973
1974 /*
1975 * When PPF_BUFFER_ON_DEMAND flag is set on packet pool creation, we create
1976 * packet descriptor cache with no buffer attached and a buflet cache with
1977 * cpu layer caching enabled. While operating in this mode, we can call
1978 * pp_alloc_packet_common() either with `bufcnt = 0` or `bufcnt = n`,
1979 * where n <= pp->pp_max_frags. If `bufcnt == 0` then we allocate packet
1980 * descriptor with no attached buffer from the metadata cache.
1981 * If `bufcnt != 0`, then this routine allocates packet descriptor and buflets
1982 * from their respective caches and constructs the packet on behalf of the
1983 * caller.
1984 */
1985 __attribute__((always_inline))
1986 static inline uint32_t
1987 pp_alloc_packet_common(struct kern_pbufpool *pp, uint16_t bufcnt,
1988 uint64_t *__counted_by(num)array, uint32_t num, boolean_t tagged,
1989 alloc_cb_func_t cb, const void *ctx, uint32_t skmflag)
1990 {
1991 struct __metadata_preamble *mdp;
1992 struct __kern_quantum *kqum = NULL;
1993 uint32_t allocp, need = num;
1994 struct skmem_obj *__single plist, *__single blist = NULL;
1995 uint64_t *array_cp; /* -fbounds-safety */
1996
1997 ASSERT(bufcnt <= pp->pp_max_frags);
1998 ASSERT(array != NULL && num > 0);
1999 ASSERT(PP_BATCH_CAPABLE(pp));
2000
2001 /* allocate (constructed) packet(s) with buffer(s) attached */
2002 allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist,
2003 pp->pp_kmd_cache->skm_objsize, num, skmflag);
2004
2005 /* allocate (constructed) buflet(s) with buffer(s) attached */
2006 if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
2007 (void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
2008 PP_KBFT_CACHE_DEF(pp)->skm_objsize, (allocp * bufcnt), skmflag);
2009 }
2010
2011 array_cp = array;
2012 while (plist != NULL) {
2013 struct skmem_obj *plistn;
2014
2015 plistn = plist->mo_next;
2016 plist->mo_next = NULL;
2017
2018 mdp = (struct __metadata_preamble *)(void *)plist;
2019 kqum = pp_metadata_init(mdp, pp, bufcnt, skmflag, &blist);
2020 if (kqum == NULL) {
2021 if (blist != NULL) {
2022 skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
2023 blist);
2024 blist = NULL;
2025 }
2026 plist->mo_next = plistn;
2027 skmem_cache_batch_free(pp->pp_kmd_cache, plist);
2028 plist = NULL;
2029 break;
2030 }
2031
2032 #if HAS_MTE && CONFIG_KERNEL_TAGGING
2033 if (__probable(is_mte_enabled)) {
2034 /* Checking to ensure the object address is tagged */
2035 ASSERT((vm_offset_t)kqum !=
2036 vm_memtag_canonicalize_kernel((vm_offset_t)kqum));
2037 }
2038 #endif /* HAS_MTE && CONFIG_KERNEL_TAGGING */
2039
2040 if (tagged) {
2041 *array_cp = SK_PTR_ENCODE(kqum, METADATA_TYPE(kqum),
2042 METADATA_SUBTYPE(kqum));
2043 } else {
2044 *array_cp = (uint64_t)kqum;
2045 }
2046
2047 if (cb != NULL) {
2048 (cb)(*array_cp, (num - need), ctx);
2049 }
2050
2051 ++array_cp;
2052 plist = plistn;
2053
2054 ASSERT(need > 0);
2055 --need;
2056 }
2057 ASSERT(blist == NULL);
2058 ASSERT((num - need) == allocp || kqum == NULL);
2059
2060 return num - need;
2061 }
2062
2063 uint64_t
2064 pp_alloc_packet(struct kern_pbufpool *pp, uint16_t bufcnt, uint32_t skmflag)
2065 {
2066 uint64_t kpkt = 0;
2067
2068 (void) pp_alloc_packet_common(pp, bufcnt, &kpkt, 1, FALSE,
2069 NULL, NULL, skmflag);
2070
2071 return kpkt;
2072 }
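/*
 * Usage sketch (illustrative) of the buffer-on-demand behavior described
 * above pp_alloc_packet_common(): with PPF_BUFFER_ON_DEMAND a caller may
 * ask for a bare descriptor (bufcnt 0) and attach buflets later, or have
 * the pool construct the packet with up to pp_max_frags buflets. A zero
 * return means the caches were depleted.
 *
 *	uint64_t bare = pp_alloc_packet(pp, 0, SKMEM_NOSLEEP);
 *	uint64_t full = pp_alloc_packet(pp, 2, SKMEM_NOSLEEP);	// any n <= pp_max_frags
 */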
2073
2074 int
2075 pp_alloc_packet_batch(struct kern_pbufpool *pp, uint16_t bufcnt,
2076 uint64_t *__counted_by(*size)array, uint32_t *size, boolean_t tagged,
2077 alloc_cb_func_t cb, const void *ctx, uint32_t skmflag)
2078 {
2079 uint32_t i, n;
2080 int err;
2081
2082 	ASSERT(array != NULL && size != NULL);
2083
2084 n = *size;
2085 	/*
2086 	 * -fbounds-safety: *size used to be cleared here, but array is now
2087 	 * __counted_by(*size), so setting *size to 0 would trap (brk 0x5519).
2088 	 * *size is assigned from i below in any case.
2089 	 */
2090
2091 i = pp_alloc_packet_common(pp, bufcnt, array, n, tagged,
2092 cb, ctx, skmflag);
2093 /*
2094 * -fbounds-safety: Since array is __counted_by(*size), we need to be
2095 * extra careful when *size is updated, like below. Here, we know i will
2096 * be less than or equal to the original *size value, so updating *size
2097 * is okay.
2098 */
2099 *size = i;
2100
2101 if (__probable(i == n)) {
2102 err = 0;
2103 } else if (i != 0) {
2104 err = EAGAIN;
2105 } else {
2106 err = ENOMEM;
2107 }
2108
2109 return err;
2110 }
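/*
 * Usage sketch (illustrative): batch allocation with an on-stack array.
 * On EAGAIN the first *size entries are valid; on ENOMEM nothing was
 * allocated and *size is 0.
 *
 *	uint64_t phs[64];
 *	uint32_t cnt = 64;
 *	int err = pp_alloc_packet_batch(pp, 1, phs, &cnt, TRUE, NULL, NULL,
 *	    SKMEM_NOSLEEP);
 *	if (err == 0 || err == EAGAIN) {
 *		// use phs[0..cnt-1], then pp_free_packet_batch(pp, phs, cnt)
 *	}
 */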
2111
2112 int
2113 pp_alloc_pktq(struct kern_pbufpool *pp, uint16_t bufcnt,
2114 struct pktq *pktq, uint32_t num, alloc_cb_func_t cb, const void *ctx,
2115 uint32_t skmflag)
2116 {
2117 struct __metadata_preamble *mdp;
2118 struct __kern_packet *kpkt = NULL;
2119 uint32_t allocp, need = num;
2120 struct skmem_obj *__single plist, *__single blist = NULL;
2121 int err;
2122
2123 ASSERT(pktq != NULL && num > 0);
2124 ASSERT(pp->pp_md_type == NEXUS_META_TYPE_PACKET);
2125 ASSERT(bufcnt <= pp->pp_max_frags);
2126 ASSERT(PP_BATCH_CAPABLE(pp));
2127
2128 /* allocate (constructed) packet(s) with buffer(s) attached */
2129 allocp = skmem_cache_batch_alloc(pp->pp_kmd_cache, &plist,
2130 pp->pp_kmd_cache->skm_objsize, num, skmflag);
2131
2132 /* allocate (constructed) buflet(s) with buffer(s) attached */
2133 if (PP_HAS_BUFFER_ON_DEMAND(pp) && bufcnt != 0 && allocp != 0) {
2134 (void) skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &blist,
2135 PP_KBFT_CACHE_DEF(pp)->skm_objsize, (allocp * bufcnt), skmflag);
2136 }
2137
2138 while (plist != NULL) {
2139 struct skmem_obj *plistn;
2140
2141 plistn = plist->mo_next;
2142 plist->mo_next = NULL;
2143
2144 mdp = (struct __metadata_preamble *)(void *)plist;
2145 kpkt = (struct __kern_packet *)pp_metadata_init(mdp, pp,
2146 bufcnt, skmflag, &blist);
2147 if (kpkt == NULL) {
2148 if (blist != NULL) {
2149 skmem_cache_batch_free(PP_KBFT_CACHE_DEF(pp),
2150 blist);
2151 blist = NULL;
2152 }
2153 plist->mo_next = plistn;
2154 skmem_cache_batch_free(pp->pp_kmd_cache, plist);
2155 plist = NULL;
2156 break;
2157 }
2158
2159 #if HAS_MTE && CONFIG_KERNEL_TAGGING
2160 if (__probable(is_mte_enabled)) {
2161 /* Checking to ensure the object address is tagged */
2162 ASSERT((vm_offset_t)kpkt !=
2163 vm_memtag_canonicalize_kernel((vm_offset_t)kpkt));
2164 }
2165 #endif /* HAS_MTE && CONFIG_KERNEL_TAGGING */
2166
2167 KPKTQ_ENQUEUE(pktq, kpkt);
2168
2169 if (cb != NULL) {
2170 (cb)((uint64_t)kpkt, (num - need), ctx);
2171 }
2172
2173 plist = plistn;
2174
2175 ASSERT(need > 0);
2176 --need;
2177 }
2178 ASSERT(blist == NULL);
2179 ASSERT((num - need) == allocp || kpkt == NULL);
2180
2181 if (__probable(need == 0)) {
2182 err = 0;
2183 } else if (need == num) {
2184 err = ENOMEM;
2185 } else {
2186 err = EAGAIN;
2187 }
2188
2189 return err;
2190 }
2191
2192 uint64_t
2193 pp_alloc_packet_by_size(struct kern_pbufpool *pp, uint32_t size,
2194 uint32_t skmflag)
2195 {
2196 uint32_t bufcnt = pp->pp_max_frags;
2197 uint64_t kpkt = 0;
2198
2199 if (PP_HAS_BUFFER_ON_DEMAND(pp)) {
2200 bufcnt =
2201 SK_ROUNDUP(size, PP_BUF_SIZE_DEF(pp)) / PP_BUF_SIZE_DEF(pp);
2202 ASSERT(bufcnt <= UINT16_MAX);
2203 }
2204
2205 (void) pp_alloc_packet_common(pp, (uint16_t)bufcnt, &kpkt, 1, TRUE,
2206 NULL, NULL, skmflag);
2207
2208 return kpkt;
2209 }
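/*
 * Worked example (assuming, purely for illustration, a 2 KB default
 * buffer): with PPF_BUFFER_ON_DEMAND and size 5000,
 * SK_ROUNDUP(5000, 2048) / 2048 yields bufcnt 3. Without
 * buffer-on-demand, bufcnt is always pp_max_frags regardless of size.
 */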
2210
2211 __attribute__((always_inline))
2212 static inline struct __metadata_preamble *
2213 pp_metadata_fini(struct __kern_quantum *kqum, struct kern_pbufpool *pp,
2214 struct mbuf **mp, struct __kern_packet **kpp, struct skmem_obj **blist_def,
2215 struct skmem_obj **blist_nocache_def, struct skmem_obj **blist_large,
2216     struct skmem_obj **blist_nocache_large)
2217 {
2218 struct __metadata_preamble *mdp = METADATA_PREAMBLE(kqum);
2219 ASSERT(SK_PTR_TAG(kqum) == 0);
2220 struct __kern_packet *kpkt = SK_PTR_KPKT(kqum);
2221
2222 if ((kpkt->pkt_pflags & PKT_F_TX_COMPL_TS_REQ) != 0) {
2223 __packet_perform_tx_completion_callbacks(
2224 SK_PKT2PH(kpkt), NULL);
2225 }
2226 if ((kpkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
2227 ASSERT((kpkt->pkt_pflags & PKT_F_PKT_DATA) == 0);
2228 ASSERT(kpkt->pkt_mbuf != NULL);
2229 ASSERT(kpkt->pkt_mbuf->m_nextpkt == NULL);
2230 if (mp != NULL) {
2231 ASSERT(*mp == NULL);
2232 *mp = kpkt->pkt_mbuf;
2233 } else {
2234 m_freem(kpkt->pkt_mbuf);
2235 }
2236 KPKT_CLEAR_MBUF_DATA(kpkt);
2237 } else if ((kpkt->pkt_pflags & PKT_F_PKT_DATA) != 0) {
2238 ASSERT(kpkt->pkt_pkt != NULL);
2239 ASSERT(kpkt->pkt_pkt->pkt_nextpkt == NULL);
2240 if (kpp != NULL) {
2241 ASSERT(*kpp == NULL);
2242 *kpp = kpkt->pkt_pkt;
2243 } else {
2244 /* can only recurse once */
2245 ASSERT((kpkt->pkt_pkt->pkt_pflags &
2246 PKT_F_PKT_DATA) == 0);
2247 pp_free_packet_single(kpkt->pkt_pkt);
2248 }
2249 KPKT_CLEAR_PKT_DATA(kpkt);
2250 }
2251 kpkt->pkt_pflags &= ~PKT_F_TRUNCATED;
2252 ASSERT(kpkt->pkt_nextpkt == NULL);
2253 ASSERT(kpkt->pkt_qum.qum_ksd == NULL);
2254 ASSERT((kpkt->pkt_pflags & PKT_F_MBUF_MASK) == 0);
2255 ASSERT((kpkt->pkt_pflags & PKT_F_PKT_MASK) == 0);
2256
2257 if (__improbable(PP_HAS_BUFFER_ON_DEMAND(pp))) {
2258 pp_metadata_destruct_common(kqum, pp, FALSE, blist_def, blist_nocache_def,
2259 		    blist_large, blist_nocache_large);
2260 }
2261 return mdp;
2262 }
2263
2264 void
2265 pp_free_packet_chain(struct __kern_packet *pkt_chain, int *npkt)
2266 {
2267 struct __metadata_preamble *mdp;
2268 struct skmem_obj *__single obj_mdp;
2269 struct skmem_obj *__single top = NULL;
2270 struct skmem_obj *__single blist_def = NULL, *__single blist_nocache_def = NULL;
2271 struct skmem_obj *__single blist_large = NULL, *__single blist_nocache_large = NULL;
2272 	struct skmem_obj **list = &top;
2273 struct mbuf *__single mtop = NULL;
2274 struct mbuf **mp = &mtop;
2275 struct __kern_packet *__single kptop = NULL;
2276 struct __kern_packet **__single kpp = &kptop, *pkt, *next;
2277 struct kern_pbufpool *pp;
2278 int c = 0;
2279
2280 pp = __DECONST(struct kern_pbufpool *, pkt_chain->pkt_qum.qum_pp);
2281 ASSERT(pp != NULL);
2282 ASSERT(PP_BATCH_CAPABLE(pp));
2283
2284 for (pkt = pkt_chain; pkt != NULL; pkt = next) {
2285 next = pkt->pkt_nextpkt;
2286 pkt->pkt_nextpkt = NULL;
2287
2288 ASSERT(SK_PTR_ADDR_KQUM(pkt)->qum_pp == pp);
2289 mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(pkt), pp,
2290 mp, kpp, &blist_def, &blist_nocache_def, &blist_large, &blist_nocache_large);
2291
2292 obj_mdp = __unsafe_forge_single(struct skmem_obj *, mdp);
2293 *list = obj_mdp;
2294 list = &(*list)->mo_next;
2295 c++;
2296
2297 if (*mp != NULL) {
2298 mp = &(*mp)->m_nextpkt;
2299 ASSERT(*mp == NULL);
2300 }
2301 if (*kpp != NULL) {
2302 kpp = &(*kpp)->pkt_nextpkt;
2303 ASSERT(*kpp == NULL);
2304 }
2305 }
2306
2307 ASSERT(top != NULL);
2308 skmem_cache_batch_free(pp->pp_kmd_cache, top);
2309 pp_free_kbft_list(pp, blist_def, blist_nocache_def, blist_large, blist_nocache_large);
2310 if (mtop != NULL) {
2311 DTRACE_SKYWALK(free__attached__mbuf);
2312 if (__probable(mtop->m_nextpkt != NULL)) {
2313 m_freem_list(mtop);
2314 } else {
2315 m_freem(mtop);
2316 }
2317 }
2318 if (kptop != NULL) {
2319 int cnt = 0;
2320 pp_free_packet_chain(kptop, &cnt);
2321 DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
2322 }
2323 if (npkt != NULL) {
2324 *npkt = c;
2325 }
2326 }
2327
2328 void
2329 pp_free_pktq(struct pktq *pktq)
2330 {
2331 if (__improbable(KPKTQ_EMPTY(pktq))) {
2332 return;
2333 }
2334 struct __kern_packet *pkt = KPKTQ_FIRST(pktq);
2335 pp_free_packet_chain(pkt, NULL);
2336 KPKTQ_DISPOSE(pktq);
2337 }
2338
2339 void
2340 pp_drop_pktq(struct pktq *pktq, struct ifnet *ifp, uint16_t flags,
2341 drop_reason_t reason, const char *funcname, uint16_t linenum)
2342 {
2343 drop_func_t dropfunc;
2344 struct __kern_packet *kpkt;
2345
2346 if (KPKTQ_EMPTY(pktq)) {
2347 return;
2348 }
2349 if (__probable(droptap_total_tap_count == 0)) {
2350 goto nodroptap;
2351 }
2352
2353 if (flags & DROPTAP_FLAG_DIR_OUT) {
2354 dropfunc = droptap_output_packet;
2355 } else if (flags & DROPTAP_FLAG_DIR_IN) {
2356 dropfunc = droptap_input_packet;
2357 } else {
2358 goto nodroptap;
2359 }
2360
2361 KPKTQ_FOREACH(kpkt, pktq) {
2362 dropfunc(SK_PKT2PH(kpkt), reason, funcname, linenum, flags, ifp,
2363 kpkt->pkt_qum.qum_pid, NULL, -1, NULL, 0, 0);
2364 }
2365
2366 nodroptap:
2367 pp_free_pktq(pktq);
2368 }
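/*
 * Usage sketch (illustrative): unlike pp_free_pktq(), this variant reports
 * each packet to any attached droptap before freeing, so a caller on the
 * input path would pass the direction flag plus whatever drop_reason_t it
 * has on hand:
 *
 *	pp_drop_pktq(pktq, ifp, DROPTAP_FLAG_DIR_IN, reason,
 *	    __func__, (uint16_t)__LINE__);
 */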
2369
2370 __attribute__((always_inline))
2371 static inline void
2372 pp_free_packet_array(struct kern_pbufpool *pp, uint64_t *__counted_by(num)array, uint32_t num)
2373 {
2374 struct __metadata_preamble *mdp;
2375 struct skmem_obj *__single obj_mdp = NULL;
2376 struct skmem_obj *__single top = NULL;
2377 struct skmem_obj *__single blist_def = NULL, *__single blist_nocache_def = NULL;
2378 struct skmem_obj *__single blist_large = NULL, *__single blist_nocache_large = NULL;
2379 	struct skmem_obj **list = &top;
2380 struct mbuf *__single mtop = NULL;
2381 struct mbuf **mp = &mtop;
2382 struct __kern_packet *__single kptop = NULL;
2383 struct __kern_packet **kpp = &kptop;
2384 uint32_t i;
2385
2386 ASSERT(pp != NULL);
2387 ASSERT(array != NULL && num > 0);
2388 ASSERT(PP_BATCH_CAPABLE(pp));
2389
2390 for (i = 0; i < num; i++) {
2391 ASSERT(SK_PTR_ADDR_KQUM(array[i])->qum_pp == pp);
2392 mdp = pp_metadata_fini(SK_PTR_ADDR_KQUM(array[i]), pp,
2393 mp, kpp, &blist_def, &blist_nocache_def, &blist_large, &blist_nocache_large);
2394
2395 obj_mdp = __unsafe_forge_single(struct skmem_obj *, mdp);
2396 *list = obj_mdp;
2397 list = &(*list)->mo_next;
2398 array[i] = 0;
2399
2400 if (*mp != NULL) {
2401 mp = &(*mp)->m_nextpkt;
2402 ASSERT(*mp == NULL);
2403 }
2404 if (*kpp != NULL) {
2405 kpp = &(*kpp)->pkt_nextpkt;
2406 ASSERT(*kpp == NULL);
2407 }
2408 }
2409
2410 ASSERT(top != NULL);
2411 skmem_cache_batch_free(pp->pp_kmd_cache, top);
2412 pp_free_kbft_list(pp, blist_def, blist_nocache_def, blist_large, blist_nocache_large);
2413 if (mtop != NULL) {
2414 DTRACE_SKYWALK(free__attached__mbuf);
2415 if (__probable(mtop->m_nextpkt != NULL)) {
2416 m_freem_list(mtop);
2417 } else {
2418 m_freem(mtop);
2419 }
2420 }
2421 if (kptop != NULL) {
2422 int cnt = 0;
2423 pp_free_packet_chain(kptop, &cnt);
2424 DTRACE_SKYWALK1(free__attached__pkt, int, cnt);
2425 }
2426 }
2427
2428 void
2429 pp_free_packet(struct kern_pbufpool *pp, uint64_t kqum)
2430 {
2431 pp_free_packet_array(pp, &kqum, 1);
2432 }
2433
2434 void
2435 pp_free_packet_batch(const kern_pbufpool_t pp, uint64_t *__counted_by(size)array, uint32_t size)
2436 {
2437 pp_free_packet_array(pp, array, size);
2438 }
2439
2440 void
2441 pp_free_packet_single(struct __kern_packet *pkt)
2442 {
2443 ASSERT(pkt->pkt_nextpkt == NULL);
2444 pp_free_packet(__DECONST(struct kern_pbufpool *,
2445 pkt->pkt_qum.qum_pp), SK_PTR_ADDR(pkt));
2446 }
2447
2448 void
2449 pp_drop_packet_single(struct __kern_packet *pkt, struct ifnet *ifp, uint16_t flags,
2450 drop_reason_t reason, const char *funcname, uint16_t linenum)
2451 {
2452 drop_func_t dropfunc;
2453
2454 if (pkt->pkt_length == 0) {
2455 return;
2456 }
2457 if (__probable(droptap_total_tap_count == 0)) {
2458 goto nodroptap;
2459 }
2460
2461 if (flags & DROPTAP_FLAG_DIR_OUT) {
2462 dropfunc = droptap_output_packet;
2463 } else if (flags & DROPTAP_FLAG_DIR_IN) {
2464 dropfunc = droptap_input_packet;
2465 } else {
2466 goto nodroptap;
2467 }
2468
2469 dropfunc(SK_PKT2PH(pkt), reason, funcname, linenum, flags, ifp,
2470 pkt->pkt_qum.qum_pid, NULL, -1, NULL, 0, 0);
2471
2472 nodroptap:
2473 pp_free_packet_single(pkt);
2474 }
2475
2476 static mach_vm_address_t
2477 pp_alloc_buffer_common(const kern_pbufpool_t pp, struct skmem_obj_info *oi,
2478 uint32_t skmflag, bool large)
2479 {
2480 /*
2481 * XXX -fbounds-safety: We can't change this mach_vm_address_t to some
2482 * other (safe) pointer type, because IOSkywalkFamily depends on this
2483 * being mach_vm_address_t
2484 */
2485 mach_vm_address_t baddr;
2486 	struct skmem_cache *skm = large ? PP_BUF_CACHE_LARGE(pp) :
2487 PP_BUF_CACHE_DEF(pp);
2488
2489 ASSERT(skm != NULL);
2490 /* allocate a cached buffer */
2491 baddr = (mach_vm_address_t)skmem_cache_alloc(skm, skmflag);
2492
2493 #if (DEVELOPMENT || DEBUG)
2494 uint64_t mtbf = skmem_region_get_mtbf();
2495 /*
2496 * MTBF is applicable only for non-blocking allocations here.
2497 */
2498 if (__improbable(mtbf != 0 && (net_uptime_ms() % mtbf) == 0 &&
2499 (skmflag & SKMEM_NOSLEEP))) {
2500 SK_ERR("pp \"%s\" MTBF failure", pp->pp_name);
2501 net_update_uptime();
2502 if (baddr != 0) {
2503 skmem_cache_free(skm,
2504 __unsafe_forge_single(struct skmem_obj *, baddr));
2505 baddr = 0;
2506 }
2507 }
2508 #endif /* (DEVELOPMENT || DEBUG) */
2509
2510 if (__improbable(baddr == 0)) {
2511 SK_DF(SK_VERB_MEM, "failed to alloc buffer, pp %p",
2512 SK_KVA(pp));
2513 return 0;
2514 }
2515 skmem_cache_get_obj_info(skm,
2516 __unsafe_forge_single(struct skmem_obj *, baddr), oi, NULL);
2517 ASSERT(SKMEM_OBJ_BUFCTL(oi) != NULL);
2518 ASSERT((mach_vm_address_t)SKMEM_OBJ_ADDR(oi) == baddr);
2519 return baddr;
2520 }
2521
2522 errno_t
2523 pp_alloc_buffer(const kern_pbufpool_t pp, mach_vm_address_t *baddr,
2524 kern_segment_t *seg, kern_obj_idx_seg_t *idx, uint32_t skmflag)
2525 {
2526 struct skmem_obj_info oib;
2527
2528 VERIFY(pp != NULL && baddr != NULL);
2529 VERIFY((seg != NULL) == (idx != NULL));
2530
2531 if (__improbable(!PP_HAS_BUFFER_ON_DEMAND(pp))) {
2532 return ENOTSUP;
2533 }
2534
2535 *baddr = pp_alloc_buffer_common(pp, &oib, skmflag, false);
2536 if (__improbable(*baddr == 0)) {
2537 return ENOMEM;
2538 }
2539
2540 if (seg != NULL) {
2541 ASSERT(SKMEM_OBJ_SEG(&oib) != NULL);
2542 *seg = SKMEM_OBJ_SEG(&oib);
2543 *idx = SKMEM_OBJ_IDX_SEG(&oib);
2544 }
2545 return 0;
2546 }
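/*
 * Usage sketch (illustrative): raw buffer allocation is only supported on
 * buffer-on-demand pools (ENOTSUP otherwise); seg and idx must be supplied
 * together or not at all, and the buffer goes back via pp_free_buffer().
 *
 *	mach_vm_address_t baddr;
 *	if (pp_alloc_buffer(pp, &baddr, NULL, NULL, SKMEM_NOSLEEP) == 0) {
 *		...
 *		pp_free_buffer(pp, baddr);
 *	}
 */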
2547
2548 void
2549 pp_free_buffer(const kern_pbufpool_t pp, mach_vm_address_t addr)
2550 {
2551 ASSERT(pp != NULL && addr != 0);
2552 skmem_cache_free(PP_BUF_CACHE_DEF(pp), __unsafe_forge_single(
2553 struct skmem_obj *, addr));
2554 }
2555
2556 __attribute__((always_inline))
2557 static inline uint32_t
2558 pp_alloc_buflet_common(struct kern_pbufpool *pp,
2559 uint64_t *__counted_by(num)array, uint32_t num, uint32_t skmflag,
2560 bool large)
2561 {
2562 struct __kern_buflet *kbft = NULL;
2563 uint32_t allocd, need = num;
2564 struct skmem_obj *__single list;
2565 uint64_t *array_cp; /* -fbounds-safety */
2566
2567 ASSERT(array != NULL && num > 0);
2568 ASSERT(PP_BATCH_CAPABLE(pp));
2569 ASSERT(PP_KBFT_CACHE_DEF(pp) != NULL);
2570 ASSERT(PP_BUF_SIZE_LARGE(pp) != 0 || !large);
2571
2572 if (large) {
2573 allocd = skmem_cache_batch_alloc(PP_KBFT_CACHE_LARGE(pp), &list,
2574 PP_KBFT_CACHE_LARGE(pp)->skm_objsize, num, skmflag);
2575 } else {
2576 allocd = skmem_cache_batch_alloc(PP_KBFT_CACHE_DEF(pp), &list,
2577 PP_KBFT_CACHE_DEF(pp)->skm_objsize, num, skmflag);
2578 }
2579
2580 array_cp = array;
2581 while (list != NULL) {
2582 struct skmem_obj *listn;
2583
2584 listn = list->mo_next;
2585 list->mo_next = NULL;
2586 kbft = (kern_buflet_t)(void *)list;
2587
2588 #if HAS_MTE && CONFIG_KERNEL_TAGGING
2589 if (__probable(is_mte_enabled)) {
2590 /* Checking to ensure the object address is tagged */
2591 ASSERT((vm_offset_t)kbft !=
2592 vm_memtag_canonicalize_kernel((vm_offset_t)kbft));
2593 }
2594 #endif /* HAS_MTE && CONFIG_KERNEL_TAGGING */
2595
2596 KBUF_EXT_INIT(kbft, pp);
2597 *array_cp = (uint64_t)kbft;
2598 ++array_cp;
2599 list = listn;
2600 ASSERT(need > 0);
2601 --need;
2602 }
2603 ASSERT((num - need) == allocd || kbft == NULL);
2604 return num - need;
2605 }
2606
2607 errno_t
2608 pp_alloc_buflet(struct kern_pbufpool *pp, kern_buflet_t *kbft, uint32_t skmflag,
2609 bool large)
2610 {
2611 uint64_t bft;
2612
2613 if (__improbable(!pp_alloc_buflet_common(pp, &bft, 1, skmflag, large))) {
2614 return ENOMEM;
2615 }
2616 *kbft = __unsafe_forge_single(kern_buflet_t, bft);
2617 return 0;
2618 }
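/*
 * Usage sketch (illustrative): a single external buflet from the default
 * buffer cache, released back to the pool with pp_free_buflet() once it is
 * no longer attached to any packet.
 *
 *	kern_buflet_t bft;
 *	if (pp_alloc_buflet(pp, &bft, SKMEM_NOSLEEP, false) == 0) {
 *		...
 *		pp_free_buflet(pp, bft);
 *	}
 */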
2619
2620 errno_t
2621 pp_alloc_buflet_batch(struct kern_pbufpool *pp,
2622 uint64_t *__counted_by(*size)array, uint32_t *size, uint32_t skmflag,
2623 bool large)
2624 {
2625 uint32_t i, n;
2626 int err;
2627
2628 	ASSERT(array != NULL && size != NULL);
2629
2630 n = *size;
2631 i = pp_alloc_buflet_common(pp, array, n, skmflag, large);
2632 *size = i;
2633
2634 if (__probable(i == n)) {
2635 err = 0;
2636 } else if (i != 0) {
2637 err = EAGAIN;
2638 } else {
2639 err = ENOMEM;
2640 }
2641
2642 return err;
2643 }
2644
2645 __attribute__((always_inline))
2646 static void
2647 pp_free_buflet_common(const kern_pbufpool_t pp, kern_buflet_t kbft)
2648 {
2649 ASSERT(kbft->buf_nbft_idx == OBJ_IDX_NONE);
2650 ASSERT(kbft->buf_nbft_addr == 0);
2651
2652 if (kbft->buf_flag & BUFLET_FLAG_EXTERNAL) {
2653 ASSERT(kbft->buf_addr != 0);
2654 ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
2655 ASSERT(kbft->buf_bft_idx_reg != OBJ_IDX_NONE);
2656 ASSERT(kbft->buf_ctl != NULL);
2657 ASSERT(((struct __kern_buflet_ext *)kbft)->
2658 kbe_buf_upp_link.sle_next == NULL);
2659 if (kbft->buf_ctl->bc_usecnt > 1) {
2660 skmem_cache_free_nocache(BUFLET_HAS_LARGE_BUF(kbft) ?
2661 PP_KBFT_CACHE_LARGE(pp) : PP_KBFT_CACHE_DEF(pp),
2662 (void *)kbft);
2663 } else {
2664 skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
2665 PP_KBFT_CACHE_LARGE(pp) : PP_KBFT_CACHE_DEF(pp),
2666 (void *)kbft);
2667 }
2668 } else if (__probable(kbft->buf_addr != 0)) {
2669 void *objaddr = kbft->buf_objaddr;
2670 uint32_t usecnt = 0;
2671
2672 ASSERT(kbft->buf_idx != OBJ_IDX_NONE);
2673 ASSERT(kbft->buf_ctl != NULL);
2674 KBUF_DTOR(kbft, usecnt);
2675 SK_DF(SK_VERB_MEM, "pp %p buf %p usecnt %u",
2676 SK_KVA(pp), SK_KVA(objaddr), usecnt);
2677 if (__probable(usecnt == 0)) {
2678 skmem_cache_free(BUFLET_HAS_LARGE_BUF(kbft) ?
2679 PP_BUF_CACHE_LARGE(pp) : PP_BUF_CACHE_DEF(pp),
2680 objaddr);
2681 }
2682 }
2683 }
2684
2685 void
2686 pp_free_buflet(const kern_pbufpool_t pp, kern_buflet_t kbft)
2687 {
2688 ASSERT(kbft->buf_flag & BUFLET_FLAG_EXTERNAL);
2689 ASSERT(pp != NULL && kbft != NULL);
2690 pp_free_buflet_common(pp, kbft);
2691 }
2692
2693 void
2694 pp_reap_caches(boolean_t purge)
2695 {
2696 skmem_cache_reap_now(pp_opt_cache, purge);
2697 skmem_cache_reap_now(pp_flow_cache, purge);
2698 skmem_cache_reap_now(pp_compl_cache, purge);
2699 }
2700