/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
 * All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/systm.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/monitor/nx_monitor.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/upipe/nx_user_pipe.h>
#include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
#include <kern/thread.h>

static int na_krings_use(struct kern_channel *);
static void na_krings_unuse(struct kern_channel *);
static void na_krings_verify(struct nexus_adapter *);
static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
static void na_set_all_rings(struct nexus_adapter *, uint32_t);
static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
static void na_unset_ringid(struct kern_channel *);
static void na_teardown(struct nexus_adapter *, struct kern_channel *,
    boolean_t);

static int na_kr_create(struct nexus_adapter *, uint32_t, boolean_t);
static void na_kr_delete(struct nexus_adapter *);
static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
    boolean_t);
static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
    boolean_t, struct proc *);
static int na_kr_populate_slots(struct __kern_channel_ring *);
static void na_kr_depopulate_slots(struct __kern_channel_ring *,
    struct kern_channel *, boolean_t defunct);

static int na_schema_alloc(struct kern_channel *);

static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
static void na_pseudo_free(struct nexus_adapter *);
static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
static void na_pseudo_dtor(struct nexus_adapter *);
static int na_pseudo_krings_create(struct nexus_adapter *,
    struct kern_channel *);
static void na_pseudo_krings_delete(struct nexus_adapter *,
    struct kern_channel *, boolean_t);
static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);

#define NA_KRING_IDLE_TIMEOUT   (NSEC_PER_SEC * 30) /* 30 seconds */

static ZONE_DEFINE(na_pseudo_zone, SKMEM_ZONE_PREFIX ".na.pseudo",
    sizeof(struct nexus_adapter), ZC_ZFREE_CLEARMEM);

static int __na_inited = 0;

#define NA_NUM_WMM_CLASSES      4
#define NAKR_WMM_SC2RINGID(_s)  PKT_SC2TC(_s)
#define NAKR_SET_SVC_LUT(_n, _s)                                        \
	(_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
#define NAKR_SET_KR_SVC(_n, _s)                                         \
	NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)

#define NA_UPP_ALLOC_LOWAT      8
static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;

#define NA_UPP_REAP_INTERVAL    10 /* seconds */
static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;

#define NA_UPP_WS_HOLD_TIME     2 /* seconds */
static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;

#define NA_UPP_REAP_MIN_PKTS    0
static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;

#define NA_UPP_ALLOC_BUF_LOWAT     64
static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;

#if (DEVELOPMENT || DEBUG)
static uint64_t _na_inject_error = 0;
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
	_SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)

SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
    NA_UPP_WS_HOLD_TIME, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
    NA_UPP_REAP_INTERVAL, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
    NA_UPP_REAP_MIN_PKTS, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
    NA_UPP_ALLOC_LOWAT, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
    NA_UPP_ALLOC_BUF_LOWAT, "");
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
    CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
#else
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
#endif /* !DEVELOPMENT && !DEBUG */

#define SKMEM_TAG_NX_RINGS      "com.apple.skywalk.nexus.rings"
static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);

#define SKMEM_TAG_NX_CONTEXTS   "com.apple.skywalk.nexus.contexts"
static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);

#define SKMEM_TAG_NX_SCRATCH    "com.apple.skywalk.nexus.scratch"
static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);

#if !XNU_TARGET_OS_OSX
/* see KLDBootstrap::readPrelinkedExtensions() for details */
extern uuid_t kernelcache_uuid;
#else /* XNU_TARGET_OS_OSX */
/* see panic_init() for details */
extern unsigned char *kernel_uuid;
#endif /* XNU_TARGET_OS_OSX */

void
na_init(void)
{
	/*
	 * Changing the size of the nexus_mdata structure won't break ABI,
	 * but we need to be mindful of memory consumption; thus here
	 * we add a compile-time check to make sure the size is within
	 * the expected limit and that it's properly aligned.  This
	 * check may be adjusted in future as needed.
	 */
	_CASSERT(sizeof(struct nexus_mdata) <= 32 &&
	    IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
	_CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));

	/* see comments on nexus_meta_type_t */
	_CASSERT(NEXUS_META_TYPE_MAX == 3);
	_CASSERT(NEXUS_META_SUBTYPE_MAX == 3);

	ASSERT(!__na_inited);

	__na_inited = 1;
}

void
na_fini(void)
{
	if (__na_inited) {
		__na_inited = 0;
	}
}

/*
 * Interpret the ringid of a chreq by translating it into a pair
 * of intervals of ring indices:
 *
 * [txfirst, txlast) and [rxfirst, rxlast)
 */
int
na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
    ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
{
	enum txrx t;

	switch (ring_set) {
	case RING_SET_ALL:
		/*
		 * Ring pair eligibility: all ring(s).
		 */
		if (ring_id != CHANNEL_RING_ID_ANY &&
		    ring_id >= na_get_nrings(na, NR_TX) &&
		    ring_id >= na_get_nrings(na, NR_RX)) {
			SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
			    na->na_name, (int)ring_id, ring_set);
			return EINVAL;
		}
		for_rx_tx(t) {
			if (ring_id == CHANNEL_RING_ID_ANY) {
				first[t] = 0;
				last[t] = na_get_nrings(na, t);
			} else {
				first[t] = ring_id;
				last[t] = ring_id + 1;
			}
		}
		break;

	default:
		SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
		return EINVAL;
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING,
	    "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
	    na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
	    first[NR_RX], last[NR_RX]);

	return 0;
}
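
/*
 * Illustrative example (not part of the original source): for an adapter
 * with 4 TX and 4 RX rings, RING_SET_ALL with CHANNEL_RING_ID_ANY yields
 * tx [0,4) and rx [0,4), i.e. every ring; RING_SET_ALL with a specific
 * ring_id of 2 instead yields the single pair tx [2,3) and rx [2,3).
 */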

/*
 * Set the ring ID. For devices with a single queue, a request
 * for all rings is the same as a single ring.
 */
static int
na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
{
	struct nexus_adapter *na = ch->ch_na;
	int error;
	enum txrx t;
	uint32_t n_alloc_rings;

	if ((error = na_interp_ringid(na, ring_id, ring_set,
	    ch->ch_first, ch->ch_last)) != 0) {
		return error;
	}

	n_alloc_rings = na_get_nrings(na, NR_A);
	if (n_alloc_rings != 0) {
		ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
		ch->ch_last[NR_A] = ch->ch_last[NR_F] =
		    ch->ch_first[NR_A] + n_alloc_rings;
	} else {
		ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
		ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
	}
	ch->ch_first[NR_EV] = 0;
	ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
	/* XXX: should we initialize na_si_users for the event ring? */

	/*
	 * Optimization: count the users registered for more than
	 * one ring, which are the ones sleeping on the global queue.
	 * The default na_notify() callback will then avoid signaling
	 * the global queue if nobody is using it.
	 */
	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			na->na_si_users[t]++;
			ASSERT(na->na_si_users[t] != 0);
		}
	}
	return 0;
}

static void
na_unset_ringid(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	enum txrx t;

	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			ASSERT(na->na_si_users[t] != 0);
			na->na_si_users[t]--;
		}
		ch->ch_first[t] = ch->ch_last[t] = 0;
	}
}

/*
 * Check that the rings we want to bind are not exclusively owned by a previous
 * bind.  If exclusive ownership has been requested, we also mark the rings.
 */
/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
na_krings_use(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct __kern_channel_ring *kring;
	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
	enum txrx t;
	uint32_t i;

	SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

	/*
	 * First round: check that none of the requested rings is
	 * already exclusively owned, and that we are not requesting
	 * exclusive ownership of rings that are already in use.
	 */
	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
			    (kring->ckr_users && excl)) {
				SK_DF(SK_VERB_NA | SK_VERB_RING,
				    "kr \"%s\" (0x%llx) krflags 0x%b is busy",
				    kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS);
				return EBUSY;
			}
		}
	}

	/*
	 * Second round: increment usage count and possibly
	 * mark as exclusive.
	 */

	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			kring->ckr_users++;
			if (excl) {
				kring->ckr_flags |= CKRF_EXCLUSIVE;
			}
		}
	}

	return 0;
}
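
/*
 * Illustrative scenario (not from the original source): if a channel is
 * already bound to TX ring 0 without CHANF_EXCLUSIVE, a later bind that
 * requests CHANF_EXCLUSIVE on the same ring fails the first round above
 * with EBUSY (ckr_users != 0 with excl set); once an exclusive bind has
 * marked a ring CKRF_EXCLUSIVE, every subsequent bind to it fails until
 * na_krings_unuse() clears the flag.
 */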

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_unuse(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct __kern_channel_ring *kring;
	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
	enum txrx t;
	uint32_t i;

	SK_DF(SK_VERB_NA | SK_VERB_RING,
	    "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			if (excl) {
				kring->ckr_flags &= ~CKRF_EXCLUSIVE;
			}
			kring->ckr_users--;
		}
	}
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_verify(struct nexus_adapter *na)
{
	struct __kern_channel_ring *kring;
	enum txrx t;
	uint32_t i;

	for_all_rings(t) {
		for (i = 0; i < na_get_nrings(na, t); i++) {
			kring = &NAKR(na, t)[i];
			/* na_kr_create() validations */
			ASSERT(kring->ckr_num_slots > 0);
			ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
			ASSERT(kring->ckr_pp != NULL);

			if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
				continue;
			}
			/* na_kr_setup() validations */
			if (KR_KERNEL_ONLY(kring)) {
				ASSERT(kring->ckr_ring == NULL);
			} else {
				ASSERT(kring->ckr_ring != NULL);
			}
			ASSERT(kring->ckr_ksds_last ==
			    &kring->ckr_ksds[kring->ckr_lim]);
		}
	}
}

int
na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr)
{
	struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
	struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
	uint32_t ch_mode = chr->cr_mode;
	int err = 0;

	SK_LOCK_ASSERT_HELD();
	ASSERT(ch->ch_schema == NULL);
	ASSERT(ch->ch_na == NULL);

	/* ring configuration may have changed, fetch from the card */
	na_update_config(na);
	ch->ch_na = na; /* store the reference */
	err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
	if (err != 0) {
		goto err;
	}

	atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
	    CHANF_USER_PACKET_POOL | CHANF_EVENT_RING));
	if (ch_mode & CHMODE_EXCLUSIVE) {
		atomic_bitset_32(&ch->ch_flags, CHANF_EXCLUSIVE);
	}
	/*
	 * Disallow automatic sync for monitor mode, since TX
	 * direction is disabled.
	 */
	if (ch_mode & CHMODE_MONITOR) {
		atomic_bitset_32(&ch->ch_flags, CHANF_RXONLY);
	}

	if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
	    !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
		SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
		    ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
		err = EINVAL;
		goto err;
	}

	if (na->na_arena->ar_flags & ARF_DEFUNCT) {
		err = ENXIO;
		goto err;
	}

	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
		ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
		atomic_bitset_32(&ch->ch_flags, CHANF_USER_PACKET_POOL);
	}

	if (ch_mode & CHMODE_EVENT_RING) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(na->na_flags & NAF_EVENT_RING);
		ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
		atomic_bitset_32(&ch->ch_flags, CHANF_EVENT_RING);
	}

	/*
	 * If this is the first channel of the adapter, create
	 * the rings and their in-kernel view, the krings.
	 */
	if (na->na_channels == 0) {
		err = na->na_krings_create(na, ch);
		if (err != 0) {
			goto err;
		}

		/*
		 * Sanity check; this is already done in na_kr_create(),
		 * but we do it here as well to validate na_kr_setup().
		 */
		na_krings_verify(na);
		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
	}

	/*
	 * Validate ownership and usability of the krings; take into account
	 * whether some previous bind has exclusive ownership on them.
	 */
	err = na_krings_use(ch);
	if (err != 0) {
		goto err_del_rings;
	}

	/* for a user-facing channel, create a new channel schema */
	if (!(ch->ch_flags & CHANF_KERNEL)) {
		err = na_schema_alloc(ch);
		if (err != 0) {
			goto err_rel_excl;
		}

		ASSERT(ch->ch_schema != NULL);
		ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
	} else {
		ASSERT(ch->ch_schema == NULL);
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* update our work timestamp */
	na->na_work_ts = net_uptime();

	na->na_channels++;

	/*
	 * If user packet pool is desired, initialize the allocated
	 * object hash table in the pool, if not already.  This also
	 * retains a refcnt on the pool which the caller must release.
	 */
	ASSERT(ch->ch_pp == NULL);
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
#pragma unused(tx_pp)
		ASSERT(rx_pp == tx_pp);
		err = pp_init_upp(rx_pp, TRUE);
		if (err != 0) {
			goto err_free_schema;
		}
		ch->ch_pp = rx_pp;
	}

	if (!NA_IS_ACTIVE(na)) {
		err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
		if (err != 0) {
			goto err_release_pp;
		}

		SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
		    SK_KVA(na));
		SK_D("  na_md_type:    %u", na->na_md_type);
		SK_D("  na_md_subtype: %u", na->na_md_subtype);
	}

	SK_D("ch 0x%llx", SK_KVA(ch));
	SK_D("  ch_flags:     0x%b", ch->ch_flags, CHANF_BITS);
	if (ch->ch_schema != NULL) {
		SK_D("  ch_schema:    0x%llx", SK_KVA(ch->ch_schema));
	}
	SK_D("  ch_na:        0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
	    ch->ch_na->na_channels);
	SK_D("  ch_tx_rings:  [%u,%u)", ch->ch_first[NR_TX],
	    ch->ch_last[NR_TX]);
	SK_D("  ch_rx_rings:  [%u,%u)", ch->ch_first[NR_RX],
	    ch->ch_last[NR_RX]);
	SK_D("  ch_alloc_rings:  [%u,%u)", ch->ch_first[NR_A],
	    ch->ch_last[NR_A]);
	SK_D("  ch_free_rings:  [%u,%u)", ch->ch_first[NR_F],
	    ch->ch_last[NR_F]);
	SK_D("  ch_ev_rings:  [%u,%u)", ch->ch_first[NR_EV],
	    ch->ch_last[NR_EV]);

	return 0;

err_release_pp:
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(ch->ch_pp != NULL);
		pp_release(rx_pp);
		ch->ch_pp = NULL;
	}
err_free_schema:
	*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
	    NEXUS_META_TYPE_INVALID;
	*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
	    NEXUS_META_SUBTYPE_INVALID;
	ASSERT(na->na_channels != 0);
	na->na_channels--;
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}
err_rel_excl:
	na_krings_unuse(ch);
err_del_rings:
	if (na->na_channels == 0) {
		na->na_krings_delete(na, ch, FALSE);
	}
err:
	ch->ch_na = NULL;
	ASSERT(err != 0);

	return err;
}

/*
 * Undo everything that was done in na_bind_channel().
 */
/* call with SK_LOCK held */
void
na_unbind_channel(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;

	SK_LOCK_ASSERT_HELD();

	ASSERT(na->na_channels != 0);
	na->na_channels--;

	/* release exclusive use if it was requested at bind time */
	na_krings_unuse(ch);

	if (na->na_channels == 0) {     /* last instance */
		SK_D("%s(%d): deleting last channel instance for %s",
		    ch->ch_name, ch->ch_pid, na->na_name);

		/*
		 * Free any remaining allocated packets attached to
		 * the slots, followed by a teardown of the arena.
		 */
		na_teardown(na, ch, FALSE);

		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    NEXUS_META_TYPE_INVALID;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    NEXUS_META_SUBTYPE_INVALID;
	} else {
		SK_D("%s(%d): %s has %u remaining channel instance(s)",
		    ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
	}

	/*
	 * Free any allocated packets (for the process) attached to the slots;
	 * note that na_teardown() could have done this there as well.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		pp_release(ch->ch_pp);
		ch->ch_pp = NULL;
	}

	/* possibly decrement counter of tx_si/rx_si users */
	na_unset_ringid(ch);

	/* reap the caches now (purge if adapter is idle) */
	skmem_arena_reap(na->na_arena, (na->na_channels == 0));

	/* delete the channel schema */
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* destroy the memory map */
	skmem_arena_munmap_channel(na->na_arena, ch);

	/* mark the channel as unbound */
	atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE));
	ch->ch_na = NULL;

	/* and finally release the nexus adapter; this might free it */
	(void) na_release_locked(na);
}

static void
na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

#if CONFIG_NEXUS_MONITOR
	/*
	 * Walk through all the rings and tell any monitor
	 * that the port is going to exit Skywalk mode.
	 */
	nx_mon_stop(na);
#endif /* CONFIG_NEXUS_MONITOR */

	/*
	 * Deactivate the adapter.
	 */
	(void) na->na_activate(na,
	    (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));

	/*
	 * Free any remaining allocated packets for this process.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		if (!defunct) {
			pp_release(ch->ch_pp);
			ch->ch_pp = NULL;
		}
	}

	/*
	 * Delete rings and buffers.
	 */
	na->na_krings_delete(na, ch, defunct);
}

/* call with SK_LOCK held */
/*
 * Allocate the per-fd structure __user_channel_schema.
 */
static int
na_schema_alloc(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn;
	mach_vm_offset_t roff[SKMEM_REGIONS];
	struct __kern_channel_ring *kr;
	struct __user_channel_schema *csm;
	struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
	mach_vm_offset_t base;
	uint32_t i, j, k, n[NR_ALL];
	enum txrx t;

	/* see comments for struct __user_channel_schema */
	_CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
	_CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
	    sizeof(csm->csm_ver));
	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
	    sizeof(csm->csm_kern_name));

	SK_LOCK_ASSERT_HELD();

	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
	arn = skmem_arena_nexus(ar);
	ASSERT(arn != NULL);
	for_all_rings(t) {
		n[t] = 0;
	}

	csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
	if (csm == NULL) {
		return ENOMEM;
	}

	skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
	bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));

	*(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;

	/* kernel version and executable UUID */
	_CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
	(void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
	    version, sizeof(csm->csm_kern_name) - 1);
#if !XNU_TARGET_OS_OSX
	(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
	    kernelcache_uuid, sizeof(csm->csm_kern_uuid));
#else /* XNU_TARGET_OS_OSX */
	if (kernel_uuid != NULL) {
		(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
		    kernel_uuid, sizeof(csm->csm_kern_uuid));
	}
#endif /* XNU_TARGET_OS_OSX */

	for_rx_tx(t) {
		ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
		n[t] = ch->ch_last[t] - ch->ch_first[t];
		ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
	}

	/* return total number of tx and rx rings for this channel */
	*(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
	*(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];

	if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
		*(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
		    na->na_num_allocator_ring_pairs;
		n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
		ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
		ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
		ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
	}

	if (ch->ch_flags & CHANF_EVENT_RING) {
		n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
		ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
		*(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
	}

	bzero(&roff, sizeof(roff));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_SCHEMA ||
			    i == SKMEM_REGION_BUF_LARGE ||
			    i == SKMEM_REGION_RXBUF_DEF ||
			    i == SKMEM_REGION_RXBUF_LARGE ||
			    i == SKMEM_REGION_TXBUF_DEF ||
			    i == SKMEM_REGION_TXBUF_LARGE ||
			    i == SKMEM_REGION_RXKMD ||
			    i == SKMEM_REGION_TXKMD ||
			    i == SKMEM_REGION_UMD ||
			    i == SKMEM_REGION_UBFT ||
			    i == SKMEM_REGION_KBFT ||
			    i == SKMEM_REGION_RXKBFT ||
			    i == SKMEM_REGION_TXKBFT ||
			    i == SKMEM_REGION_TXAUSD ||
			    i == SKMEM_REGION_RXFUSD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		/* not for nexus */
		ASSERT(i != SKMEM_REGION_SYSCTLS);

		/*
		 * Get region offsets from base of mmap span; the arena
		 * doesn't need to be mmap'd at this point, since we
		 * simply compute the relative offset.
		 */
		roff[i] = skmem_arena_get_region_offset(ar, i);
	}

	/*
	 * The schema is made up of the descriptor followed inline by an array
	 * of offsets to the tx, rx, allocator and event rings in the mmap span.
	 * They contain the offset between the ring and schema, so the
	 * information is usable in userspace to reach the ring from
	 * the schema.
	 */
	base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);

	/* initialize schema with tx ring info */
	for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
		kr = &na->na_tx_rings[j];
		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
			continue;
		}

		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}
	/* initialize schema with rx ring info */
	for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
		kr = &na->na_rx_rings[j];
		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
			continue;
		}

		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
		    (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}
	/* initialize schema with allocator ring info */
	for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
	    i < n[NR_A]; i++, j++) {
		mach_vm_offset_t usd_roff;

		usd_roff = roff[SKMEM_REGION_TXAUSD];
		kr = &na->na_alloc_rings[j];
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
		    &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
		    &ksd_oi, &usd_oi);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
	}
	/* initialize schema with free ring info */
	for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
	    i < n[NR_F]; i++, j++) {
		mach_vm_offset_t usd_roff;

		usd_roff = roff[SKMEM_REGION_RXFUSD];
		kr = &na->na_free_rings[j];
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
		    &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
		    &ksd_oi, &usd_oi);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
	}
	/* initialize schema with event ring info */
	for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
	    n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
		ASSERT(csm->csm_num_event_rings != 0);
		kr = &na->na_event_rings[j];
		ASSERT(!KR_KERNEL_ONLY(kr));
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}

	*(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
	    __ch_umd_redzone_cookie;
	*(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
	    na->na_md_subtype;

	if (arn->arn_stats_obj != NULL) {
		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
		ASSERT(roff[SKMEM_REGION_USTATS] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
		    roff[SKMEM_REGION_USTATS];
		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
		    na->na_stats_type;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
		    NEXUS_STATS_TYPE_INVALID;
	}

	if (arn->arn_flowadv_obj != NULL) {
		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
		ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
		    roff[SKMEM_REGION_FLOWADV];
		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
		    na->na_flowadv_max;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
	}

	if (arn->arn_nexusadv_obj != NULL) {
		struct __kern_nexus_adv_metadata *adv_md;

		adv_md = arn->arn_nexusadv_obj;
		ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
		ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
		    roff[SKMEM_REGION_NEXUSADV];
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
	}

	ch->ch_schema = csm;
	ch->ch_schema_offset = base;

	return 0;
}
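
/*
 * Illustrative userspace-side sketch (hypothetical; "map_base" and this
 * derivation are not part of this file): since every ring_off/sd_off
 * stored above is relative to the schema object ("base"), a process that
 * has mapped the arena can locate ring i without any absolute addresses:
 *
 *	struct __user_channel_schema *csm = (void *)
 *	    ((char *)map_base + ch_schema_offset);
 *	struct __user_channel_ring *ring = (void *)
 *	    ((char *)csm + csm->csm_ring_ofs[i].ring_off);
 */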

/*
 * Called by all routines that create nexus_adapters.
 * Attach na to the ifp (if any) and provide defaults
 * for optional callbacks. Defaults assume that we
 * are creating a hardware nexus_adapter.
 */
void
na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
    struct kern_nexus_domain_provider *nxdom_prov)
{
	SK_LOCK_ASSERT_HELD();

	ASSERT(nx != NULL);
	ASSERT(nxdom_prov != NULL);
	ASSERT(na->na_krings_create != NULL);
	ASSERT(na->na_krings_delete != NULL);
	if (na->na_type != NA_NETIF_COMPAT_DEV) {
		ASSERT(na_get_nrings(na, NR_TX) != 0);
	}
	if (na->na_type != NA_NETIF_COMPAT_HOST) {
		ASSERT(na_get_nrings(na, NR_RX) != 0);
	}
	ASSERT(na->na_channels == 0);

	if (na->na_notify == NULL) {
		na->na_notify = na_notify;
	}

	na->na_nx = nx;
	na->na_nxdom_prov = nxdom_prov;

	SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
	    SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
	    SK_KVA(na->na_arena));
}

void
na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
    boolean_t within_kevent, boolean_t selwake, uint32_t hint)
{
	struct nexus_adapter *na = KRNA(kring);
	enum txrx t = kring->ckr_tx;

	SK_DF(SK_VERB_EVENTS,
	    "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
	    hint, CHAN_FILT_HINT_BITS);

	csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
	/*
	 * Optimization: avoid a wake up on the global
	 * queue if nobody has registered for more
	 * than one ring.
	 */
	if (na->na_si_users[t] > 0) {
		csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
	}
}

/* default notify callback */
static int
na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
{
#pragma unused(p)
	SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
	    SK_VERB_TX : SK_VERB_RX),
	    "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
	    "flags 0x%x, kh %u kt %u | h %u t %u",
	    sk_proc_name_address(p), sk_proc_pid(p),
	    (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
	    SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
	    kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
	    kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);

	na_post_event(kring, (flags & NA_NOTEF_PUSH),
	    (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);

	return 0;
}

/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
/* call with SK_LOCK held */
int
na_update_config(struct nexus_adapter *na)
{
	uint32_t txr, txd, rxr, rxd;

	SK_LOCK_ASSERT_HELD();

	txr = txd = rxr = rxd = 0;
	if (na->na_config == NULL ||
	    na->na_config(na, &txr, &txd, &rxr, &rxd)) {
		/* take whatever we had at init time */
		txr = na_get_nrings(na, NR_TX);
		txd = na_get_nslots(na, NR_TX);
		rxr = na_get_nrings(na, NR_RX);
		rxd = na_get_nslots(na, NR_RX);
	}

	if (na_get_nrings(na, NR_TX) == txr &&
	    na_get_nslots(na, NR_TX) == txd &&
	    na_get_nrings(na, NR_RX) == rxr &&
	    na_get_nslots(na, NR_RX) == rxd) {
		return 0; /* nothing changed */
	}
	SK_D("stored config %s: txring %u x %u, rxring %u x %u",
	    na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
	    na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
	SK_D("new config %s: txring %u x %u, rxring %u x %u",
	    na->na_name, txr, txd, rxr, rxd);

	if (na->na_channels == 0) {
		SK_D("configuration changed (but fine)");
		na_set_nrings(na, NR_TX, txr);
		na_set_nslots(na, NR_TX, txd);
		na_set_nrings(na, NR_RX, rxr);
		na_set_nslots(na, NR_RX, rxd);
		return 0;
	}
	SK_ERR("configuration changed while active, this is bad...");
	return 1;
}
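
/*
 * Illustrative sketch of a provider's na_config callback (hypothetical,
 * not part of this file): a device reporting a fixed 2x2 geometry with
 * 1024-slot rings could supply
 *
 *	static int
 *	my_na_config(struct nexus_adapter *na, uint32_t *txr, uint32_t *txd,
 *	    uint32_t *rxr, uint32_t *rxd)
 *	{
 *		*txr = *rxr = 2;	// ring counts
 *		*txd = *rxd = 1024;	// slots per ring
 *		return 0;	// nonzero means "keep init-time values"
 *	}
 *
 * and na_update_config() above would adopt the new values only while the
 * adapter has no active channels.
 */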

static void
na_kr_setup_netif_svc_map(struct nexus_adapter *na)
{
	uint32_t i;
	uint32_t num_tx_rings;

	ASSERT(na->na_type == NA_NETIF_DEV);
	num_tx_rings = na_get_nrings(na, NR_TX);

	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_BK));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RD));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RV));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_VI));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_CTL));

	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);

	_CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);

	/*
	 * We support the following 2 configurations:
	 * 1. packets from all service classes map to one ring.
	 * 2. a many-to-4 mapping between service classes and rings,
	 *    where the 4 rings correspond to the 4 WMM access categories.
	 */
	if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
		ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
		/* setup the adapter's service class LUT */
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
		NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
		NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
		NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
		NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);

		/* initialize the service class for each of the 4 rings */
		NAKR_SET_KR_SVC(na, KPKT_SC_BK);
		NAKR_SET_KR_SVC(na, KPKT_SC_BE);
		NAKR_SET_KR_SVC(na, KPKT_SC_VI);
		NAKR_SET_KR_SVC(na, KPKT_SC_VO);
	} else {
		ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
		    NEXUS_QMAP_TYPE_DEFAULT);
		/* many-to-1 mapping */
		for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
			na->na_kring_svc_lut[i] = 0;
		}
		for (i = 0; i < num_tx_rings; i++) {
			NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
		}
	}
}
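
/*
 * Worked example of the WMM mapping above, as pinned down by the
 * compile-time asserts: KPKT_SC_BK_SYS and KPKT_SC_BK share the
 * background ring; KPKT_SC_BE, KPKT_SC_RD and KPKT_SC_OAM share the
 * best-effort ring; KPKT_SC_AV, KPKT_SC_RV and KPKT_SC_VI share the
 * video ring; KPKT_SC_VO and KPKT_SC_CTL share the voice ring.
 * (KPKT_SC_SIG follows whatever ring PKT_SC2TC assigns it.)
 */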

static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");

static lck_grp_t *
na_kr_q_lck_grp(enum txrx t)
{
	switch (t) {
	case NR_TX:
		return &channel_txq_lock_group;
	case NR_RX:
		return &channel_rxq_lock_group;
	case NR_A:
	case NR_F:
		return &channel_alloc_lock_group;
	case NR_EV:
		return &channel_evq_lock_group;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static lck_grp_t *
na_kr_s_lck_grp(enum txrx t)
{
	switch (t) {
	case NR_TX:
		return &channel_txs_lock_group;
	case NR_RX:
		return &channel_rxs_lock_group;
	case NR_A:
	case NR_F:
		return &channel_alloc_lock_group;
	case NR_EV:
		return &channel_evs_lock_group;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static void
kr_init_tbr(struct __kern_channel_ring *r)
{
	r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
	r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
	r->ckr_tbr_last = 0;
}

struct kern_pbufpool *
na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
{
	struct kern_pbufpool *pp = NULL;
	switch (t) {
	case NR_RX:
	case NR_F:
	case NR_EV:
		pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
		break;
	case NR_TX:
	case NR_A:
		pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	return pp;
}

/*
 * Create the krings array and initialize the fields common to all adapters.
 * The array layout is this:
 *
 *                       +----------+
 * na->na_tx_rings ----->|          | \
 *                       |          |  } na->na_num_tx_rings
 *                       |          | /
 * na->na_rx_rings ----> +----------+
 *                       |          | \
 *                       |          |  } na->na_num_rx_rings
 *                       |          | /
 * na->na_alloc_rings -> +----------+
 *                       |          | \
 * na->na_free_rings --> +----------+  } na->na_num_allocator_ring_pairs
 *                       |          | /
 * na->na_event_rings -> +----------+
 *                       |          | \
 *                       |          |  } na->na_num_event_rings
 *                       |          | /
 *                       +----------+
 * na->na_tailroom ----->|          | \
 *                       |          |  } tailroom bytes
 *                       |          | /
 *                       +----------+
 *
 * The tailroom space is currently used by flow switch ports for allocating
 * leases.
 */
/* call with SK_LOCK held */
static int
na_kr_create(struct nexus_adapter *na, uint32_t tailroom, boolean_t alloc_ctx)
{
	lck_grp_t *q_lck_grp, *s_lck_grp;
	uint32_t i, len, ndesc;
	struct kern_pbufpool *pp = NULL;
	struct __kern_channel_ring *kring;
	uint32_t n[NR_ALL];
	int c, tot_slots, err = 0;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	n[NR_TX] = na_get_nrings(na, NR_TX);
	n[NR_RX] = na_get_nrings(na, NR_RX);
	n[NR_A] = na_get_nrings(na, NR_A);
	n[NR_F] = na_get_nrings(na, NR_F);
	n[NR_EV] = na_get_nrings(na, NR_EV);

	len = ((n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV]) *
	    sizeof(struct __kern_channel_ring)) + tailroom;

	na->na_rings_mem_sz = (size_t)len;
	// rdar://88962126
	__typed_allocators_ignore_push
	na->na_tx_rings = sk_alloc((size_t)len, Z_WAITOK, skmem_tag_nx_rings);
	__typed_allocators_ignore_pop
	if (__improbable(na->na_tx_rings == NULL)) {
		SK_ERR("Cannot allocate krings");
		err = ENOMEM;
		goto error;
	}
	na->na_rx_rings = na->na_tx_rings + n[NR_TX];
	if (n[NR_A] != 0) {
		na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
		na->na_free_rings = na->na_alloc_rings + n[NR_A];
	} else {
		na->na_alloc_rings = na->na_free_rings = NULL;
	}
	if (n[NR_EV] != 0) {
		if (na->na_free_rings != NULL) {
			na->na_event_rings = na->na_free_rings + n[NR_F];
		} else {
			na->na_event_rings = na->na_rx_rings + n[NR_RX];
		}
	}

	/* total number of slots for TX/RX adapter rings */
	c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
	    (n[NR_RX] * na_get_nslots(na, NR_RX));

	/* for scratch space on alloc and free rings */
	if (n[NR_A] != 0) {
		tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
		tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
		c = tot_slots;
	}
	na->na_total_slots = tot_slots;

	/* slot context (optional) for all TX/RX ring slots of this adapter */
	if (alloc_ctx) {
		na->na_slot_ctxs =
		    skn_alloc_type_array(slot_ctxs, struct slot_ctx,
		    na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
		if (na->na_slot_ctxs == NULL) {
			SK_ERR("Cannot allocate slot contexts");
			err = ENOMEM;
			goto error;
		}
		atomic_bitset_32(&na->na_flags, NAF_SLOT_CONTEXT);
	}

	/*
	 * Packet handle array storage for all TX/RX ring slots of this
	 * adapter.
	 */
	na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
	    na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
	if (na->na_scratch == NULL) {
		SK_ERR("Cannot allocate scratch space");
		err = ENOMEM;
		goto error;
	}
1438 
1439 	/*
1440 	 * All fields in krings are 0 except the one initialized below.
1441 	 * but better be explicit on important kring fields.
1442 	 */
1443 	for_all_rings(t) {
1444 		ndesc = na_get_nslots(na, t);
1445 		pp = na_kr_get_pp(na, t);
1446 		for (i = 0; i < n[t]; i++) {
1447 			kring = &NAKR(na, t)[i];
1448 			bzero(kring, sizeof(*kring));
1449 			kring->ckr_na = na;
1450 			kring->ckr_pp = pp;
1451 			kring->ckr_max_pkt_len = PP_BUF_SIZE_DEF(pp) *
1452 			    pp->pp_max_frags;
1453 			kring->ckr_ring_id = i;
1454 			kring->ckr_tx = t;
1455 			kr_init_to_mhints(kring, ndesc);
1456 			kr_init_tbr(kring);
1457 			if (NA_KERNEL_ONLY(na)) {
1458 				kring->ckr_flags |= CKRF_KERNEL_ONLY;
1459 			}
1460 			if (na->na_flags & NAF_HOST_ONLY) {
1461 				kring->ckr_flags |= CKRF_HOST;
1462 			}
1463 			ASSERT((t >= NR_TXRX) || (c > 0));
1464 			if ((t < NR_TXRX) &&
1465 			    (na->na_flags & NAF_SLOT_CONTEXT)) {
1466 				ASSERT(na->na_slot_ctxs != NULL);
1467 				kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1468 				kring->ckr_slot_ctxs =
1469 				    na->na_slot_ctxs + (tot_slots - c);
1470 			}
1471 			ASSERT(na->na_scratch != NULL);
1472 			if (t < NR_TXRXAF) {
1473 				kring->ckr_scratch =
1474 				    na->na_scratch + (tot_slots - c);
1475 			}
1476 			if (t < NR_TXRXAF) {
1477 				c -= ndesc;
1478 			}
1479 			switch (t) {
1480 			case NR_A:
1481 				if (i == 0) {
1482 					kring->ckr_na_sync =
1483 					    na_packet_pool_alloc_sync;
1484 					kring->ckr_alloc_ws =
1485 					    na_upp_alloc_lowat;
1486 				} else {
1487 					ASSERT(i == 1);
1488 					kring->ckr_na_sync =
1489 					    na_packet_pool_alloc_buf_sync;
1490 					kring->ckr_alloc_ws =
1491 					    na_upp_alloc_buf_lowat;
1492 				}
1493 				break;
1494 			case NR_F:
1495 				if (i == 0) {
1496 					kring->ckr_na_sync =
1497 					    na_packet_pool_free_sync;
1498 				} else {
1499 					ASSERT(i == 1);
1500 					kring->ckr_na_sync =
1501 					    na_packet_pool_free_buf_sync;
1502 				}
1503 				break;
1504 			case NR_TX:
1505 				kring->ckr_na_sync = na->na_txsync;
1506 				if (na->na_flags & NAF_TX_MITIGATION) {
1507 					kring->ckr_flags |= CKRF_MITIGATION;
1508 				}
1509 				switch (na->na_type) {
1510 #if CONFIG_NEXUS_USER_PIPE
1511 				case NA_USER_PIPE:
1512 					ASSERT(!(na->na_flags &
1513 					    NAF_USER_PKT_POOL));
1514 					kring->ckr_prologue = kr_txprologue;
1515 					kring->ckr_finalize = NULL;
1516 					break;
1517 #endif /* CONFIG_NEXUS_USER_PIPE */
1518 #if CONFIG_NEXUS_MONITOR
1519 				case NA_MONITOR:
1520 					ASSERT(!(na->na_flags &
1521 					    NAF_USER_PKT_POOL));
1522 					kring->ckr_prologue = kr_txprologue;
1523 					kring->ckr_finalize = NULL;
1524 					break;
1525 #endif /* CONFIG_NEXUS_MONITOR */
1526 				default:
1527 					if (na->na_flags & NAF_USER_PKT_POOL) {
1528 						kring->ckr_prologue =
1529 						    kr_txprologue_upp;
1530 						kring->ckr_finalize =
1531 						    kr_txfinalize_upp;
1532 					} else {
1533 						kring->ckr_prologue =
1534 						    kr_txprologue;
1535 						kring->ckr_finalize =
1536 						    kr_txfinalize;
1537 					}
1538 					break;
1539 				}
1540 				break;
1541 			case NR_RX:
1542 				kring->ckr_na_sync = na->na_rxsync;
1543 				if (na->na_flags & NAF_RX_MITIGATION) {
1544 					kring->ckr_flags |= CKRF_MITIGATION;
1545 				}
1546 				switch (na->na_type) {
1547 #if CONFIG_NEXUS_USER_PIPE
1548 				case NA_USER_PIPE:
1549 					ASSERT(!(na->na_flags &
1550 					    NAF_USER_PKT_POOL));
1551 					kring->ckr_prologue =
1552 					    kr_rxprologue_nodetach;
1553 					kring->ckr_finalize = kr_rxfinalize;
1554 					break;
1555 #endif /* CONFIG_NEXUS_USER_PIPE */
1556 #if CONFIG_NEXUS_MONITOR
1557 				case NA_MONITOR:
1558 					ASSERT(!(na->na_flags &
1559 					    NAF_USER_PKT_POOL));
1560 					kring->ckr_prologue =
1561 					    kr_rxprologue_nodetach;
1562 					kring->ckr_finalize = kr_rxfinalize;
1563 					break;
1564 #endif /* CONFIG_NEXUS_MONITOR */
1565 				default:
1566 					if (na->na_flags & NAF_USER_PKT_POOL) {
1567 						kring->ckr_prologue =
1568 						    kr_rxprologue_upp;
1569 						kring->ckr_finalize =
1570 						    kr_rxfinalize_upp;
1571 					} else {
1572 						kring->ckr_prologue =
1573 						    kr_rxprologue;
1574 						kring->ckr_finalize =
1575 						    kr_rxfinalize;
1576 					}
1577 					break;
1578 				}
1579 				break;
1580 			case NR_EV:
1581 				kring->ckr_na_sync = kern_channel_event_sync;
1582 				break;
1583 			default:
1584 				VERIFY(0);
1585 				/* NOTREACHED */
1586 				__builtin_unreachable();
1587 			}
1588 			if (t != NR_EV) {
1589 				kring->ckr_na_notify = na->na_notify;
1590 			} else {
1591 				kring->ckr_na_notify = NULL;
1592 			}
1593 			(void) snprintf(kring->ckr_name,
1594 			    sizeof(kring->ckr_name) - 1,
1595 			    "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1596 			    ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1597 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1598 			    "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1599 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1600 			    CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1601 			kring->ckr_state = KR_READY;
1602 			q_lck_grp = na_kr_q_lck_grp(t);
1603 			s_lck_grp = na_kr_s_lck_grp(t);
1604 			kring->ckr_qlock_group = q_lck_grp;
1605 			lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1606 			    &channel_lock_attr);
1607 			kring->ckr_slock_group = s_lck_grp;
1608 			lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1609 			    &channel_lock_attr);
1610 			csi_init(&kring->ckr_si,
1611 			    (kring->ckr_flags & CKRF_MITIGATION),
1612 			    na->na_ch_mit_ival);
1613 		}
1614 		csi_init(&na->na_si[t],
1615 		    (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1616 		    na->na_ch_mit_ival);
1617 	}
1618 	ASSERT(c == 0);
1619 	na->na_tailroom = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV];
1620 
1621 	if (na->na_type == NA_NETIF_DEV) {
1622 		na_kr_setup_netif_svc_map(na);
1623 	}
1624 
1625 	/* validate now for cases where we create only krings */
1626 	na_krings_verify(na);
1627 	return 0;
1628 
1629 error:
1630 	ASSERT(err != 0);
1631 	if (na->na_tx_rings != NULL) {
1632 		// rdar://88962126
1633 		__typed_allocators_ignore_push
1634 		sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1635 		__typed_allocators_ignore_pop
1636 		na->na_tx_rings = NULL;
1637 	}
1638 	if (na->na_slot_ctxs != NULL) {
1639 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1640 		skn_free_type_array(slot_ctxs,
1641 		    struct slot_ctx, na->na_total_slots,
1642 		    na->na_slot_ctxs);
1643 		na->na_slot_ctxs = NULL;
1644 	}
1645 	if (na->na_scratch != NULL) {
1646 		skn_free_type_array(scratch,
1647 		    kern_packet_t, na->na_total_slots,
1648 		    na->na_scratch);
1649 		na->na_scratch = NULL;
1650 	}
1651 	return err;
1652 }
1653 
1654 /* undo the actions performed by na_kr_create() */
1655 /* call with SK_LOCK held */
1656 static void
1657 na_kr_delete(struct nexus_adapter *na)
1658 {
1659 	struct __kern_channel_ring *kring = na->na_tx_rings;
1660 	enum txrx t;
1661 
1662 	ASSERT((kring != NULL) && (na->na_tailroom != NULL));
1663 	SK_LOCK_ASSERT_HELD();
1664 
1665 	for_all_rings(t) {
1666 		csi_destroy(&na->na_si[t]);
1667 	}
1668 	/* we rely on the krings layout described above */
1669 	for (; kring != na->na_tailroom; kring++) {
1670 		lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1671 		lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1672 		csi_destroy(&kring->ckr_si);
1673 		if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1674 			kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1675 			ASSERT(kring->ckr_slot_ctxs != NULL);
1676 			kring->ckr_slot_ctxs = NULL;
1677 		}
1678 	}
1679 	if (na->na_slot_ctxs != NULL) {
1680 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1681 		atomic_bitclear_32(&na->na_flags, NAF_SLOT_CONTEXT);
1682 		skn_free_type_array(slot_ctxs,
1683 		    struct slot_ctx, na->na_total_slots,
1684 		    na->na_slot_ctxs);
1685 		na->na_slot_ctxs = NULL;
1686 	}
1687 	if (na->na_scratch != NULL) {
1688 		skn_free_type_array(scratch,
1689 		    kern_packet_t, na->na_total_slots,
1690 		    na->na_scratch);
1691 		na->na_scratch = NULL;
1692 	}
1693 	ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1694 	// rdar://88962126
1695 	__typed_allocators_ignore_push
1696 	sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1697 	__typed_allocators_ignore_pop
1698 	na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1699 	    na->na_free_rings = na->na_event_rings = na->na_tailroom = NULL;
1700 }
1701 
1702 static void
1703 na_kr_slot_desc_init(struct __slot_desc *ksds,
1704     boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1705 {
1706 	size_t i;
1707 
1708 	bzero(ksds, ndesc * SLOT_DESC_SZ);
1709 	if (usds != NULL) {
1710 		ASSERT(!kernel_only);
1711 		bzero(usds, ndesc * SLOT_DESC_SZ);
1712 	} else {
1713 		ASSERT(kernel_only);
1714 	}
1715 
1716 	for (i = 0; i < ndesc; i++) {
1717 		KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1718 		if (!kernel_only) {
1719 			USD_INIT(SLOT_DESC_USD(&usds[i]));
1720 		}
1721 	}
1722 }
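
/*
 * Illustrative call (matching its use in na_kr_setup() below): for a
 * kernel-only kring the USD array is absent, so usds is passed as NULL:
 *
 *	na_kr_slot_desc_init(kring->ckr_ksds, KR_KERNEL_ONLY(kring),
 *	    kring->ckr_usds, ndesc);
 */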
1723 
1724 /* call with SK_LOCK held */
1725 static int
1726 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1727 {
1728 	struct skmem_arena *ar = na->na_arena;
1729 	struct skmem_arena_nexus *arn;
1730 	mach_vm_offset_t roff[SKMEM_REGIONS];
1731 	enum txrx t;
1732 	uint32_t i;
1733 
1734 	SK_LOCK_ASSERT_HELD();
1735 	ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1736 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1737 	arn = skmem_arena_nexus(ar);
1738 	ASSERT(arn != NULL);
1739 
1740 	bzero(&roff, sizeof(roff));
1741 	for (i = 0; i < SKMEM_REGIONS; i++) {
1742 		if (ar->ar_regions[i] == NULL) {
1743 			continue;
1744 		}
1745 
1746 		/* not for nexus */
1747 		ASSERT(i != SKMEM_REGION_SYSCTLS);
1748 
1749 		/*
1750 		 * Get region offsets from base of mmap span; the arena
1751 		 * doesn't need to be mmap'd at this point, since we
1752 		 * simply compute the relative offset.
1753 		 */
1754 		roff[i] = skmem_arena_get_region_offset(ar, i);
1755 	}
1756 
1757 	for_all_rings(t) {
1758 		for (i = 0; i < na_get_nrings(na, t); i++) {
1759 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1760 			struct __user_channel_ring *ring = kring->ckr_ring;
1761 			mach_vm_offset_t ring_off, usd_roff;
1762 			struct skmem_obj_info oi, oim;
1763 			uint32_t ndesc;
1764 
1765 			if (ring != NULL) {
1766 				SK_DF(SK_VERB_NA | SK_VERB_RING,
1767 				    "kr 0x%llx (\"%s\") is already "
1768 				    "initialized", SK_KVA(kring),
1769 				    kring->ckr_name);
1770 				continue; /* already created by somebody else */
1771 			}
1772 
1773 			if (!KR_KERNEL_ONLY(kring) &&
1774 			    (ring = skmem_cache_alloc(arn->arn_ring_cache,
1775 			    SKMEM_NOSLEEP)) == NULL) {
1776 				SK_ERR("Cannot allocate %s_ring for kr "
1777 				    "0x%llx (\"%s\")", sk_ring2str(t),
1778 				    SK_KVA(kring), kring->ckr_name);
1779 				goto cleanup;
1780 			}
1781 			kring->ckr_flags |= CKRF_MEM_RING_INITED;
1782 			kring->ckr_ring = ring;
1783 			ndesc = kring->ckr_num_slots;
1784 
1785 			if (ring == NULL) {
1786 				goto skip_user_ring_setup;
1787 			}
1788 
1789 			*(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1790 
1791 			/* offset of current ring in mmap span */
1792 			skmem_cache_get_obj_info(arn->arn_ring_cache,
1793 			    ring, &oi, NULL);
1794 			ring_off = (roff[SKMEM_REGION_RING] +
1795 			    SKMEM_OBJ_ROFF(&oi));
1796 
1797 			/*
1798 			 * ring_{buf,md,sd}_ofs offsets are relative to the
1799 			 * current ring, and not to the base of mmap span.
1800 			 */
1801 			*(mach_vm_offset_t *)(uintptr_t)
1802 			&ring->ring_def_buf_base =
1803 			    (roff[SKMEM_REGION_BUF_DEF] - ring_off);
1804 			*(mach_vm_offset_t *)(uintptr_t)
1805 			&ring->ring_large_buf_base =
1806 			    (roff[SKMEM_REGION_BUF_LARGE] - ring_off);
1807 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1808 			    (roff[SKMEM_REGION_UMD] - ring_off);
1809 			_CASSERT(sizeof(uint16_t) ==
1810 			    sizeof(ring->ring_bft_size));
1811 			if (roff[SKMEM_REGION_UBFT] != 0) {
1812 				ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1813 				    NULL);
1814 				*(mach_vm_offset_t *)(uintptr_t)
1815 				&ring->ring_bft_base =
1816 				    (roff[SKMEM_REGION_UBFT] - ring_off);
1817 				*(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1818 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1819 				    skr_c_obj_size;
1820 				ASSERT(ring->ring_bft_size ==
1821 				    ar->ar_regions[SKMEM_REGION_KBFT]->
1822 				    skr_c_obj_size);
1823 			} else {
1824 				*(mach_vm_offset_t *)(uintptr_t)
1825 				&ring->ring_bft_base = 0;
1826 				*(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1827 			}
1828 
1829 			if (t == NR_TX || t == NR_A || t == NR_EV) {
1830 				usd_roff = roff[SKMEM_REGION_TXAUSD];
1831 			} else {
1832 				ASSERT(t == NR_RX || t == NR_F);
1833 				usd_roff = roff[SKMEM_REGION_RXFUSD];
1834 			}
1835 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1836 			    (usd_roff - ring_off);
1837 
1838 			/* copy values from kring */
1839 			ring->ring_head = kring->ckr_rhead;
1840 			*(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1841 			    kring->ckr_khead;
1842 			*(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1843 			    kring->ckr_rtail;
1844 
1845 			_CASSERT(sizeof(uint32_t) ==
1846 			    sizeof(ring->ring_def_buf_size));
1847 			_CASSERT(sizeof(uint32_t) ==
1848 			    sizeof(ring->ring_large_buf_size));
1849 			_CASSERT(sizeof(uint16_t) ==
1850 			    sizeof(ring->ring_md_size));
1851 			*(uint32_t *)(uintptr_t)&ring->ring_def_buf_size =
1852 			    ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size;
1853 			if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) {
1854 				*(uint32_t *)(uintptr_t)&ring->ring_large_buf_size =
1855 				    ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size;
1856 			} else {
1857 				*(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0;
1858 			}
1859 			if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1860 				*(uint16_t *)(uintptr_t)&ring->ring_md_size =
1861 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1862 				    skr_c_obj_size;
1863 				ASSERT(ring->ring_md_size ==
1864 				    ar->ar_regions[SKMEM_REGION_KMD]->
1865 				    skr_c_obj_size);
1866 			} else {
1867 				*(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1868 				ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1869 				ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1870 			}
1871 
1872 			/* ring info */
1873 			_CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1874 			_CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1875 			*(uint16_t *)(uintptr_t)&ring->ring_id =
1876 			    (uint16_t)kring->ckr_ring_id;
1877 			*(uint16_t *)(uintptr_t)&ring->ring_kind =
1878 			    (uint16_t)kring->ckr_tx;
1879 
1880 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1881 			    "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1882 			    sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1883 			    kring->ckr_name);
1884 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1885 			    "  num_slots:  %u", ring->ring_num_slots);
1886 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1887 			    "  def_buf_base:   0x%llx",
1888 			    (uint64_t)ring->ring_def_buf_base);
1889 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1890 			    "  large_buf_base:   0x%llx",
1891 			    (uint64_t)ring->ring_large_buf_base);
1892 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1893 			    "  md_base:    0x%llx",
1894 			    (uint64_t)ring->ring_md_base);
1895 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1896 			    "  sd_base:    0x%llx",
1897 			    (uint64_t)ring->ring_sd_base);
1898 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1899 			    "  h, t:    %u, %u", ring->ring_head,
1900 			    ring->ring_tail);
1901 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1902 			    "  md_size:    %llu",
1903 			    (uint64_t)ring->ring_md_size);
1904 
1905 			/* make sure they're in synch */
1906 			_CASSERT(NR_RX == CR_KIND_RX);
1907 			_CASSERT(NR_TX == CR_KIND_TX);
1908 			_CASSERT(NR_A == CR_KIND_ALLOC);
1909 			_CASSERT(NR_F == CR_KIND_FREE);
1910 			_CASSERT(NR_EV == CR_KIND_EVENT);
1911 
1912 skip_user_ring_setup:
1913 			/*
1914 			 * This flag tells na_kr_teardown_all() that it should
1915 			 * go thru the checks to free up the slot maps.
1916 			 */
1917 			kring->ckr_flags |= CKRF_MEM_SD_INITED;
1918 			if (t == NR_TX || t == NR_A || t == NR_EV) {
1919 				kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1920 			} else {
1921 				ASSERT(t == NR_RX || t == NR_F);
1922 				kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1923 			}
1924 			kring->ckr_ksds =
1925 			    skmem_cache_alloc(kring->ckr_ksds_cache,
1926 			    SKMEM_NOSLEEP);
1927 			if (kring->ckr_ksds == NULL) {
1928 				SK_ERR("Cannot allocate %s_ksds for kr "
1929 				    "0x%llx (\"%s\")", sk_ring2str(t),
1930 				    SK_KVA(kring), kring->ckr_name);
1931 				goto cleanup;
1932 			}
1933 			if (!KR_KERNEL_ONLY(kring)) {
1934 				skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1935 				    kring->ckr_ksds, &oi, &oim);
1936 				kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1937 			}
1938 			na_kr_slot_desc_init(kring->ckr_ksds,
1939 			    KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1940 
1941 			/* cache last slot descriptor address */
1942 			ASSERT(kring->ckr_lim == (ndesc - 1));
1943 			kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1944 
1945 			if ((t < NR_TXRX) &&
1946 			    !(na->na_flags & NAF_USER_PKT_POOL) &&
1947 			    na_kr_populate_slots(kring) != 0) {
1948 				SK_ERR("Cannot allocate buffers for kr "
1949 				    "0x%llx (\"%s\")", SK_KVA(kring),
1950 				    kring->ckr_name);
1951 				goto cleanup;
1952 			}
1953 		}
1954 	}
1955 
1956 	return 0;
1957 
1958 cleanup:
1959 	na_kr_teardown_all(na, ch, FALSE);
1960 
1961 	return ENOMEM;
1962 }
1963 
1964 static void
1965 na_kr_teardown_common(struct nexus_adapter *na,
1966     struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
1967     boolean_t defunct)
1968 {
1969 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
1970 	struct __user_channel_ring *ckr_ring;
1971 	boolean_t sd_idle, sd_inited;
1972 
1973 	ASSERT(arn != NULL);
1974 	kr_enter(kring, TRUE);
1975 	/*
1976 	 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
1977 	 * to make sure that the freeing needs to happen (else just
1978 	 * nullify the values).
1979 	 * If this adapter owns the memory for the slot descriptors,
1980 	 * check if the region is marked as busy (sd_idle is false)
1981 	 * and leave the kring's slot descriptor fields alone if so,
1982 	 * at defunct time.  At final teardown time, sd_idle must be
1983 	 * true else we assert; this indicates a missing call to
1984 	 * skmem_arena_nexus_sd_set_noidle().
1985 	 */
1986 	sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
1987 	if (sd_inited) {
1988 		/* callee will do KR_KSD(), so check */
1989 		if (((t < NR_TXRX) || (t == NR_EV)) &&
1990 		    (kring->ckr_ksds != NULL)) {
1991 			na_kr_depopulate_slots(kring, ch, defunct);
1992 		}
1993 		/* leave CKRF_MEM_SD_INITED flag alone until idle */
1994 		sd_idle = skmem_arena_nexus_sd_idle(arn);
1995 		VERIFY(sd_idle || defunct);
1996 	} else {
1997 		sd_idle = TRUE;
1998 	}
1999 
2000 	if (sd_idle) {
2001 		kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2002 		if (kring->ckr_ksds != NULL) {
2003 			if (sd_inited) {
2004 				skmem_cache_free(kring->ckr_ksds_cache,
2005 				    kring->ckr_ksds);
2006 			}
2007 			kring->ckr_ksds = NULL;
2008 			kring->ckr_ksds_last = NULL;
2009 			kring->ckr_usds = NULL;
2010 		}
2011 		ASSERT(kring->ckr_ksds_last == NULL);
2012 		ASSERT(kring->ckr_usds == NULL);
2013 	}
2014 
2015 	if ((ckr_ring = kring->ckr_ring) != NULL) {
2016 		kring->ckr_ring = NULL;
2017 	}
2018 
2019 	if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2020 		ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2021 		if (ckr_ring != NULL) {
2022 			skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2023 		}
2024 		kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2025 	}
2026 
2027 	if (defunct) {
2028 		/* if defunct, drop everything; see KR_DROP() */
2029 		kring->ckr_flags |= CKRF_DEFUNCT;
2030 	}
2031 	kr_exit(kring);
2032 }
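
/*
 * Summary of the slot-descriptor handling in na_kr_teardown_common()
 * (derived from the code above):
 *
 *	sd_inited  defunct  sd_idle  action on ckr_ksds
 *	---------  -------  -------  ----------------------------------
 *	FALSE      any      TRUE     fields nullified; nothing to free
 *	TRUE       TRUE     FALSE    left intact; arena still in use
 *	TRUE       any      TRUE     freed back to ckr_ksds_cache
 */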
2033 
2034 /*
2035  * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2036  */
2037 static void
2038 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2039     boolean_t defunct)
2040 {
2041 	enum txrx t;
2042 
2043 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2044 
2045 	/* skip if this adapter has no allocated rings */
2046 	if (na->na_tx_rings == NULL) {
2047 		return;
2048 	}
2049 
2050 	for_all_rings(t) {
2051 		for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2052 			na_kr_teardown_common(na, &NAKR(na, t)[i],
2053 			    t, ch, defunct);
2054 		}
2055 	}
2056 }
2057 
2058 /*
2059  * Teardown only {tx,rx} rings assigned to the channel.
2060  */
2061 static void
2062 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2063     boolean_t defunct, struct proc *p)
2064 {
2065 	enum txrx t;
2066 
2067 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2068 
2069 	for_rx_tx(t) {
2070 		ring_id_t qfirst = ch->ch_first[t];
2071 		ring_id_t qlast = ch->ch_last[t];
2072 		uint32_t i;
2073 
2074 		for (i = qfirst; i < qlast; i++) {
2075 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2076 			na_kr_teardown_common(na, kring, t, ch, defunct);
2077 
2078 			/*
2079 			 * Issue a notify to wake up anyone sleeping in kqueue
2080 			 * so that they notice the newly defuncted channels and
2081 			 * return an error.
2082 			 */
2083 			kring->ckr_na_notify(kring, p, 0);
2084 		}
2085 	}
2086 }
2087 
2088 static int
2089 na_kr_populate_slots(struct __kern_channel_ring *kring)
2090 {
2091 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2092 	struct nexus_adapter *na = KRNA(kring);
2093 	kern_pbufpool_t pp = kring->ckr_pp;
2094 	uint32_t nslots = kring->ckr_num_slots;
2095 	uint32_t start_idx, i;
2096 	uint32_t sidx = 0;      /* slot counter */
2097 	struct __kern_slot_desc *ksd;
2098 	struct __user_slot_desc *usd;
2099 	struct __kern_quantum *kqum;
2100 	nexus_type_t nexus_type;
2101 	int err = 0;
2102 
2103 	ASSERT(kring->ckr_tx < NR_TXRX);
2104 	ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2105 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2106 	ASSERT(pp != NULL);
2107 
2108 	/*
2109 	 * xxx_ppool: remove this special case
2110 	 */
2111 	nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2112 
2113 	switch (nexus_type) {
2114 	case NEXUS_TYPE_FLOW_SWITCH:
2115 	case NEXUS_TYPE_KERNEL_PIPE:
2116 		/*
2117 		 * xxx_ppool: This is temporary code until we come up with a
2118 		 * scheme for user space to alloc & attach packets to tx ring.
2119 		 */
2120 		if (kernel_only || kring->ckr_tx == NR_RX) {
2121 			return 0;
2122 		}
2123 		break;
2124 
2125 	case NEXUS_TYPE_NET_IF:
2126 		if (((na->na_type == NA_NETIF_DEV) ||
2127 		    (na->na_type == NA_NETIF_HOST)) &&
2128 		    (kernel_only || (kring->ckr_tx == NR_RX))) {
2129 			return 0;
2130 		}
2131 
2132 		ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2133 		    (na->na_type == NA_NETIF_COMPAT_HOST) ||
2134 		    (na->na_type == NA_NETIF_DEV) ||
2135 		    (na->na_type == NA_NETIF_VP));
2136 
2137 		if (!kernel_only) {
2138 			if (kring->ckr_tx == NR_RX) {
2139 				return 0;
2140 			} else {
2141 				break;
2142 			}
2143 		}
2144 
2145 		ASSERT(kernel_only);
2146 
2147 		if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2148 		    (na->na_type == NA_NETIF_COMPAT_HOST)) {
2149 			return 0;
2150 		}
2151 		VERIFY(0);
2152 		/* NOTREACHED */
2153 		__builtin_unreachable();
2154 
2155 	case NEXUS_TYPE_USER_PIPE:
2156 	case NEXUS_TYPE_MONITOR:
2157 		break;
2158 
2159 	default:
2160 		VERIFY(0);
2161 		/* NOTREACHED */
2162 		__builtin_unreachable();
2163 	}
2164 
2165 	/* Fill the ring with packets */
2166 	sidx = start_idx = 0;
2167 	for (i = 0; i < nslots; i++) {
2168 		kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2169 		    SKMEM_NOSLEEP));
2170 		if (kqum == NULL) {
2171 			err = ENOMEM;
2172 			SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2173 			    "after %u of %u, err %d", SK_KVA(na->na_arena),
2174 			    na->na_arena->ar_name, i, nslots, err);
2175 			goto cleanup;
2176 		}
2177 		ksd = KR_KSD(kring, i);
2178 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2179 
2180 		/* attach packet to slot */
2181 		kqum->qum_ksd = ksd;
2182 		ASSERT(!KSD_VALID_METADATA(ksd));
2183 		KSD_ATTACH_METADATA(ksd, kqum);
2184 		if (usd != NULL) {
2185 			USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2186 			kr_externalize_metadata(kring, pp->pp_max_frags,
2187 			    kqum, current_proc());
2188 		}
2189 
2190 		SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2191 		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2192 		    SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2193 		    SK_KVA(&kqum->qum_buf[0]));
2194 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2195 			SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2196 			    "uqum [%-3u, 0x%llx]  ubuf[%-3u, 0x%llx]",
2197 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2198 			    SK_KVA(usd), METADATA_IDX(kqum),
2199 			    SK_KVA(kqum->qum_user),
2200 			    kqum->qum_user->qum_buf[0].buf_idx,
2201 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2202 		}
2203 
2204 		sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2205 	}
2206 
2207 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2208 	    SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2209 
2210 cleanup:
2211 	if (err != 0) {
2212 		sidx = start_idx;
2213 		while (i-- > 0) {
2214 			ksd = KR_KSD(kring, i);
2215 			usd = (kernel_only ? NULL : KR_USD(kring, i));
2216 			kqum = ksd->sd_qum;
2217 
2218 			ASSERT(ksd == kqum->qum_ksd);
2219 			KSD_RESET(ksd);
2220 			if (usd != NULL) {
2221 				USD_RESET(usd);
2222 			}
2223 			/* detach packet from slot */
2224 			kqum->qum_ksd = NULL;
2225 			pp_free_packet(pp, SK_PTR_ADDR(kqum));
2226 
2227 			sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2228 		}
2229 	}
2230 	return err;
2231 }
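
/*
 * Invariant maintained by the populate/depopulate pair (see the code
 * above and na_kr_depopulate_slots() below): while a slot is populated,
 * the slot descriptor and its quantum point at each other.
 *
 *	attach:	kqum->qum_ksd = ksd;	KSD_ATTACH_METADATA(ksd, kqum);
 *	detach:	KSD_RESET(ksd);		kqum->qum_ksd = NULL;
 */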
2232 
2233 static void
2234 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2235     struct kern_channel *ch, boolean_t defunct)
2236 {
2237 #pragma unused(ch)
2238 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2239 	uint32_t i, j, n = kring->ckr_num_slots;
2240 	struct nexus_adapter *na = KRNA(kring);
2241 	struct kern_pbufpool *pp = kring->ckr_pp;
2242 	boolean_t upp = FALSE;
2243 	obj_idx_t midx;
2244 
2245 	ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2246 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2247 
2248 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2249 
2250 	if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2251 	    (kring->ckr_tx != NR_EV)) {
2252 		upp = TRUE;
2253 	}
2254 	for (i = 0, j = 0; i < n; i++) {
2255 		struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2256 		struct __user_slot_desc *usd;
2257 		struct __kern_quantum *qum, *kqum;
2258 		boolean_t free_packet = FALSE;
2259 		int err;
2260 
2261 		if (!KSD_VALID_METADATA(ksd)) {
2262 			continue;
2263 		}
2264 
2265 		kqum = ksd->sd_qum;
2266 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2267 		midx = METADATA_IDX(kqum);
2268 
2269 		/*
2270 		 * if the packet is internalized it should not be in the
2271 		 * hash table of packets loaned to user space.
2272 		 */
2273 		if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2274 			if ((qum = pp_find_upp(pp, midx)) != NULL) {
2275 				panic("internalized packet 0x%llx in htbl",
2276 				    SK_KVA(qum));
2277 				/* NOTREACHED */
2278 				__builtin_unreachable();
2279 			}
2280 			free_packet = TRUE;
2281 		} else if (upp) {
2282 			/*
2283 			 * if the packet is not internalized check if it is
2284 			 * in the list of packets loaned to user-space.
2285 			 * Remove from the list before freeing.
2286 			 */
2287 			ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2288 			qum = pp_remove_upp(pp, midx, &err);
2289 			if (err != 0) {
2290 				SK_ERR("un-allocated packet or buflet %d 0x%llx",
2291 				    midx, SK_KVA(qum));
2292 				if (qum != NULL) {
2293 					free_packet = TRUE;
2294 				}
2295 			}
2296 		} else {
2297 			free_packet = TRUE;
2298 		}
2299 
2300 		/*
2301 		 * Clear the user and kernel slot descriptors.  Note that
2302 		 * if we are depopulating the slots due to defunct (and not
2303 		 * due to normal deallocation/teardown), we leave the user
2304 		 * slot descriptor alone.  At that point the process may
2305 		 * be suspended, and later when it resumes it would just
2306 		 * pick up the original contents and move forward with
2307 		 * whatever it was doing.
2308 		 */
2309 		KSD_RESET(ksd);
2310 		if (usd != NULL && !defunct) {
2311 			USD_RESET(usd);
2312 		}
2313 
2314 		/* detach packet from slot */
2315 		kqum->qum_ksd = NULL;
2316 
2317 		SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2318 		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2319 		    METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2320 		    SK_KVA(&kqum->qum_buf[0]));
2321 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2322 			SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2323 			    "uqum [%-3u, 0x%llx]  ubuf[%-3u, 0x%llx]",
2324 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2325 			    SK_KVA(usd), METADATA_IDX(kqum),
2326 			    SK_KVA(kqum->qum_user),
2327 			    kqum->qum_user->qum_buf[0].buf_idx,
2328 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2329 		}
2330 
2331 		if (free_packet) {
2332 			pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2333 		}
2334 	}
2335 
2336 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2337 	    SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2338 	    j, n);
2339 }
2340 
2341 int
2342 na_rings_mem_setup(struct nexus_adapter *na, uint32_t tailroom,
2343     boolean_t alloc_ctx, struct kern_channel *ch)
2344 {
2345 	boolean_t kronly;
2346 	int err;
2347 
2348 	SK_LOCK_ASSERT_HELD();
2349 	ASSERT(na->na_channels == 0);
2350 	/*
2351 	 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2352 	 * the backing memory regions for the adapter.
2353 	 */
2354 	kronly = (na->na_flags & NAF_MEM_NO_INIT);
2355 	ASSERT(!kronly || NA_KERNEL_ONLY(na));
2356 
2357 	/*
2358 	 * Create and initialize the common fields of the krings array,
2359 	 * using the information that must already be available in the na.
2360 	 * tailroom can be used to request the allocation of additional
2361 	 * tailroom bytes after the krings array.  This is used by
2362 	 * nexus_vp_adapter's (i.e., flow switch ports) to make room
2363 	 * for leasing-related data structures.
2364 	 */
2365 	if ((err = na_kr_create(na, tailroom, alloc_ctx)) == 0 && !kronly) {
2366 		err = na_kr_setup(na, ch);
2367 		if (err != 0) {
2368 			na_kr_delete(na);
2369 		}
2370 	}
2371 
2372 	return err;
2373 }
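
/*
 * A minimal na_krings_create callback is just a wrapper around this
 * routine; cf. na_pseudo_krings_create() near the bottom of this file
 * (a hypothetical "foo" adapter is shown for illustration):
 *
 *	static int
 *	foo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
 *	{
 *		return na_rings_mem_setup(na, 0, FALSE, ch);
 *	}
 *
 * Adapters that need leasing state (e.g., flow switch ports) pass a
 * nonzero tailroom instead.
 */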
2374 
2375 void
2376 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2377     boolean_t defunct)
2378 {
2379 	SK_LOCK_ASSERT_HELD();
2380 	ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2381 
2382 	/*
2383 	 * Deletes the kring and ring array of the adapter. They
2384 	 * must have been created using na_rings_mem_setup().
2385 	 *
2386 	 * XXX: [email protected] -- the parameter "ch" should not be
2387 	 * needed here; however na_kr_depopulate_slots() needs to
2388 	 * go thru the channel's user packet pool hash, and so for
2389 	 * now we leave it here.
2390 	 */
2391 	na_kr_teardown_all(na, ch, defunct);
2392 	if (!defunct) {
2393 		na_kr_delete(na);
2394 	}
2395 }
2396 
2397 void
2398 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2399 {
2400 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2401 
2402 	/*
2403 	 * Depopulate slots on the TX and RX rings of this channel,
2404 	 * but don't touch other rings owned by other channels if
2405 	 * this adapter is being shared.
2406 	 */
2407 	na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2408 }
2409 
2410 void
2411 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2412 {
2413 	enum txrx t;
2414 	uint32_t i;
2415 
2416 	for_rx_tx(t) {
2417 		for (i = 0; i < na_get_nrings(na, t); i++) {
2418 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2419 			int error;
2420 			error = kr_enter(kring, TRUE);
2421 			if (drop) {
2422 				kring->ckr_flags |= CKRF_DROP;
2423 			} else {
2424 				kring->ckr_flags &= ~CKRF_DROP;
2425 			}
2426 
2427 			if (error != 0) {
2428 				SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2429 				    "kr_enter failed %d",
2430 				    na->na_name, SK_KVA(na),
2431 				    kring->ckr_name, SK_KVA(kring),
2432 				    error);
2433 			} else {
2434 				kr_exit(kring);
2435 			}
2436 			SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2437 			    "krflags 0x%b", na->na_name, SK_KVA(na),
2438 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2439 			    CKRF_BITS);
2440 		}
2441 	}
2442 }
2443 
2444 /*
2445  * Set the stopped/enabled status of a ring.  When stopping, this also waits
2446  * for all current activity on the ring to terminate.  The status change
2447  * is then notified using the na's na_notify callback.
2448  */
2449 static void
2450 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2451     uint32_t state)
2452 {
2453 	struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2454 
2455 	/*
2456 	 * Mark the ring as stopped/enabled, and run through the
2457 	 * locks to make sure other users get to see it.
2458 	 */
2459 	if (state == KR_READY) {
2460 		kr_start(kr);
2461 	} else {
2462 		kr_stop(kr, state);
2463 	}
2464 }
2465 
2466 
2467 /* stop or enable all the rings of na */
2468 static void
2469 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2470 {
2471 	uint32_t i;
2472 	enum txrx t;
2473 
2474 	SK_LOCK_ASSERT_HELD();
2475 
2476 	if (!NA_IS_ACTIVE(na)) {
2477 		return;
2478 	}
2479 
2480 	for_rx_tx(t) {
2481 		for (i = 0; i < na_get_nrings(na, t); i++) {
2482 			na_set_ring(na, i, t, state);
2483 		}
2484 	}
2485 }
2486 
2487 /*
2488  * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
2489  * to finish and prevents any new one from starting.  Call this before turning
2490  * Skywalk mode off, or before removing the harware rings (e.g., on module
2491  * onload).  As a rule of thumb for linux drivers, this should be placed near
2492  * each napi_disable().
2493  */
2494 void
2495 na_disable_all_rings(struct nexus_adapter *na)
2496 {
2497 	na_set_all_rings(na, KR_STOPPED);
2498 }
2499 
2500 /*
2501  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
2502  * adapter's rings.  In Linux drivers, this should be placed near each
2503  * napi_enable().
2504  */
2505 void
2506 na_enable_all_rings(struct nexus_adapter *na)
2507 {
2508 	na_set_all_rings(na, KR_READY /* enabled */);
2509 }
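
/*
 * Illustrative driver pattern (hypothetical "foo" driver, sketch only),
 * mirroring the napi_disable()/napi_enable() guidance above:
 *
 *	na_disable_all_rings(sc->sc_na);	(all syncs drained)
 *	foo_reinit_hw_rings(sc);		(safe: no txsync/rxsync runs)
 *	na_enable_all_rings(sc->sc_na);
 */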
2510 
2511 void
2512 na_lock_all_rings(struct nexus_adapter *na)
2513 {
2514 	na_set_all_rings(na, KR_LOCKED);
2515 }
2516 
2517 void
2518 na_unlock_all_rings(struct nexus_adapter *na)
2519 {
2520 	na_enable_all_rings(na);
2521 }
2522 
2523 int
2524 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2525     struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2526 {
2527 	struct nexus_adapter *na = NULL;
2528 	mach_vm_size_t memsize = 0;
2529 	int err = 0;
2530 	enum txrx t;
2531 
2532 	ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2533 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2534 
2535 	SK_LOCK_ASSERT_HELD();
2536 
2537 	/* find the nexus adapter and return the reference */
2538 	err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2539 	if (err != 0) {
2540 		ASSERT(na == NULL);
2541 		goto done;
2542 	}
2543 
2544 	if (NA_KERNEL_ONLY(na)) {
2545 		err = EBUSY;
2546 		goto done;
2547 	}
2548 
2549 	/* reject if the adapter is defunct or non-permissive */
2550 	if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2551 		err = ENXIO;
2552 		goto done;
2553 	}
2554 
2555 	err = na_bind_channel(na, ch, chr);
2556 	if (err != 0) {
2557 		goto done;
2558 	}
2559 
2560 	ASSERT(ch->ch_schema != NULL);
2561 	ASSERT(na == ch->ch_na);
2562 
2563 	for_all_rings(t) {
2564 		if (na_get_nrings(na, t) == 0) {
2565 			ch->ch_si[t] = NULL;
2566 			continue;
2567 		}
2568 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2569 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2570 	}
2571 
2572 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2573 
2574 	if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2575 	    AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2576 		atomic_bitset_32(__DECONST(uint32_t *,
2577 		    &ch->ch_schema->csm_flags), CSM_PRIV_MEM);
2578 	}
2579 
2580 	err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2581 	if (err != 0) {
2582 		goto done;
2583 	}
2584 
2585 	atomic_bitset_32(__DECONST(uint32_t *, &ch->ch_schema->csm_flags),
2586 	    CSM_ACTIVE);
2587 	chr->cr_memsize = memsize;
2588 	chr->cr_memoffset = ch->ch_schema_offset;
2589 
2590 	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2591 	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2592 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2593 	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2594 	    na->na_flags, NAF_BITS);
2595 
2596 done:
2597 	if (err != 0) {
2598 		if (ch->ch_schema != NULL || na != NULL) {
2599 			if (ch->ch_schema != NULL) {
2600 				ASSERT(na == ch->ch_na);
2601 				/*
2602 				 * Callee will unmap memory region if needed,
2603 				 * as well as release reference held on 'na'.
2604 				 */
2605 				na_disconnect(nx, ch);
2606 				na = NULL;
2607 			}
2608 			if (na != NULL) {
2609 				(void) na_release_locked(na);
2610 				na = NULL;
2611 			}
2612 		}
2613 	}
2614 
2615 	return err;
2616 }
2617 
2618 void
2619 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2620 {
2621 #pragma unused(nx)
2622 	enum txrx t;
2623 
2624 	SK_LOCK_ASSERT_HELD();
2625 
2626 	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2627 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2628 	    ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2629 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2630 	    ch->ch_na->na_flags, NAF_BITS);
2631 
2632 	/* destroy mapping and release references */
2633 	na_unbind_channel(ch);
2634 	ASSERT(ch->ch_na == NULL);
2635 	ASSERT(ch->ch_schema == NULL);
2636 	for_all_rings(t) {
2637 		ch->ch_si[t] = NULL;
2638 	}
2639 }
2640 
2641 void
2642 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2643     struct nexus_adapter *na, boolean_t locked)
2644 {
2645 #pragma unused(nx)
2646 	SK_LOCK_ASSERT_HELD();
2647 	if (!locked) {
2648 		lck_mtx_lock(&ch->ch_lock);
2649 	}
2650 
2651 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2652 
2653 	if (!(na->na_flags & NAF_DEFUNCT)) {
2654 		/*
2655 		 * Mark this adapter as defunct to inform nexus-specific
2656 		 * teardown handler called by na_teardown() below.
2657 		 */
2658 		atomic_bitset_32(&na->na_flags, NAF_DEFUNCT);
2659 
2660 		/*
2661 		 * Depopulate slots.
2662 		 */
2663 		na_teardown(na, ch, TRUE);
2664 
2665 		/*
2666 		 * And finally destroy any already-defunct memory regions.
2667 		 * Do this only if the nexus adapter owns the arena, i.e.
2668 		 * NAF_MEM_LOANED is not set.  Otherwise, we'd expect
2669 		 * that this routine be called again for the real owner.
2670 		 */
2671 		if (!(na->na_flags & NAF_MEM_LOANED)) {
2672 			skmem_arena_defunct(na->na_arena);
2673 		}
2674 	}
2675 
2676 	SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2677 	    "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2678 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2679 	    na->na_name, ch->ch_info->cinfo_nx_port,
2680 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2681 	    na->na_flags, NAF_BITS);
2682 
2683 	if (!locked) {
2684 		lck_mtx_unlock(&ch->ch_lock);
2685 	}
2686 }
2687 
2688 /*
2689  * TODO: [email protected] -- merge this into na_connect()
2690  */
2691 int
2692 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2693     struct chreq *chr, struct proc *p)
2694 {
2695 #pragma unused(p)
2696 	struct nexus_adapter *na = NULL;
2697 	mach_vm_size_t memsize = 0;
2698 	int error = 0;
2699 	enum txrx t;
2700 
2701 	ASSERT(chr->cr_mode & CHMODE_KERNEL);
2702 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2703 	ASSERT(ch->ch_na == NULL);
2704 	ASSERT(ch->ch_schema == NULL);
2705 
2706 	SK_LOCK_ASSERT_HELD();
2707 
2708 	error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2709 	if (error != 0) {
2710 		goto done;
2711 	}
2712 
2713 	if (na == NULL) {
2714 		error = EINVAL;
2715 		goto done;
2716 	}
2717 
2718 	if (na->na_channels > 0) {
2719 		error = EBUSY;
2720 		goto done;
2721 	}
2722 
2723 	if (na->na_flags & NAF_DEFUNCT) {
2724 		error = ENXIO;
2725 		goto done;
2726 	}
2727 
2728 	/*
2729 	 * Special connect requires the nexus adapter to handle its
2730 	 * own channel binding and unbinding via na_special(); bail
2731 	 * if this adapter doesn't support it.
2732 	 */
2733 	if (na->na_special == NULL) {
2734 		error = ENOTSUP;
2735 		goto done;
2736 	}
2737 
2738 	/* upon success, "ch->ch_na" will point to "na" */
2739 	error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2740 	if (error != 0) {
2741 		ASSERT(ch->ch_na == NULL);
2742 		goto done;
2743 	}
2744 
2745 	ASSERT(na->na_flags & NAF_SPEC_INIT);
2746 	ASSERT(na == ch->ch_na);
2747 	/* make sure this is still the case */
2748 	ASSERT(ch->ch_schema == NULL);
2749 
2750 	for_rx_tx(t) {
2751 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2752 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2753 	}
2754 
2755 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2756 	chr->cr_memsize = memsize;
2757 
2758 	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2759 	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2760 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2761 	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2762 	    na->na_flags, NAF_BITS);
2763 
2764 done:
2765 	if (error != 0) {
2766 		if (ch->ch_na != NULL || na != NULL) {
2767 			if (ch->ch_na != NULL) {
2768 				ASSERT(na == ch->ch_na);
2769 				/* callee will release reference on 'na' */
2770 				na_disconnect_spec(nx, ch);
2771 				na = NULL;
2772 			}
2773 			if (na != NULL) {
2774 				(void) na_release_locked(na);
2775 				na = NULL;
2776 			}
2777 		}
2778 	}
2779 
2780 	return error;
2781 }
2782 
2783 /*
2784  * TODO: [email protected] -- merge this into na_disconnect()
2785  */
2786 void
2787 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2788 {
2789 #pragma unused(nx)
2790 	struct nexus_adapter *na = ch->ch_na;
2791 	enum txrx t;
2792 	int error;
2793 
2794 	SK_LOCK_ASSERT_HELD();
2795 	ASSERT(na != NULL);
2796 	ASSERT(na->na_flags & NAF_SPEC_INIT);   /* has been bound */
2797 
2798 	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2799 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2800 	    na->na_name, ch->ch_info->cinfo_nx_port,
2801 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2802 	    na->na_flags, NAF_BITS);
2803 
2804 	/* take a reference for this routine */
2805 	na_retain_locked(na);
2806 
2807 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2808 	ASSERT(ch->ch_schema == NULL);
2809 	ASSERT(na->na_special != NULL);
2810 	/* unbind this channel */
2811 	error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2812 	ASSERT(error == 0);
2813 	ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2814 
2815 	/* now release our reference; this may be the last */
2816 	na_release_locked(na);
2817 	na = NULL;
2818 
2819 	ASSERT(ch->ch_na == NULL);
2820 	for_rx_tx(t) {
2821 		ch->ch_si[t] = NULL;
2822 	}
2823 }
2824 
2825 void
2826 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2827 {
2828 #pragma unused(nx)
2829 	struct nexus_adapter *na = ch->ch_na;
2830 
2831 	SK_LOCK_ASSERT_HELD();
2832 
2833 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2834 	ASSERT(NA_KERNEL_ONLY(na));
2835 	ASSERT(na->na_special != NULL);
2836 
2837 	na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2838 }
2839 
2840 void
2841 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2842 {
2843 #pragma unused(nx)
2844 	struct nexus_adapter *na = ch->ch_na;
2845 
2846 	SK_LOCK_ASSERT_HELD();
2847 
2848 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2849 	ASSERT(NA_KERNEL_ONLY(na));
2850 	ASSERT(na->na_special != NULL);
2851 
2852 	na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2853 }
2854 
2855 /*
2856  * MUST BE CALLED UNDER SK_LOCK()
2857  *
2858  * Get a refcounted reference to a nexus adapter attached
2859  * to the interface specified by chr.
2860  * This is always called in the execution of an ioctl().
2861  *
2862  * Return ENXIO if the interface specified by the request does
2863  * not exist, ENOTSUP if Skywalk is not supported by the interface,
2864  * EINVAL if parameters are invalid, ENOMEM if needed resources
2865  * could not be allocated.
2866  * If successful, hold a reference to the nexus adapter.
2867  *
2868  * No reference is kept on the real interface, which may then
2869  * disappear at any time.
2870  */
2871 int
2872 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2873     struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
2874     struct nexus_adapter **na, boolean_t create)
2875 {
2876 	int error = 0;
2877 
2878 	_CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2879 
2880 	*na = NULL;     /* default return value */
2881 
2882 	SK_LOCK_ASSERT_HELD();
2883 
2884 	/*
2885 	 * We cascade through all possible types of nexus adapter.
2886 	 * All nx_*_na_find() functions return an error and an na,
2887 	 * with the following combinations:
2888 	 *
2889 	 * error    na
2890 	 *   0	   NULL		type doesn't match
2891 	 *  !0	   NULL		type matches, but na creation/lookup failed
2892 	 *   0	  !NULL		type matches and na created/found
2893 	 *  !0    !NULL		impossible
2894 	 */
2895 
2896 #if CONFIG_NEXUS_MONITOR
2897 	/* try to see if this is a monitor port */
2898 	error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
2899 	if (error != 0 || *na != NULL) {
2900 		return error;
2901 	}
2902 #endif /* CONFIG_NEXUS_MONITOR */
2903 #if CONFIG_NEXUS_USER_PIPE
2904 	/* try to see if this is a pipe port */
2905 	error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2906 	if (error != 0 || *na != NULL) {
2907 		return error;
2908 	}
2909 #endif /* CONFIG_NEXUS_USER_PIPE */
2910 #if CONFIG_NEXUS_KERNEL_PIPE
2911 	/* try to see if this is a kernel pipe port */
2912 	error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2913 	if (error != 0 || *na != NULL) {
2914 		return error;
2915 	}
2916 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
2917 #if CONFIG_NEXUS_FLOWSWITCH
2918 	/* try to see if this is a flowswitch port */
2919 	error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2920 	if (error != 0 || *na != NULL) {
2921 		return error;
2922 	}
2923 #endif /* CONFIG_NEXUS_FLOWSWITCH */
2924 #if CONFIG_NEXUS_NETIF
2925 	error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2926 	if (error != 0 || *na != NULL) {
2927 		return error;
2928 	}
2929 #endif /* CONFIG_NEXUS_NETIF */
2930 
2931 	ASSERT(*na == NULL);
2932 	return ENXIO;
2933 }
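
/*
 * Typical caller pattern (cf. na_connect() above); on failure *na is
 * left NULL, and a successful lookup must eventually be balanced with
 * na_release_locked():
 *
 *	struct nexus_adapter *na = NULL;
 *	error = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE);
 *	if (error != 0)
 *		return error;		(na is NULL here)
 *	... use na ...
 *	(void) na_release_locked(na);
 */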
2934 
2935 void
2936 na_retain_locked(struct nexus_adapter *na)
2937 {
2938 	SK_LOCK_ASSERT_HELD();
2939 
2940 	if (na != NULL) {
2941 #if SK_LOG
2942 		uint32_t oref = atomic_add_32_ov(&na->na_refcount, 1);
2943 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2944 		    na->na_name, SK_KVA(na), oref + 1, na->na_channels);
2945 #else /* !SK_LOG */
2946 		atomic_add_32(&na->na_refcount, 1);
2947 #endif /* !SK_LOG */
2948 	}
2949 }
2950 
2951 /* returns 1 iff the nexus_adapter is destroyed */
2952 int
2953 na_release_locked(struct nexus_adapter *na)
2954 {
2955 	uint32_t oref;
2956 
2957 	SK_LOCK_ASSERT_HELD();
2958 
2959 	ASSERT(na->na_refcount > 0);
2960 	oref = atomic_add_32_ov(&na->na_refcount, -1);
2961 	if (oref > 1) {
2962 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2963 		    na->na_name, SK_KVA(na), oref - 1, na->na_channels);
2964 		return 0;
2965 	}
2966 	ASSERT(na->na_channels == 0);
2967 
2968 	if (na->na_dtor != NULL) {
2969 		na->na_dtor(na);
2970 	}
2971 
2972 	ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
2973 	ASSERT(na->na_slot_ctxs == NULL);
2974 	ASSERT(na->na_scratch == NULL);
2975 
2976 #if CONFIG_NEXUS_USER_PIPE
2977 	nx_upipe_na_dealloc(na);
2978 #endif /* CONFIG_NEXUS_USER_PIPE */
2979 	if (na->na_arena != NULL) {
2980 		skmem_arena_release(na->na_arena);
2981 		na->na_arena = NULL;
2982 	}
2983 
2984 	SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
2985 	    na->na_name, SK_KVA(na));
2986 
2987 	NA_FREE(na);
2988 	return 1;
2989 }
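
/*
 * Retains and releases must balance under SK_LOCK; na_disconnect_spec()
 * above, for instance, holds a transient reference across the unbind:
 *
 *	na_retain_locked(na);
 *	... operate on na ...
 *	(void) na_release_locked(na);
 *
 * The release may destroy the adapter (in which case it returns 1), so
 * na must not be dereferenced afterwards.
 */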
2990 
2991 static struct nexus_adapter *
2992 na_pseudo_alloc(zalloc_flags_t how)
2993 {
2994 	struct nexus_adapter *na;
2995 
2996 	na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
2997 	if (na) {
2998 		na->na_type = NA_PSEUDO;
2999 		na->na_free = na_pseudo_free;
3000 	}
3001 	return na;
3002 }
3003 
3004 static void
3005 na_pseudo_free(struct nexus_adapter *na)
3006 {
3007 	ASSERT(na->na_refcount == 0);
3008 	SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3009 	bzero(na, sizeof(*na));
3010 	zfree(na_pseudo_zone, na);
3011 }
3012 
3013 static int
3014 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3015     uint32_t flags)
3016 {
3017 #pragma unused(kring, p, flags)
3018 	SK_DF(SK_VERB_SYNC | SK_VERB_TX,
3019 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3020 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3021 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3022 	    flags);
3023 
3024 	return 0;
3025 }
3026 
3027 static int
3028 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3029     uint32_t flags)
3030 {
3031 #pragma unused(kring, p, flags)
3032 	SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3033 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3034 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3035 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3036 	    flags);
3037 
3038 	ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3039 
3040 	return 0;
3041 }
3042 
3043 static int
3044 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3045 {
3046 	SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3047 	    SK_KVA(na), na_activate_mode2str(mode));
3048 
3049 	switch (mode) {
3050 	case NA_ACTIVATE_MODE_ON:
3051 		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
3052 		break;
3053 
3054 	case NA_ACTIVATE_MODE_DEFUNCT:
3055 		break;
3056 
3057 	case NA_ACTIVATE_MODE_OFF:
3058 		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
3059 		break;
3060 
3061 	default:
3062 		VERIFY(0);
3063 		/* NOTREACHED */
3064 		__builtin_unreachable();
3065 	}
3066 
3067 	return 0;
3068 }
3069 
3070 static void
3071 na_pseudo_dtor(struct nexus_adapter *na)
3072 {
3073 #pragma unused(na)
3074 }
3075 
3076 static int
3077 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3078 {
3079 	return na_rings_mem_setup(na, 0, FALSE, ch);
3080 }
3081 
3082 static void
3083 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3084     boolean_t defunct)
3085 {
3086 	na_rings_mem_teardown(na, ch, defunct);
3087 }
3088 
3089 /*
3090  * Pseudo nexus adapter; typically used as a generic parent adapter.
3091  */
3092 int
3093 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3094     struct nexus_adapter **ret)
3095 {
3096 	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3097 	struct nexus_adapter *na;
3098 	int error;
3099 
3100 	SK_LOCK_ASSERT_HELD();
3101 	*ret = NULL;
3102 
3103 	na = na_pseudo_alloc(Z_WAITOK);
3104 
3105 	ASSERT(na->na_type == NA_PSEUDO);
3106 	ASSERT(na->na_free == na_pseudo_free);
3107 
3108 	(void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3109 	na->na_name[sizeof(na->na_name) - 1] = '\0';
3110 	uuid_generate_random(na->na_uuid);
3111 
3112 	/*
3113 	 * Verify upper bounds; for all cases including user pipe nexus,
3114 	 * the parameters must have already been validated by corresponding
3115 	 * nxdom_prov_params() function defined by each domain.
3116 	 */
3117 	na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3118 	na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3119 	na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3120 	na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3121 	ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3122 	ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3123 	ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3124 	ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3125 
3126 	na->na_txsync = na_pseudo_txsync;
3127 	na->na_rxsync = na_pseudo_rxsync;
3128 	na->na_activate = na_pseudo_activate;
3129 	na->na_dtor = na_pseudo_dtor;
3130 	na->na_krings_create = na_pseudo_krings_create;
3131 	na->na_krings_delete = na_pseudo_krings_delete;
3132 
3133 	*(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3134 	    NEXUS_STATS_TYPE_INVALID;
3135 
3136 	/* other fields are set in the common routine */
3137 	na_attach_common(na, nx, NX_DOM_PROV(nx));
3138 
3139 	if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3140 	    nx, na)) != 0) {
3141 		ASSERT(na->na_arena == NULL);
3142 		goto err;
3143 	}
3144 	ASSERT(na->na_arena != NULL);
3145 
3146 	*(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3147 	ASSERT(na->na_flowadv_max == 0 ||
3148 	    skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3149 
3150 #if SK_LOG
3151 	uuid_string_t uuidstr;
3152 	SK_D("na_name: \"%s\"", na->na_name);
3153 	SK_D("  UUID:        %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3154 	SK_D("  nx:          0x%llx (\"%s\":\"%s\")",
3155 	    SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3156 	    NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3157 	SK_D("  flags:       %b", na->na_flags, NAF_BITS);
3158 	SK_D("  flowadv_max: %u", na->na_flowadv_max);
3159 	SK_D("  rings:       tx %u rx %u",
3160 	    na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3161 	SK_D("  slots:       tx %u rx %u",
3162 	    na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3163 #if CONFIG_NEXUS_USER_PIPE
3164 	SK_D("  next_pipe:   %u", na->na_next_pipe);
3165 	SK_D("  max_pipes:   %u", na->na_max_pipes);
3166 #endif /* CONFIG_NEXUS_USER_PIPE */
3167 #endif /* SK_LOG */
3168 
3169 	*ret = na;
3170 	na_retain_locked(na);
3171 
3172 	return 0;
3173 
3174 err:
3175 	if (na != NULL) {
3176 		if (na->na_arena != NULL) {
3177 			skmem_arena_release(na->na_arena);
3178 			na->na_arena = NULL;
3179 		}
3180 		NA_FREE(na);
3181 	}
3182 	return error;
3183 }
3184 
3185 void
3186 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3187     const flowadv_idx_t fe_idx, const uint32_t flowid)
3188 {
3189 	struct skmem_arena *ar = na->na_arena;
3190 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3191 	struct __flowadv_entry *fae;
3192 
3193 	ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3194 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3195 
3196 	AR_LOCK(ar);
3197 
3198 	/* we must not get here if arena is defunct; this must be valid */
3199 	ASSERT(arn->arn_flowadv_obj != NULL);
3200 
3201 	VERIFY(fe_idx < na->na_flowadv_max);
3202 	fae = &arn->arn_flowadv_obj[fe_idx];
3203 	uuid_copy(fae->fae_id, fae_id);
3204 	fae->fae_flowid = flowid;
3205 	fae->fae_flags = FLOWADVF_VALID;
3206 
3207 	AR_UNLOCK(ar);
3208 }
3209 
3210 void
3211 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3212     const flowadv_idx_t fe_idx, const uint32_t flowid)
3213 {
3214 #pragma unused(fae_id)
3215 	struct skmem_arena *ar = na->na_arena;
3216 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3217 
3218 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3219 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3220 
3221 	AR_LOCK(ar);
3222 
3223 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3224 	if (arn->arn_flowadv_obj != NULL) {
3225 		struct __flowadv_entry *fae;
3226 
3227 		VERIFY(fe_idx < na->na_flowadv_max);
3228 		fae = &arn->arn_flowadv_obj[fe_idx];
3229 		ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3230 		uuid_clear(fae->fae_id);
3231 		VERIFY(fae->fae_flowid == flowid);
3232 		fae->fae_flowid = 0;
3233 		fae->fae_flags = 0;
3234 	}
3235 
3236 	AR_UNLOCK(ar);
3237 }
3238 
3239 bool
3240 na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3241     const flowadv_token_t flow_token)
3242 {
3243 	struct skmem_arena *ar = na->na_arena;
3244 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3245 	bool suspend;
3246 
3247 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3248 	ASSERT(fe_idx < na->na_flowadv_max);
3249 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3250 
3251 	AR_LOCK(ar);
3252 
3253 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3254 
3255 	if (arn->arn_flowadv_obj != NULL) {
3256 		struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3257 
3258 		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3259 		/*
3260 		 * We cannot guarantee that the flow is still around by now,
3261 		 * so check if that's the case and let the caller know.
3262 		 */
3263 		if ((suspend = (fae->fae_token == flow_token))) {
3264 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3265 			fae->fae_flags |= FLOWADVF_SUSPENDED;
3266 		}
3267 	} else {
3268 		suspend = false;
3269 	}
3270 	if (suspend) {
3271 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3272 		    "SUSPEND", sk_proc_name_address(current_proc()),
3273 		    sk_proc_pid(current_proc()), flow_token, fe_idx);
3274 	} else {
3275 		SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around",
3276 		    sk_proc_name_address(current_proc()),
3277 		    sk_proc_pid(current_proc()), flow_token, fe_idx);
3278 	}
3279 
3280 	AR_UNLOCK(ar);
3281 
3282 	return suspend;
3283 }
3284 
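/*
 * Inverse of na_flowadv_set(): clear the SUSPENDED bit if the
 * caller's token still matches the entry.  Returns nonzero if the
 * flow was resumed, zero if it is no longer around.
 */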
int
na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
    const flowadv_token_t flow_token)
{
	struct nexus_adapter *na = ch->ch_na;
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
	boolean_t resume;

	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
	ASSERT(fe_idx < na->na_flowadv_max);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	AR_LOCK(ar);

	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));

	if (arn->arn_flowadv_obj != NULL) {
		struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];

		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
		/*
		 * We cannot guarantee that the flow is still around by now,
		 * so check if that's the case and let the caller know.
		 */
		if ((resume = (fae->fae_token == flow_token))) {
			ASSERT(fae->fae_flags & FLOWADVF_VALID);
			fae->fae_flags &= ~FLOWADVF_SUSPENDED;
		}
	} else {
		resume = FALSE;
	}
	if (resume) {
		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
		    "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
		    fe_idx);
	} else {
		SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
		    ch->ch_name, ch->ch_pid, flow_token, fe_idx);
	}

	AR_UNLOCK(ar);

	return resume;
}

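/*
 * Post a flow-advisory update event on a TX ring so that any
 * channel watchers are notified with CHAN_FILT_HINT_FLOW_ADV_UPD.
 */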
void
na_flowadv_event(struct __kern_channel_ring *kring)
{
	ASSERT(kring->ckr_tx == NR_TX);

	SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));

	na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
}

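/*
 * Sync callback for the user packet pool free ring: walk the slots
 * completed by the user ([ckr_khead, ckr_rhead)), detach the packet
 * from each slot and return the whole batch to the pool.
 */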
static int
na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	struct kern_pbufpool *pp = kring->ckr_pp;
	uint32_t nfree = 0;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}
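	/*
	 * Example (illustrative): with ckr_num_slots == 512, khead == 500
	 * and rhead == 10, the raw difference is -490; adding 512 yields
	 * n == 22 slots pending in the wrapped range [khead, rhead).
	 */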

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	PP_LOCK(pp);
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			SK_ERR("un-allocated packet or buflet %d 0x%llx",
			    usd->sd_md_idx, SK_KVA(kqum));
			ret = EINVAL;
			break;
		}

		/* detach and free the packet */
		kqum->qum_qflags &= ~QUM_F_FINALIZED;
		kqum->qum_ksd = NULL;
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		ASSERT(pp == kqum->qum_pp);
		ASSERT(nfree < kring->ckr_num_slots);
		kring->ckr_scratch[nfree++] = (uint64_t)kqum;
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	PP_UNLOCK(pp);

	if (__probable(nfree > 0)) {
		pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
	}

	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}

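/*
 * Sync callback for the user packet pool alloc ring: adjust the
 * allocation working set based on demand, allocate a batch of
 * packets, attach them to free slots and externalize them for
 * user space consumption.
 */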
static int
na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, ph_needed, ph_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increment the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;
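	/*
	 * Example (illustrative, assuming na_upp_alloc_lowat is below
	 * these values): with curr_ws == 64 and b == 40 busy slots, a
	 * forced sync grows the working set to 96 (capped at ckr_lim)
	 * and requests n == 56 packets; a regular sync past the hold
	 * time shrinks it to 48 and requests n == 8.
	 */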

	/* min with # of avail free slots (subtract busy from max) */
	n = ph_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((ph_cnt = ph_needed) == 0) {
		goto done;
	}

	err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
	    &ph_cnt);

	if (__improbable(ph_cnt == 0)) {
		SK_ERR("kr 0x%llx failed to alloc %u packets(%d)",
		    SK_KVA(kring), ph_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
	} else {
		/*
		 * Add packets to the allocated list of user packet pool.
		 */
		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
	}

	for (n = 0; n < ph_cnt; n++) {
		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
		kring->ckr_scratch[n] = 0;
		ASSERT(kqum != NULL);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/*
		 * Since this packet is freshly allocated and we need to
		 * have the flag set for the attach to succeed, just set
		 * it here rather than calling __packet_finalize().
		 */
		kqum->qum_qflags |= QUM_F_FINALIZED;

		/* Attach packet to slot */
		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
		/*
		 * externalize the packet as it is being transferred to
		 * user space.
		 */
		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}

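/*
 * Buflet counterpart of na_packet_pool_free_sync(): return buflets
 * that the user has released on the buflet free ring back to the
 * pool, one at a time.
 */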
static int
na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct kern_pbufpool *pp = kring->ckr_pp;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			SK_ERR("un-allocated buflet %d 0x%llx", usd->sd_md_idx,
			    SK_KVA(kbft));
			ret = EINVAL;
			break;
		}

		/* detach and free the buflet */
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		pp_free_buflet(pp, kbft);
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}

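/*
 * Buflet counterpart of na_packet_pool_alloc_sync(): size the
 * working set the same way, then allocate a batch of buflets with
 * buffers attached, externalize each one and attach it to a free
 * slot.
 */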
static int
na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, bh_needed, bh_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct __kern_buflet_ext *kbe;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increment the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;

	/* min with # of avail free slots (subtract busy from max) */
	n = bh_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((bh_cnt = bh_needed) == 0) {
		goto done;
	}

	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
	    SKMEM_NOSLEEP, PP_ALLOC_BFT_ATTACH_BUFFER);

	if (bh_cnt == 0) {
		SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
		    SK_KVA(kring), bh_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
	}

	for (n = 0; n < bh_cnt; n++) {
		struct __user_buflet *ubft;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
		kbe = (struct __kern_buflet_ext *)kbft;
		kring->ckr_scratch[n] = 0;
		ASSERT(kbft != NULL);

		/*
		 * Add buflet to the allocated list of user packet pool.
		 */
		pp_insert_upp_bft(pp, kbft, pid);

		/*
		 * externalize the buflet as it is being transferred to
		 * user space.
		 */
		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
		KBUF_EXTERNALIZE(kbft, ubft, pp);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/* Attach buflet to slot */
		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}

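/*
 * Reap the caches in this adapter's arena; a purge releases cached
 * objects outright, while a prune only trims them down.
 */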
/* The caller needs to ensure that the NA stays intact */
void
na_drain(struct nexus_adapter *na, boolean_t purge)
{
	/* will be cleared on next channel sync */
	if (!(atomic_bitset_32_ov(&na->na_flags, NAF_DRAINING) &
	    NAF_DRAINING) && NA_IS_ACTIVE(na)) {
		SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
		    na->na_name, (purge ? "purging" : "pruning"),
		    SK_KVA(na), na->na_flags, NAF_BITS);

		/* reap (purge/prune) caches in the arena */
		skmem_arena_reap(na->na_arena, purge);
	}
}