xref: /xnu-12377.81.4/bsd/skywalk/nexus/nexus_adapter.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31  * All rights reserved.
32  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  *   1. Redistributions of source code must retain the above copyright
38  *      notice, this list of conditions and the following disclaimer.
39  *   2. Redistributions in binary form must reproduce the above copyright
40  *      notice, this list of conditions and the following disclaimer in the
41  *      documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  */
55 #include <sys/systm.h>
56 #include <skywalk/os_skywalk_private.h>
57 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
58 #include <skywalk/nexus/netif/nx_netif.h>
59 #include <skywalk/nexus/upipe/nx_user_pipe.h>
60 #include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
61 #include <kern/thread.h>
62 #include <kern/uipc_domain.h>
63 
64 static int na_krings_use(struct kern_channel *);
65 static void na_krings_unuse(struct kern_channel *);
66 static void na_krings_verify(struct nexus_adapter *);
67 static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
68 static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
69 static void na_set_all_rings(struct nexus_adapter *, uint32_t);
70 static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
71 static void na_unset_ringid(struct kern_channel *);
72 static void na_teardown(struct nexus_adapter *, struct kern_channel *,
73     boolean_t);
74 
75 static int na_kr_create(struct nexus_adapter *, boolean_t);
76 static void na_kr_delete(struct nexus_adapter *);
77 static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
78 static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
79     boolean_t);
80 static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
81     boolean_t, struct proc *);
82 static int na_kr_populate_slots(struct __kern_channel_ring *);
83 static void na_kr_depopulate_slots(struct __kern_channel_ring *,
84     struct kern_channel *, boolean_t defunct);
85 
86 static int na_schema_alloc(struct kern_channel *);
87 
88 static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
89 static void na_pseudo_free(struct nexus_adapter *);
90 static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
91     uint32_t);
92 static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
93     uint32_t);
94 static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
95 static void na_pseudo_dtor(struct nexus_adapter *);
96 static int na_pseudo_krings_create(struct nexus_adapter *,
97     struct kern_channel *);
98 static void na_pseudo_krings_delete(struct nexus_adapter *,
99     struct kern_channel *, boolean_t);
100 static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
101     struct proc *, uint32_t);
102 static int na_packet_pool_alloc_large_sync(struct __kern_channel_ring *,
103     struct proc *, uint32_t);
104 static int na_packet_pool_free_sync(struct __kern_channel_ring *,
105     struct proc *, uint32_t);
106 static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
107     struct proc *, uint32_t);
108 static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
109     struct proc *, uint32_t);
110 
111 #define NA_KRING_IDLE_TIMEOUT   (NSEC_PER_SEC * 30) /* 30 seconds */
112 
113 static SKMEM_TYPE_DEFINE(na_pseudo_zone, struct nexus_adapter);
114 
115 static int __na_inited = 0;
116 
117 #define NA_NUM_WMM_CLASSES      4
118 #define NAKR_WMM_SC2RINGID(_s)  PKT_SC2TC(_s)
119 #define NAKR_SET_SVC_LUT(_n, _s)                                        \
120 	(_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
121 #define NAKR_SET_KR_SVC(_n, _s)                                         \
122 	NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)
123 
124 #define NA_UPP_ALLOC_LOWAT      8
125 static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;
126 
127 #define NA_UPP_REAP_INTERVAL    10 /* seconds */
128 static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;
129 
130 #define NA_UPP_WS_HOLD_TIME     2 /* seconds */
131 static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;
132 
133 #define NA_UPP_REAP_MIN_PKTS    0
134 static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;
135 
136 #define NA_UPP_ALLOC_BUF_LOWAT     64
137 static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;
138 
139 #if (DEVELOPMENT || DEBUG)
140 static  uint64_t _na_inject_error = 0;
141 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
142 	_SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)
143 
144 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
145     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
146     NA_UPP_WS_HOLD_TIME, "");
147 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
148     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
149     NA_UPP_REAP_INTERVAL, "");
150 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
151     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
152     NA_UPP_REAP_MIN_PKTS, "");
153 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
154     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
155     NA_UPP_ALLOC_LOWAT, "");
156 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
157     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
158     NA_UPP_ALLOC_BUF_LOWAT, "");
159 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
160     CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
161 #else
162 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
163 #endif /* !DEVELOPMENT && !DEBUG */
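/*
 * On DEVELOPMENT/DEBUG kernels the tunables above are exposed as read-write
 * sysctls under the kern.skywalk node (e.g. kern.skywalk.na_upp_alloc_lowat).
 * Their values are consumed when krings are set up: for instance,
 * na_upp_alloc_lowat and na_upp_alloc_buf_lowat seed ckr_alloc_ws for the
 * packet/buffer allocator rings in na_kr_create() below.
 */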
164 
165 #define SKMEM_TAG_NX_RINGS      "com.apple.skywalk.nexus.rings"
166 static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);
167 
168 #define SKMEM_TAG_NX_CONTEXTS   "com.apple.skywalk.nexus.contexts"
169 static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);
170 
171 #define SKMEM_TAG_NX_SCRATCH    "com.apple.skywalk.nexus.scratch"
172 static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);
173 
174 void
175 na_init(void)
176 {
177 	/*
178 	 * Changing the size of the nexus_mdata structure won't break ABI,
179 	 * but we need to be mindful of memory consumption; thus here
180 	 * we add a compile-time check to make sure the size is within
181 	 * the expected limit and that it's properly aligned.  This
182 	 * check may be adjusted in future as needed.
183 	 */
184 	static_assert(sizeof(struct nexus_mdata) <= 32 && IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
185 	static_assert(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));
186 
187 	/* see comments on nexus_meta_type_t */
188 	static_assert(NEXUS_META_TYPE_MAX == 3);
189 	static_assert(NEXUS_META_SUBTYPE_MAX == 3);
190 
191 	ASSERT(!__na_inited);
192 
193 	__na_inited = 1;
194 }
195 
196 void
197 na_fini(void)
198 {
199 	if (__na_inited) {
200 		__na_inited = 0;
201 	}
202 }
203 
204 /*
205  * Interpret the ringid of a chreq, by translating it into a pair
206  * of intervals of ring indices:
207  *
208  * [txfirst, txlast) and [rxfirst, rxlast)
209  */
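/*
 * For example (counts purely illustrative): on an adapter with 4 TX and
 * 4 RX rings, RING_SET_ALL with CHANNEL_RING_ID_ANY yields tx [0,4) and
 * rx [0,4), while RING_SET_ALL with ring_id 2 yields tx [2,3) and rx [2,3).
 */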
210 int
211 na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
212     ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
213 {
214 	enum txrx t;
215 
216 	switch (ring_set) {
217 	case RING_SET_ALL:
218 		/*
219 		 * Ring pair eligibility: all ring(s).
220 		 */
221 		if (ring_id != CHANNEL_RING_ID_ANY &&
222 		    ring_id >= na_get_nrings(na, NR_TX) &&
223 		    ring_id >= na_get_nrings(na, NR_RX)) {
224 			SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
225 			    na->na_name, (int)ring_id, ring_set);
226 			return EINVAL;
227 		}
228 		for_rx_tx(t) {
229 			if (ring_id == CHANNEL_RING_ID_ANY) {
230 				first[t] = 0;
231 				last[t] = na_get_nrings(na, t);
232 			} else {
233 				first[t] = ring_id;
234 				last[t] = ring_id + 1;
235 			}
236 		}
237 		break;
238 
239 	default:
240 		SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
241 		return EINVAL;
242 	}
243 
244 	SK_DF(SK_VERB_NA | SK_VERB_RING,
245 	    "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
246 	    na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
247 	    first[NR_RX], last[NR_RX]);
248 
249 	return 0;
250 }
251 
252 /*
253  * Set the ring ID. For devices with a single queue, a request
254  * for all rings is the same as a single ring.
255  */
256 static int
257 na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
258 {
259 	struct nexus_adapter *na = ch->ch_na;
260 	int error;
261 	enum txrx t;
262 	uint32_t n_alloc_rings;
263 
264 	if ((error = na_interp_ringid(na, ring_id, ring_set,
265 	    ch->ch_first, ch->ch_last)) != 0) {
266 		return error;
267 	}
268 
269 	n_alloc_rings = na_get_nrings(na, NR_A);
270 	if (n_alloc_rings != 0) {
271 		uint32_t n_large_alloc_rings;
272 
273 		ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
274 		ch->ch_last[NR_A] = ch->ch_last[NR_F] =
275 		    ch->ch_first[NR_A] + n_alloc_rings;
276 
277 		n_large_alloc_rings = na_get_nrings(na, NR_LBA);
278 		ch->ch_first[NR_LBA] = 0;
279 		ch->ch_last[NR_LBA] = ch->ch_first[NR_LBA] + n_large_alloc_rings;
280 	} else {
281 		ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
282 		ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
283 		ch->ch_first[NR_LBA] = ch->ch_last[NR_LBA] = 0;
284 	}
285 	ch->ch_first[NR_EV] = 0;
286 	ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
287 
288 	/* XXX: should we initialize na_si_users for event ring ? */
289 
290 	/*
291 	 * Optimization: count the users registered for more than
292 	 * one ring, which are the ones sleeping on the global queue.
293 	 * The default na_notify() callback will then avoid signaling
294 	 * the global queue if nobody is using it
295 	 */
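	/*
	 * For instance, a channel bound with CHANNEL_RING_ID_ANY on an
	 * adapter with more than one ring in a given direction spans
	 * multiple rings (ch_is_multiplex()) and is counted here;
	 * na_post_event() only wakes the per-direction global selinfo
	 * when na_si_users[t] is non-zero.
	 */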
296 	for_rx_tx(t) {
297 		if (ch_is_multiplex(ch, t)) {
298 			na->na_si_users[t]++;
299 			ASSERT(na->na_si_users[t] != 0);
300 		}
301 	}
302 	return 0;
303 }
304 
305 static void
306 na_unset_ringid(struct kern_channel *ch)
307 {
308 	struct nexus_adapter *na = ch->ch_na;
309 	enum txrx t;
310 
311 	for_rx_tx(t) {
312 		if (ch_is_multiplex(ch, t)) {
313 			ASSERT(na->na_si_users[t] != 0);
314 			na->na_si_users[t]--;
315 		}
316 		ch->ch_first[t] = ch->ch_last[t] = 0;
317 	}
318 }
319 
320 /*
321  * Check that the rings we want to bind are not exclusively owned by a previous
322  * bind.  If exclusive ownership has been requested, we also mark the rings.
323  */
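/*
 * Concretely (a sketch of the semantics below, not additional checks): if an
 * earlier bind holds a ring with CKRF_EXCLUSIVE, any later bind touching that
 * ring fails with EBUSY; likewise, a bind requesting CHANF_EXCLUSIVE fails if
 * any of its rings already has users.  Only when both rounds pass are
 * ckr_users incremented and, if requested, CKRF_EXCLUSIVE set.
 */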
324 /* Hoisted out of line to reduce kernel stack footprint */
325 SK_NO_INLINE_ATTRIBUTE
326 static int
327 na_krings_use(struct kern_channel *ch)
328 {
329 	struct nexus_adapter *na = ch->ch_na;
330 	struct __kern_channel_ring *__single kring;
331 	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
332 	enum txrx t;
333 	uint32_t i;
334 
335 	SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (%p) grabbing tx [%u,%u) rx [%u,%u)",
336 	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
337 	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
338 
339 	/*
340 	 * First round: check that all the requested rings
341 	 * are not already exclusively owned, and that we don't
342 	 * want exclusive ownership of rings that are already in use
343 	 */
344 	for_all_rings(t) {
345 		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
346 			kring = &NAKR(na, t)[i];
347 			if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
348 			    (kring->ckr_users && excl)) {
349 				SK_DF(SK_VERB_NA | SK_VERB_RING,
350 				    "kr \"%s\" (%p) krflags 0x%x is busy",
351 				    kring->ckr_name, SK_KVA(kring),
352 				    kring->ckr_flags);
353 				return EBUSY;
354 			}
355 		}
356 	}
357 
358 	/*
359 	 * Second round: increment usage count and possibly
360 	 * mark as exclusive
361 	 */
362 
363 	for_all_rings(t) {
364 		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
365 			kring = &NAKR(na, t)[i];
366 			kring->ckr_users++;
367 			if (excl) {
368 				kring->ckr_flags |= CKRF_EXCLUSIVE;
369 			}
370 		}
371 	}
372 
373 	return 0;
374 }
375 
376 /* Hoisted out of line to reduce kernel stack footprint */
377 SK_NO_INLINE_ATTRIBUTE
378 static void
379 na_krings_unuse(struct kern_channel *ch)
380 {
381 	struct nexus_adapter *na = ch->ch_na;
382 	struct __kern_channel_ring *__single kring;
383 	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
384 	enum txrx t;
385 	uint32_t i;
386 
387 	SK_DF(SK_VERB_NA | SK_VERB_RING,
388 	    "na \"%s\" (%p) releasing tx [%u, %u) rx [%u, %u)",
389 	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
390 	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
391 
392 	for_all_rings(t) {
393 		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
394 			kring = &NAKR(na, t)[i];
395 			if (excl) {
396 				kring->ckr_flags &= ~CKRF_EXCLUSIVE;
397 			}
398 			kring->ckr_users--;
399 		}
400 	}
401 }
402 
403 /* Hoisted out of line to reduce kernel stack footprint */
404 SK_NO_INLINE_ATTRIBUTE
405 static void
406 na_krings_verify(struct nexus_adapter *na)
407 {
408 	struct __kern_channel_ring *__single kring;
409 	enum txrx t;
410 	uint32_t i;
411 
412 	for_all_rings(t) {
413 		for (i = 0; i < na_get_nrings(na, t); i++) {
414 			kring = &NAKR(na, t)[i];
415 			/* na_kr_create() validations */
416 			ASSERT(kring->ckr_num_slots > 0);
417 			ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
418 			ASSERT(kring->ckr_pp != NULL);
419 
420 			if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
421 				continue;
422 			}
423 			/* na_kr_setup() validations */
424 			if (KR_KERNEL_ONLY(kring)) {
425 				ASSERT(kring->ckr_ring == NULL);
426 			} else {
427 				ASSERT(kring->ckr_ring != NULL);
428 			}
429 			ASSERT(kring->ckr_ksds_last ==
430 			    &kring->ckr_ksds[kring->ckr_lim]);
431 		}
432 	}
433 }
434 
435 int
436 na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
437     struct chreq *chr)
438 {
439 	struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
440 	struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
441 	uint32_t ch_mode = chr->cr_mode;
442 	int err = 0;
443 
444 	SK_LOCK_ASSERT_HELD();
445 	ASSERT(ch->ch_schema == NULL);
446 	ASSERT(ch->ch_na == NULL);
447 
448 	/* ring configuration may have changed, fetch from the card */
449 	na_update_config(na);
450 	ch->ch_na = na; /* store the reference */
451 	err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
452 	if (err != 0) {
453 		goto err;
454 	}
455 
456 	os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
457 	    CHANF_USER_PACKET_POOL | CHANF_EVENT_RING), relaxed);
458 	if (ch_mode & CHMODE_EXCLUSIVE) {
459 		os_atomic_or(&ch->ch_flags, CHANF_EXCLUSIVE, relaxed);
460 	}
461 
462 	if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
463 	    !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
464 		SK_ERR("incompatible channel mode (0x%x), na_flags (0x%x)",
465 		    ch_mode, na->na_flags);
466 		err = EINVAL;
467 		goto err;
468 	}
469 
470 	if (na->na_arena->ar_flags & ARF_DEFUNCT) {
471 		err = ENXIO;
472 		goto err;
473 	}
474 
475 	if (ch_mode & CHMODE_USER_PACKET_POOL) {
476 		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
477 		ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
478 		ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
479 		os_atomic_or(&ch->ch_flags, CHANF_USER_PACKET_POOL, relaxed);
480 	}
481 
482 	if (ch_mode & CHMODE_EVENT_RING) {
483 		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
484 		ASSERT(na->na_flags & NAF_EVENT_RING);
485 		ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
486 		os_atomic_or(&ch->ch_flags, CHANF_EVENT_RING, relaxed);
487 	}
488 
489 	/*
490 	 * If this is the first channel of the adapter, create
491 	 * the rings and their in-kernel view, the krings.
492 	 */
493 	if (na->na_channels == 0) {
494 		err = na->na_krings_create(na, ch);
495 		if (err != 0) {
496 			goto err;
497 		}
498 
499 		/*
500 		 * Sanity check; this is already done in na_kr_create(),
501 		 * but we do it here as well to validate na_kr_setup().
502 		 */
503 		na_krings_verify(na);
504 		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
505 		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
506 		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
507 		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
508 	}
509 
510 	/*
511 	 * Validate ownership and usability of the krings; take into account
512 	 * whether some previous bind has exclusive ownership on them.
513 	 */
514 	err = na_krings_use(ch);
515 	if (err != 0) {
516 		goto err_del_rings;
517 	}
518 
519 	/* for user-facing channel, create a new channel schema */
520 	if (!(ch->ch_flags & CHANF_KERNEL)) {
521 		err = na_schema_alloc(ch);
522 		if (err != 0) {
523 			goto err_rel_excl;
524 		}
525 
526 		ASSERT(ch->ch_schema != NULL);
527 		ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
528 	} else {
529 		ASSERT(ch->ch_schema == NULL);
530 		ch->ch_schema_offset = (mach_vm_offset_t)-1;
531 	}
532 
533 	/* update our work timestamp */
534 	na->na_work_ts = net_uptime();
535 
536 	na->na_channels++;
537 
538 	/*
539 	 * If user packet pool is desired, initialize the allocated
540 	 * object hash table in the pool, if not already.  This also
541 	 * retains a refcnt on the pool which the caller must release.
542 	 */
543 	ASSERT(ch->ch_pp == NULL);
544 	if (ch_mode & CHMODE_USER_PACKET_POOL) {
545 #pragma unused(tx_pp)
546 		ASSERT(rx_pp == tx_pp);
547 		err = pp_init_upp(rx_pp, TRUE);
548 		if (err != 0) {
549 			goto err_free_schema;
550 		}
551 		ch->ch_pp = rx_pp;
552 		ch->ch_schema->csm_upp_buf_total = rx_pp->pp_kmd_region->skr_c_obj_cnt;
553 	}
554 
555 	if (!NA_IS_ACTIVE(na)) {
556 		err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
557 		if (err != 0) {
558 			goto err_release_pp;
559 		}
560 
561 		SK_DF(SK_VERB_NA, "activated \"%s\" adapter %p", na->na_name,
562 		    SK_KVA(na));
563 		SK_DF(SK_VERB_NA, "  na_md_type:    %u", na->na_md_type);
564 		SK_DF(SK_VERB_NA, "  na_md_subtype: %u", na->na_md_subtype);
565 	}
566 
567 	SK_DF(SK_VERB_NA, "ch %p", SK_KVA(ch));
568 	SK_DF(SK_VERB_NA, "  ch_flags:     0x%x", ch->ch_flags);
569 	if (ch->ch_schema != NULL) {
570 		SK_DF(SK_VERB_NA, "  ch_schema:    %p", SK_KVA(ch->ch_schema));
571 	}
572 	SK_DF(SK_VERB_NA, "  ch_na:        %p (chcnt %u)", SK_KVA(ch->ch_na),
573 	    ch->ch_na->na_channels);
574 	SK_DF(SK_VERB_NA, "  ch_tx_rings:  [%u,%u)", ch->ch_first[NR_TX],
575 	    ch->ch_last[NR_TX]);
576 	SK_DF(SK_VERB_NA, "  ch_rx_rings:  [%u,%u)", ch->ch_first[NR_RX],
577 	    ch->ch_last[NR_RX]);
578 	SK_DF(SK_VERB_NA, "  ch_alloc_rings:  [%u,%u)", ch->ch_first[NR_A],
579 	    ch->ch_last[NR_A]);
580 	SK_DF(SK_VERB_NA, "  ch_free_rings:  [%u,%u)", ch->ch_first[NR_F],
581 	    ch->ch_last[NR_F]);
582 	SK_DF(SK_VERB_NA, "  ch_ev_rings:  [%u,%u)", ch->ch_first[NR_EV],
583 	    ch->ch_last[NR_EV]);
584 
585 	return 0;
586 
587 err_release_pp:
588 	if (ch_mode & CHMODE_USER_PACKET_POOL) {
589 		ASSERT(ch->ch_pp != NULL);
590 		pp_release(rx_pp);
591 		ch->ch_pp = NULL;
592 	}
593 err_free_schema:
594 	*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
595 	    NEXUS_META_TYPE_INVALID;
596 	*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
597 	    NEXUS_META_SUBTYPE_INVALID;
598 	ASSERT(na->na_channels != 0);
599 	na->na_channels--;
600 	if (ch->ch_schema != NULL) {
601 		skmem_cache_free(
602 			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
603 			ch->ch_schema);
604 		ch->ch_schema = NULL;
605 		ch->ch_schema_offset = (mach_vm_offset_t)-1;
606 	}
607 err_rel_excl:
608 	na_krings_unuse(ch);
609 err_del_rings:
610 	if (na->na_channels == 0) {
611 		na->na_krings_delete(na, ch, FALSE);
612 	}
613 err:
614 	ch->ch_na = NULL;
615 	ASSERT(err != 0);
616 
617 	return err;
618 }
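/*
 * A rough sketch of the bind sequence above, for orientation:
 * na_update_config() -> na_set_ringid() -> na_krings_create() (first channel
 * only) -> na_krings_use() -> na_schema_alloc() (user-facing channels) ->
 * pp_init_upp() (user packet pool mode) -> na_activate() (first active use).
 * The error labels unwind in reverse, which is why err_free_schema also
 * undoes the metadata type setup and the na_channels increment.
 */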
619 
620 /*
621  * Undo everything that was done in na_bind_channel().
622  */
623 /* call with SK_LOCK held */
624 void
625 na_unbind_channel(struct kern_channel *ch)
626 {
627 	struct nexus_adapter *na = ch->ch_na;
628 
629 	SK_LOCK_ASSERT_HELD();
630 
631 	ASSERT(na->na_channels != 0);
632 	na->na_channels--;
633 
634 	/* release exclusive use if it was requested at bind time */
635 	na_krings_unuse(ch);
636 
637 	if (na->na_channels == 0) {     /* last instance */
638 		SK_DF(SK_VERB_NA, "%s(%d): deleting last channel instance for %s",
639 		    ch->ch_name, ch->ch_pid, na->na_name);
640 
641 		/*
642 		 * Free any remaining allocated packets attached to
643 		 * the slots, followed by a teardown of the arena.
644 		 */
645 		na_teardown(na, ch, FALSE);
646 
647 		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
648 		    NEXUS_META_TYPE_INVALID;
649 		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
650 		    NEXUS_META_SUBTYPE_INVALID;
651 	} else {
652 		SK_D("%s(%d): %s has %u remaining channel instance(s)",
653 		    ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
654 	}
655 
656 	/*
657 	 * Free any allocated packets (for the process) attached to the slots;
658 	 * note that na_teardown() could have done this there as well.
659 	 */
660 	if (ch->ch_pp != NULL) {
661 		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
662 		pp_purge_upp(ch->ch_pp, ch->ch_pid);
663 		pp_release(ch->ch_pp);
664 		ch->ch_pp = NULL;
665 	}
666 
667 	/* possibly decrement counter of tx_si/rx_si users */
668 	na_unset_ringid(ch);
669 
670 	/* reap the caches now (purge if adapter is idle) */
671 	skmem_arena_reap(na->na_arena, true);
672 
673 	/* delete the csm */
674 	if (ch->ch_schema != NULL) {
675 		skmem_cache_free(
676 			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
677 			ch->ch_schema);
678 		ch->ch_schema = NULL;
679 		ch->ch_schema_offset = (mach_vm_offset_t)-1;
680 	}
681 
682 	/* destroy the memory map */
683 	skmem_arena_munmap_channel(na->na_arena, ch);
684 
685 	/* mark the channel as unbound */
686 	os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE), relaxed);
687 	ch->ch_na = NULL;
688 
689 	/* and finally release the nexus adapter; this might free it */
690 	(void) na_release_locked(na);
691 }
692 
693 static void
694 na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
695     boolean_t defunct)
696 {
697 	SK_LOCK_ASSERT_HELD();
698 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
699 
700 	/*
701 	 * Deactivate the adapter.
702 	 */
703 	(void) na->na_activate(na,
704 	    (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));
705 
706 	/*
707 	 * Free any remaining allocated packets for this process.
708 	 */
709 	if (ch->ch_pp != NULL) {
710 		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
711 		pp_purge_upp(ch->ch_pp, ch->ch_pid);
712 		if (!defunct) {
713 			pp_release(ch->ch_pp);
714 			ch->ch_pp = NULL;
715 		}
716 	}
717 
718 	/*
719 	 * Delete rings and buffers.
720 	 */
721 	na->na_krings_delete(na, ch, defunct);
722 }
723 
724 /* call with SK_LOCK held */
725 /*
726  * Allocate the per-fd structure __user_channel_schema.
727  */
728 static int
729 na_schema_alloc(struct kern_channel *ch)
730 {
731 	struct nexus_adapter *na = ch->ch_na;
732 	struct skmem_arena *ar = na->na_arena;
733 	struct skmem_arena_nexus *arn;
734 	mach_vm_offset_t roff[SKMEM_REGIONS];
735 	struct __kern_channel_ring *__single kr;
736 	struct __user_channel_schema *csm;
737 	struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
738 	mach_vm_offset_t base;
739 	uint32_t i, j, k, n[NR_ALL];
740 	enum txrx t;
741 	/* -fbounds-safety */
742 	struct {
743 		uint32_t tx_rings;
744 		uint32_t rx_rings;
745 		uint32_t allocator_ring_pairs;
746 		uint32_t num_event_rings;
747 		uint32_t large_buf_alloc_rings;
748 	} ring_counts;
749 #define ASSERT_COUNT_TYPES_MATCH(FIELD_NAME) \
750 	_Static_assert(__builtin_types_compatible_p( \
751 	                typeof(ring_counts . FIELD_NAME), \
752 	                typeof(((struct __user_channel_schema*)0)->csm_ ## FIELD_NAME)), \
753 	                "type for " # FIELD_NAME " doesn't match")
754 
755 	ASSERT_COUNT_TYPES_MATCH(tx_rings);
756 	ASSERT_COUNT_TYPES_MATCH(rx_rings);
757 	ASSERT_COUNT_TYPES_MATCH(allocator_ring_pairs);
758 	ASSERT_COUNT_TYPES_MATCH(num_event_rings);
759 	ASSERT_COUNT_TYPES_MATCH(large_buf_alloc_rings);
760 #undef ASSERT_COUNT_TYPES_MATCH
761 
762 	/* see comments for struct __user_channel_schema */
763 	static_assert(offsetof(struct __user_channel_schema, csm_ver) == 0);
764 	static_assert(offsetof(struct __user_channel_schema, csm_flags) == sizeof(csm->csm_ver));
765 	static_assert(offsetof(struct __user_channel_schema, csm_kern_name) == sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
766 	static_assert(offsetof(struct __user_channel_schema, csm_kern_uuid) == sizeof(csm->csm_ver) + sizeof(csm->csm_flags) + sizeof(csm->csm_kern_name));
767 
768 	SK_LOCK_ASSERT_HELD();
769 
770 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
771 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
772 	arn = skmem_arena_nexus(ar);
773 	ASSERT(arn != NULL);
774 	for_all_rings(t) {
775 		n[t] = 0;
776 	}
777 
778 	for_rx_tx(t) {
779 		ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
780 		n[t] = ch->ch_last[t] - ch->ch_first[t];
781 		ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
782 	}
783 
784 	/* return total number of tx and rx rings for this channel */
785 	ring_counts.tx_rings = n[NR_TX];
786 	ring_counts.rx_rings = n[NR_RX];
787 
788 	if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
789 		ring_counts.allocator_ring_pairs = na->na_num_allocator_ring_pairs;
790 		n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
791 		ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
792 		ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
793 		ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
794 
795 		n[NR_LBA] = na->na_num_large_buf_alloc_rings;
796 		if (n[NR_LBA] != 0) {
797 			ring_counts.large_buf_alloc_rings = n[NR_LBA];
798 			ASSERT(n[NR_LBA] == (ch->ch_last[NR_LBA] - ch->ch_first[NR_LBA]));
799 		}
800 	}
801 
802 	if (ch->ch_flags & CHANF_EVENT_RING) {
803 		n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
804 		ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
805 		ring_counts.num_event_rings = n[NR_EV];
806 	}
807 
808 	csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
809 	if (csm == NULL) {
810 		return ENOMEM;
811 	}
812 	skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
813 	bzero(__unsafe_forge_bidi_indexable(void *, csm, SKMEM_OBJ_SIZE(&csm_oi)),
814 	    SKMEM_OBJ_SIZE(&csm_oi));
815 
816 	csm->csm_tx_rings = ring_counts.tx_rings;
817 	csm->csm_rx_rings = ring_counts.rx_rings;
818 	csm->csm_allocator_ring_pairs = ring_counts.allocator_ring_pairs;
819 	csm->csm_large_buf_alloc_rings = ring_counts.large_buf_alloc_rings;
820 	csm->csm_num_event_rings = ring_counts.num_event_rings;
821 
822 	*(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;
823 
824 	/* kernel version and executable UUID */
825 	static_assert(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
826 
827 	(void) strlcpy(csm->csm_kern_name, version, sizeof(csm->csm_kern_name));
828 
829 #if !XNU_TARGET_OS_OSX
830 	(void) memcpy((void *)csm->csm_kern_uuid, kernelcache_uuid, sizeof(csm->csm_kern_uuid));
831 #else /* XNU_TARGET_OS_OSX */
832 	if (kernel_uuid != NULL) {
833 		(void) memcpy((void *)csm->csm_kern_uuid, kernel_uuid, sizeof(csm->csm_kern_uuid));
834 	}
835 #endif /* XNU_TARGET_OS_OSX */
836 
837 	bzero(&roff, sizeof(roff));
838 	for (i = 0; i < SKMEM_REGIONS; i++) {
839 		if (ar->ar_regions[i] == NULL) {
840 			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
841 			    i == SKMEM_REGION_SCHEMA ||
842 			    i == SKMEM_REGION_BUF_LARGE ||
843 			    i == SKMEM_REGION_RXBUF_DEF ||
844 			    i == SKMEM_REGION_RXBUF_LARGE ||
845 			    i == SKMEM_REGION_TXBUF_DEF ||
846 			    i == SKMEM_REGION_TXBUF_LARGE ||
847 			    i == SKMEM_REGION_RXKMD ||
848 			    i == SKMEM_REGION_TXKMD ||
849 			    i == SKMEM_REGION_UMD ||
850 			    i == SKMEM_REGION_UBFT ||
851 			    i == SKMEM_REGION_KBFT ||
852 			    i == SKMEM_REGION_RXKBFT ||
853 			    i == SKMEM_REGION_TXKBFT ||
854 			    i == SKMEM_REGION_TXAUSD ||
855 			    i == SKMEM_REGION_RXFUSD ||
856 			    i == SKMEM_REGION_USTATS ||
857 			    i == SKMEM_REGION_KSTATS ||
858 			    i == SKMEM_REGION_INTRINSIC ||
859 			    i == SKMEM_REGION_FLOWADV ||
860 			    i == SKMEM_REGION_NEXUSADV ||
861 			    i == SKMEM_REGION_SYSCTLS ||
862 			    i == SKMEM_REGION_GUARD_TAIL);
863 			continue;
864 		}
865 
866 		/* not for nexus */
867 		ASSERT(i != SKMEM_REGION_SYSCTLS);
868 
869 		/*
870 		 * Get region offsets from base of mmap span; the arena
871 		 * doesn't need to be mmap'd at this point, since we
872 		 * simply compute the relative offset.
873 		 */
874 		roff[i] = skmem_arena_get_region_offset(ar, i);
875 	}
876 
877 	/*
878 	 * The schema is made up of the descriptor followed inline by an array
879 	 * of offsets to the tx, rx, allocator and event rings in the mmap span.
880 	 * They contain the offset between the ring and schema, so the
881 	 * information is usable in userspace to reach the ring from
882 	 * the schema.
883 	 */
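	/*
	 * Illustrative userspace view (not code from this file): with the
	 * arena mmap'd at some address m, the schema lives at
	 * csm_uva = m + ch_schema_offset, and each ring and its user slot
	 * descriptor array can then be reached as
	 *
	 *     ring_uva = csm_uva + csm->csm_ring_ofs[i].ring_off;
	 *     sd_uva   = csm_uva + csm->csm_ring_ofs[i].sd_off;
	 *
	 * because every offset stored below is made relative to base, the
	 * schema object's own offset within the span.
	 */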
884 	base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
885 
886 	/* initialize schema with tx ring info */
887 	for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
888 		kr = &na->na_tx_rings[j];
889 		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
890 			continue;
891 		}
892 
893 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
894 		skmem_cache_get_obj_info(arn->arn_ring_cache,
895 		    kr->ckr_ring, &ring_oi, NULL);
896 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
897 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
898 
899 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
900 		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
901 		    kr->ckr_ksds, &ksd_oi, &usd_oi);
902 
903 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
904 		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
905 		    base;
906 	}
907 	/* initialize schema with rx ring info */
908 	for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
909 		kr = &na->na_rx_rings[j];
910 		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
911 			continue;
912 		}
913 
914 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
915 		skmem_cache_get_obj_info(arn->arn_ring_cache,
916 		    kr->ckr_ring, &ring_oi, NULL);
917 		*(mach_vm_offset_t *)
918 		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
919 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
920 
921 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
922 		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
923 		    kr->ckr_ksds, &ksd_oi, &usd_oi);
924 
925 		*(mach_vm_offset_t *)
926 		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
927 		    (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
928 		    base;
929 	}
930 	/* initialize schema with allocator ring info */
931 	for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
932 	    i < n[NR_A]; i++, j++) {
933 		mach_vm_offset_t usd_roff;
934 
935 		usd_roff = roff[SKMEM_REGION_TXAUSD];
936 		kr = &na->na_alloc_rings[j];
937 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
938 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
939 
940 		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
941 		    &ring_oi, NULL);
942 		*(mach_vm_offset_t *)
943 		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
944 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
945 
946 		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
947 		    &ksd_oi, &usd_oi);
948 		*(mach_vm_offset_t *)
949 		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
950 		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
951 	}
952 	/* initialize schema with free ring info */
953 	for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
954 	    i < n[NR_F]; i++, j++) {
955 		mach_vm_offset_t usd_roff;
956 
957 		usd_roff = roff[SKMEM_REGION_RXFUSD];
958 		kr = &na->na_free_rings[j];
959 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
960 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
961 
962 		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
963 		    &ring_oi, NULL);
964 		*(mach_vm_offset_t *)
965 		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
966 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
967 
968 		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
969 		    &ksd_oi, &usd_oi);
970 		*(mach_vm_offset_t *)
971 		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
972 		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
973 	}
974 	/* initialize schema with event ring info */
975 	for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
976 	    n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
977 		ASSERT(csm->csm_num_event_rings != 0);
978 		kr = &na->na_event_rings[j];
979 		ASSERT(!KR_KERNEL_ONLY(kr));
980 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
981 		skmem_cache_get_obj_info(arn->arn_ring_cache,
982 		    kr->ckr_ring, &ring_oi, NULL);
983 		*(mach_vm_offset_t *)
984 		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
985 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
986 
987 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
988 		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
989 		    kr->ckr_ksds, &ksd_oi, &usd_oi);
990 
991 		*(mach_vm_offset_t *)
992 		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
993 		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
994 		    base;
995 	}
996 	/* initialize schema with large buf alloc ring info */
997 	for (i = 0, j = ch->ch_first[NR_LBA], k = n[NR_TX] + n[NR_RX] +
998 	    n[NR_A] + n[NR_F] + n[NR_EV]; i < n[NR_LBA]; i++, j++) {
999 		ASSERT(csm->csm_large_buf_alloc_rings != 0);
1000 		kr = &na->na_large_buf_alloc_rings[j];
1001 		ASSERT(!KR_KERNEL_ONLY(kr));
1002 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
1003 		skmem_cache_get_obj_info(arn->arn_ring_cache,
1004 		    kr->ckr_ring, &ring_oi, NULL);
1005 		*(mach_vm_offset_t *)
1006 		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
1007 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
1008 
1009 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
1010 		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
1011 		    kr->ckr_ksds, &ksd_oi, &usd_oi);
1012 
1013 		*(mach_vm_offset_t *)
1014 		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
1015 		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
1016 		    base;
1017 	}
1018 
1019 	*(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
1020 	    __ch_umd_redzone_cookie;
1021 	*(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
1022 	*(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
1023 	    na->na_md_subtype;
1024 
1025 	if (arn->arn_stats_obj != NULL) {
1026 		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
1027 		ASSERT(roff[SKMEM_REGION_USTATS] != 0);
1028 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
1029 		    roff[SKMEM_REGION_USTATS];
1030 		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1031 		    na->na_stats_type;
1032 	} else {
1033 		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1034 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
1035 		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1036 		    NEXUS_STATS_TYPE_INVALID;
1037 	}
1038 
1039 	if (arn->arn_flowadv_obj != NULL) {
1040 		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
1041 		ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
1042 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
1043 		    roff[SKMEM_REGION_FLOWADV];
1044 		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
1045 		    na->na_flowadv_max;
1046 	} else {
1047 		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1048 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
1049 		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
1050 	}
1051 
1052 	if (arn->arn_nexusadv_obj != NULL) {
1053 		struct __kern_nexus_adv_metadata *__single adv_md;
1054 
1055 		adv_md = arn->arn_nexusadv_obj;
1056 		ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
1057 		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
1058 		ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
1059 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
1060 		    roff[SKMEM_REGION_NEXUSADV];
1061 	} else {
1062 		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1063 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
1064 	}
1065 
1066 	ch->ch_schema = csm;
1067 	ch->ch_schema_offset = base;
1068 
1069 	return 0;
1070 }
1071 
1072 /*
1073  * Called by all routines that create nexus_adapters.
1074  * Attach na to the ifp (if any) and provide defaults
1075  * for optional callbacks. Defaults assume that we
1076  * are creating a hardware nexus_adapter.
1077  */
1078 void
1079 na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
1080     struct kern_nexus_domain_provider *nxdom_prov)
1081 {
1082 	SK_LOCK_ASSERT_HELD();
1083 
1084 	ASSERT(nx != NULL);
1085 	ASSERT(nxdom_prov != NULL);
1086 	ASSERT(na->na_krings_create != NULL);
1087 	ASSERT(na->na_krings_delete != NULL);
1088 	if (na->na_type != NA_NETIF_COMPAT_DEV) {
1089 		ASSERT(na_get_nrings(na, NR_TX) != 0);
1090 	}
1091 	if (na->na_type != NA_NETIF_COMPAT_HOST) {
1092 		ASSERT(na_get_nrings(na, NR_RX) != 0);
1093 	}
1094 	ASSERT(na->na_channels == 0);
1095 
1096 	if (na->na_notify == NULL) {
1097 		na->na_notify = na_notify;
1098 	}
1099 
1100 	na->na_nx = nx;
1101 	na->na_nxdom_prov = nxdom_prov;
1102 
1103 	SK_DF(SK_VERB_NA, "na %p nx %p nxtype %u ar %p",
1104 	    SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
1105 	    SK_KVA(na->na_arena));
1106 }
1107 
1108 void
1109 na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
1110     boolean_t within_kevent, boolean_t selwake, uint32_t hint)
1111 {
1112 	struct nexus_adapter *na = KRNA(kring);
1113 	enum txrx t = kring->ckr_tx;
1114 
1115 	SK_PDF(SK_VERB_EVENTS, current_proc(),
1116 	    "na \"%s\" (%p) kr %p kev %u sel %u hint 0x%x",
1117 	    na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
1118 	    hint);
1119 
1120 	csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
1121 	/*
1122 	 * optimization: avoid a wake up on the global
1123 	 * queue if nobody has registered for more
1124 	 * than one ring
1125 	 */
1126 	if (na->na_si_users[t] > 0) {
1127 		csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
1128 	}
1129 }
1130 
1131 /* default notify callback */
1132 static int
1133 na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
1134 {
1135 #pragma unused(p)
1136 	SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
1137 	    SK_VERB_TX : SK_VERB_RX),
1138 	    "%s(%d) [%s] na \"%s\" (%p) kr \"%s\" (%p) krflags 0x%x "
1139 	    "flags 0x%x, kh %u kt %u | h %u t %u",
1140 	    sk_proc_name(p), sk_proc_pid(p),
1141 	    (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
1142 	    SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
1143 	    kring->ckr_flags, flags, kring->ckr_khead, kring->ckr_ktail,
1144 	    kring->ckr_rhead, kring->ckr_rtail);
1145 
1146 	na_post_event(kring, (flags & NA_NOTEF_PUSH),
1147 	    (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);
1148 
1149 	return 0;
1150 }
1151 
1152 /*
1153  * Fetch configuration from the device, to cope with dynamic
1154  * reconfigurations after loading the module.
1155  */
1156 /* call with SK_LOCK held */
1157 int
1158 na_update_config(struct nexus_adapter *na)
1159 {
1160 	uint32_t txr, txd, rxr, rxd;
1161 
1162 	SK_LOCK_ASSERT_HELD();
1163 
1164 	txr = txd = rxr = rxd = 0;
1165 	if (na->na_config == NULL ||
1166 	    na->na_config(na, &txr, &txd, &rxr, &rxd)) {
1167 		/* take whatever we had at init time */
1168 		txr = na_get_nrings(na, NR_TX);
1169 		txd = na_get_nslots(na, NR_TX);
1170 		rxr = na_get_nrings(na, NR_RX);
1171 		rxd = na_get_nslots(na, NR_RX);
1172 	}
1173 
1174 	if (na_get_nrings(na, NR_TX) == txr &&
1175 	    na_get_nslots(na, NR_TX) == txd &&
1176 	    na_get_nrings(na, NR_RX) == rxr &&
1177 	    na_get_nslots(na, NR_RX) == rxd) {
1178 		return 0; /* nothing changed */
1179 	}
1180 	SK_DF(SK_VERB_NA, "stored config %s: txring %u x %u, rxring %u x %u",
1181 	    na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
1182 	    na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
1183 	SK_DF(SK_VERB_NA, "new config %s: txring %u x %u, rxring %u x %u",
1184 	    na->na_name, txr, txd, rxr, rxd);
1185 
1186 	if (na->na_channels == 0) {
1187 		SK_DF(SK_VERB_NA, "configuration changed (but fine)");
1188 		na_set_nrings(na, NR_TX, txr);
1189 		na_set_nslots(na, NR_TX, txd);
1190 		na_set_nrings(na, NR_RX, rxr);
1191 		na_set_nslots(na, NR_RX, rxd);
1192 		return 0;
1193 	}
1194 	SK_ERR("configuration changed while active, this is bad...");
1195 	return 1;
1196 }
1197 
1198 static void
1199 na_kr_setup_netif_svc_map(struct nexus_adapter *na)
1200 {
1201 	uint32_t i;
1202 	uint32_t num_tx_rings;
1203 
1204 	ASSERT(na->na_type == NA_NETIF_DEV);
1205 	num_tx_rings = na_get_nrings(na, NR_TX);
1206 
1207 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) == NAKR_WMM_SC2RINGID(KPKT_SC_BK));
1208 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_BE) == NAKR_WMM_SC2RINGID(KPKT_SC_RD));
1209 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_BE) == NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
1210 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_AV) == NAKR_WMM_SC2RINGID(KPKT_SC_RV));
1211 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_AV) == NAKR_WMM_SC2RINGID(KPKT_SC_VI));
1212 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_VO) == NAKR_WMM_SC2RINGID(KPKT_SC_CTL));
1213 
1214 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
1215 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
1216 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
1217 	static_assert(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);
1218 
1219 	static_assert(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
1220 	static_assert(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
1221 	static_assert(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
1222 	static_assert(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
1223 	static_assert(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
1224 	static_assert(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
1225 	static_assert(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
1226 	static_assert(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
1227 	static_assert(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
1228 	static_assert(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
1229 	static_assert(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);
1230 
1231 	/*
1232 	 * we support the following 2 configurations:
1233 	 * 1. packets from all 10 service classes map to one ring.
1234 	 * 2. a 10:4 mapping between service classes and the rings. These 4
1235 	 *    rings map to the 4 WMM access categories.
1236 	 */
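	/*
	 * For the WMM case, the static asserts above pin the grouping to the
	 * four access categories, roughly:
	 *   { BK_SYS, BK }   -> background ring
	 *   { BE, RD, OAM }  -> best-effort ring
	 *   { AV, RV, VI }   -> video ring
	 *   { VO, CTL }      -> voice ring
	 * with the exact ring index for each class (including SIG) supplied
	 * by PKT_SC2TC() via NAKR_WMM_SC2RINGID().
	 */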
1237 	if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
1238 		ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
1239 		/* setup the adapter's service class LUT */
1240 		NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
1241 		NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
1242 		NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
1243 		NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
1244 		NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
1245 		NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
1246 		NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
1247 		NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
1248 		NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
1249 		NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
1250 		NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);
1251 
1252 		/* Initialize the service class for each of the 4 rings */
1253 		NAKR_SET_KR_SVC(na, KPKT_SC_BK);
1254 		NAKR_SET_KR_SVC(na, KPKT_SC_BE);
1255 		NAKR_SET_KR_SVC(na, KPKT_SC_VI);
1256 		NAKR_SET_KR_SVC(na, KPKT_SC_VO);
1257 	} else {
1258 		ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
1259 		    NEXUS_QMAP_TYPE_DEFAULT);
1260 		/* 10:1 mapping */
1261 		for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
1262 			na->na_kring_svc_lut[i] = 0;
1263 		}
1264 		for (i = 0; i < num_tx_rings; i++) {
1265 			NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
1266 		}
1267 	}
1268 }
1269 
1270 static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
1271 static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
1272 static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
1273 static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
1274 static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
1275 static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
1276 static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1277 
1278 static lck_grp_t *
1279 na_kr_q_lck_grp(enum txrx t)
1280 {
1281 	switch (t) {
1282 	case NR_TX:
1283 		return &channel_txq_lock_group;
1284 	case NR_RX:
1285 		return &channel_rxq_lock_group;
1286 	case NR_A:
1287 	case NR_F:
1288 	case NR_LBA:
1289 		return &channel_alloc_lock_group;
1290 	case NR_EV:
1291 		return &channel_evq_lock_group;
1292 	default:
1293 		VERIFY(0);
1294 		/* NOTREACHED */
1295 		__builtin_unreachable();
1296 	}
1297 }
1298 
1299 static lck_grp_t *
1300 na_kr_s_lck_grp(enum txrx t)
1301 {
1302 	switch (t) {
1303 	case NR_TX:
1304 		return &channel_txs_lock_group;
1305 	case NR_RX:
1306 		return &channel_rxs_lock_group;
1307 	case NR_A:
1308 	case NR_F:
1309 	case NR_LBA:
1310 		return &channel_alloc_lock_group;
1311 	case NR_EV:
1312 		return &channel_evs_lock_group;
1313 	default:
1314 		VERIFY(0);
1315 		/* NOTREACHED */
1316 		__builtin_unreachable();
1317 	}
1318 }
1319 
1320 static void
1321 kr_init_tbr(struct __kern_channel_ring *r)
1322 {
1323 	r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
1324 	r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
1325 	r->ckr_tbr_last = 0;
1326 }
1327 
1328 struct kern_pbufpool *
1329 na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
1330 {
1331 	struct kern_pbufpool *pp = NULL;
1332 	switch (t) {
1333 	case NR_RX:
1334 	case NR_F:
1335 	case NR_EV:
1336 		pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1337 		break;
1338 	case NR_TX:
1339 	case NR_A:
1340 	case NR_LBA:
1341 		pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1342 		break;
1343 	default:
1344 		VERIFY(0);
1345 		/* NOTREACHED */
1346 		__builtin_unreachable();
1347 	}
1348 
1349 	return pp;
1350 }
1351 
1352 /*
1353  * Create the krings array and initialize the fields common to all adapters.
1354  * The array layout is this:
1355  *
1356  *                                 +----------+
1357  * na->na_tx_rings ----->          |          | \
1358  *                                 |          |  } na->na_num_tx_rings
1359  *                                 |          | /
1360  * na->na_rx_rings ---->           +----------+
1361  *                                 |          | \
1362  *                                 |          |  } na->na_num_rx_rings
1363  *                                 |          | /
1364  * na->na_alloc_rings ->           +----------+
1365  *                                 |          | \
1366  * na->na_free_rings -->           +----------+  } na->na_num_allocator_ring_pairs
1367  *                                 |          | /
1368  * na->na_event_rings ->           +----------+
1369  *                                 |          | \
1370  *                                 |          |  } na->na_num_event_rings
1371  *                                 |          | /
1372  * na->na_large_buf_alloc_rings -> +----------+
1373  *                                 |          | \
1374  *                                 |          |  } na->na_num_large_buf_alloc_rings
1375  *                                 |          | /
1376  * na->na_tail ----->              +----------+
1377  */
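/*
 * For example (counts purely illustrative): with 2 TX and 2 RX rings, one
 * allocator ring pair, 1 event ring and no large-buf alloc rings, the array
 * holds 2 + 2 + 1 + 1 + 1 = 7 krings, so na_rx_rings = rings + 2,
 * na_alloc_rings = rings + 4, na_free_rings = rings + 5 and
 * na_event_rings = rings + 6, matching the pointer arithmetic below.
 */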
1378 /* call with SK_LOCK held */
1379 static int
1380 na_kr_create(struct nexus_adapter *na, boolean_t alloc_ctx)
1381 {
1382 	lck_grp_t *q_lck_grp, *s_lck_grp;
1383 	uint32_t i, ndesc;
1384 	struct kern_pbufpool *pp = NULL;
1385 	uint32_t count;
1386 	uint32_t tmp_count;
1387 	struct __kern_channel_ring *__counted_by(count) rings;
1388 	struct __kern_channel_ring *__single kring;
1389 	uint32_t n[NR_ALL];
1390 	int c, tot_slots, err = 0;
1391 	enum txrx t;
1392 
1393 	SK_LOCK_ASSERT_HELD();
1394 
1395 	n[NR_TX] = na_get_nrings(na, NR_TX);
1396 	n[NR_RX] = na_get_nrings(na, NR_RX);
1397 	n[NR_A] = na_get_nrings(na, NR_A);
1398 	n[NR_F] = na_get_nrings(na, NR_F);
1399 	n[NR_EV] = na_get_nrings(na, NR_EV);
1400 	n[NR_LBA] = na_get_nrings(na, NR_LBA);
1401 
1402 	/*
1403 	 * -fbounds-safety: rings is __counted_by(count), so rings needs to be
1404 	 * assigned first, immediately followed by count's assignment.
1405 	 */
1406 	tmp_count = n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV] + n[NR_LBA];
1407 	rings = sk_alloc_type_array(struct __kern_channel_ring, tmp_count,
1408 	    Z_WAITOK, skmem_tag_nx_rings);
1409 	count = tmp_count;
1410 	na->na_all_rings = rings;
1411 	na->na_all_rings_cnt = count;
1412 
1413 	if (__improbable(rings == NULL)) {
1414 		SK_ERR("Cannot allocate krings");
1415 		err = ENOMEM;
1416 		goto error;
1417 	}
1418 	na->na_tx_rings = rings;
1419 	na->na_tx_rings_cnt = n[NR_TX];
1420 
1421 	na->na_rx_rings = rings + n[NR_TX];
1422 	na->na_rx_rings_cnt = n[NR_RX];
1423 	if (n[NR_A] != 0) {
1424 		na->na_alloc_rings = rings + n[NR_TX] + n[NR_RX];
1425 		na->na_free_rings = rings + n[NR_TX] + n[NR_RX] + n[NR_A];
1426 		na->na_alloc_free_rings_cnt = n[NR_A];
1427 	} else {
1428 		na->na_alloc_rings = NULL;
1429 		na->na_free_rings = NULL;
1430 		na->na_alloc_free_rings_cnt = 0;
1431 	}
1432 	if (n[NR_EV] != 0) {
1433 		if (na->na_free_rings != NULL) {
1434 			na->na_event_rings = rings + n[NR_TX] +
1435 			    n[NR_RX] + n[NR_A] + n[NR_F];
1436 			na->na_event_rings_cnt = n[NR_EV];
1437 		} else {
1438 			na->na_event_rings = rings + n[NR_TX] + n[NR_RX];
1439 			na->na_event_rings_cnt = n[NR_EV];
1440 		}
1441 	}
1442 	if (n[NR_LBA] != 0) {
1443 		ASSERT(n[NR_A] != 0);
1444 		if (na->na_event_rings != NULL) {
1445 			na->na_large_buf_alloc_rings = rings + n[NR_TX] + n[NR_RX] +
1446 			    n[NR_A] + n[NR_F] + n[NR_EV];
1447 			na->na_large_buf_alloc_rings_cnt = n[NR_LBA];
1448 		} else {
1449 			/* alloc/free rings must also be present */
1450 			ASSERT(na->na_free_rings != NULL);
1451 			na->na_large_buf_alloc_rings = rings + n[NR_TX] + n[NR_RX] +
1452 			    n[NR_A] + n[NR_F];
1453 			na->na_large_buf_alloc_rings_cnt = n[NR_LBA];
1454 		}
1455 	}
1456 
1457 	/* total number of slots for TX/RX adapter rings */
1458 	c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
1459 	    (n[NR_RX] * na_get_nslots(na, NR_RX));
1460 
1461 	/* for scratch space on alloc and free rings */
1462 	if (n[NR_A] != 0) {
1463 		tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
1464 		tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
1465 		tot_slots += n[NR_LBA] * na_get_nslots(na, NR_LBA);
1466 		c = tot_slots;
1467 	}
1468 	na->na_total_slots = tot_slots;
1469 
1470 	/* slot context (optional) for all TX/RX ring slots of this adapter */
1471 	if (alloc_ctx) {
1472 		na->na_slot_ctxs =
1473 		    skn_alloc_type_array(slot_ctxs, struct slot_ctx,
1474 		    na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
1475 		na->na_slot_ctxs_cnt = na->na_total_slots;
1476 		if (na->na_slot_ctxs == NULL) {
1477 			SK_ERR("Cannot allocate slot contexts");
1478 			err = ENOMEM;
1479 			na->na_slot_ctxs = NULL;
1480 			na->na_slot_ctxs_cnt = 0;
1481 			goto error;
1482 		}
1483 		os_atomic_or(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
1484 	}
1485 
1486 	/*
1487 	 * packet handle array storage for all TX/RX ring slots of this
1488 	 * adapter.
1489 	 */
1490 	na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
1491 	    na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
1492 	na->na_scratch_cnt = na->na_total_slots;
1493 	if (na->na_scratch == NULL) {
1494 		SK_ERR("Cannot allocate scratch space");
1495 		err = ENOMEM;
1496 		na->na_scratch = NULL;
1497 		na->na_scratch_cnt = 0;
1498 		goto error;
1499 	}
1500 
1501 	/*
1502 	 * All fields in krings are 0 except the ones initialized below,
1503 	 * but it's better to be explicit on important kring fields.
1504 	 */
1505 	for_all_rings(t) {
1506 		ndesc = na_get_nslots(na, t);
1507 		pp = na_kr_get_pp(na, t);
1508 		for (i = 0; i < n[t]; i++) {
1509 			kring = &NAKR(na, t)[i];
1510 			bzero(kring, sizeof(*kring));
1511 			kring->ckr_na = na;
1512 			kring->ckr_pp = pp;
1513 			kring->ckr_max_pkt_len =
1514 			    (t == NR_LBA ? PP_BUF_SIZE_LARGE(pp) :
1515 			    PP_BUF_SIZE_DEF(pp)) *
1516 			    pp->pp_max_frags;
1517 			kring->ckr_ring_id = i;
1518 			kring->ckr_tx = t;
1519 			kr_init_to_mhints(kring, ndesc);
1520 			kr_init_tbr(kring);
1521 			if (NA_KERNEL_ONLY(na)) {
1522 				kring->ckr_flags |= CKRF_KERNEL_ONLY;
1523 			}
1524 			if (na->na_flags & NAF_HOST_ONLY) {
1525 				kring->ckr_flags |= CKRF_HOST;
1526 			}
1527 			ASSERT((t >= NR_TXRX) || (c > 0));
1528 			if ((t < NR_TXRX) &&
1529 			    (na->na_flags & NAF_SLOT_CONTEXT)) {
1530 				ASSERT(na->na_slot_ctxs != NULL);
1531 				kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1532 				kring->ckr_slot_ctxs =
1533 				    na->na_slot_ctxs + (tot_slots - c);
1534 				kring->ckr_slot_ctxs_cnt = kring->ckr_num_slots;
1535 			}
1536 			ASSERT(na->na_scratch != NULL);
1537 			if (t < NR_TXRXAF || t == NR_LBA) {
1538 				kring->ckr_scratch =
1539 				    na->na_scratch + (tot_slots - c);
1540 				kring->ckr_scratch_cnt = kring->ckr_num_slots;
1541 			}
1542 			if (t < NR_TXRXAF || t == NR_LBA) {
1543 				c -= ndesc;
1544 			}
1545 			switch (t) {
1546 			case NR_A:
1547 				if (i == 0) {
1548 					kring->ckr_na_sync =
1549 					    na_packet_pool_alloc_sync;
1550 					kring->ckr_alloc_ws =
1551 					    na_upp_alloc_lowat;
1552 				} else {
1553 					ASSERT(i == 1);
1554 					kring->ckr_na_sync =
1555 					    na_packet_pool_alloc_buf_sync;
1556 					kring->ckr_alloc_ws =
1557 					    na_upp_alloc_buf_lowat;
1558 				}
1559 				break;
1560 			case NR_F:
1561 				if (i == 0) {
1562 					kring->ckr_na_sync =
1563 					    na_packet_pool_free_sync;
1564 				} else {
1565 					ASSERT(i == 1);
1566 					kring->ckr_na_sync =
1567 					    na_packet_pool_free_buf_sync;
1568 				}
1569 				break;
1570 			case NR_TX:
1571 				kring->ckr_na_sync = na->na_txsync;
1572 				if (na->na_flags & NAF_TX_MITIGATION) {
1573 					kring->ckr_flags |= CKRF_MITIGATION;
1574 				}
1575 				switch (na->na_type) {
1576 #if CONFIG_NEXUS_USER_PIPE
1577 				case NA_USER_PIPE:
1578 					ASSERT(!(na->na_flags &
1579 					    NAF_USER_PKT_POOL));
1580 					kring->ckr_prologue = kr_txprologue;
1581 					kring->ckr_finalize = NULL;
1582 					break;
1583 #endif /* CONFIG_NEXUS_USER_PIPE */
1584 				default:
1585 					if (na->na_flags & NAF_USER_PKT_POOL) {
1586 						kring->ckr_prologue =
1587 						    kr_txprologue_upp;
1588 						kring->ckr_finalize =
1589 						    kr_txfinalize_upp;
1590 					} else {
1591 						kring->ckr_prologue =
1592 						    kr_txprologue;
1593 						kring->ckr_finalize =
1594 						    kr_txfinalize;
1595 					}
1596 					break;
1597 				}
1598 				break;
1599 			case NR_RX:
1600 				kring->ckr_na_sync = na->na_rxsync;
1601 				if (na->na_flags & NAF_RX_MITIGATION) {
1602 					kring->ckr_flags |= CKRF_MITIGATION;
1603 				}
1604 				switch (na->na_type) {
1605 #if CONFIG_NEXUS_USER_PIPE
1606 				case NA_USER_PIPE:
1607 					ASSERT(!(na->na_flags &
1608 					    NAF_USER_PKT_POOL));
1609 					kring->ckr_prologue =
1610 					    kr_rxprologue_nodetach;
1611 					kring->ckr_finalize = kr_rxfinalize;
1612 					break;
1613 #endif /* CONFIG_NEXUS_USER_PIPE */
1614 				default:
1615 					if (na->na_flags & NAF_USER_PKT_POOL) {
1616 						kring->ckr_prologue =
1617 						    kr_rxprologue_upp;
1618 						kring->ckr_finalize =
1619 						    kr_rxfinalize_upp;
1620 					} else {
1621 						kring->ckr_prologue =
1622 						    kr_rxprologue;
1623 						kring->ckr_finalize =
1624 						    kr_rxfinalize;
1625 					}
1626 					break;
1627 				}
1628 				break;
1629 			case NR_EV:
1630 				kring->ckr_na_sync = kern_channel_event_sync;
1631 				break;
1632 			case NR_LBA:
1633 				kring->ckr_na_sync = na_packet_pool_alloc_large_sync;
1634 				kring->ckr_alloc_ws = na_upp_alloc_lowat;
1635 				break;
1636 			default:
1637 				VERIFY(0);
1638 				/* NOTREACHED */
1639 				__builtin_unreachable();
1640 			}
1641 			if (t != NR_EV) {
1642 				kring->ckr_na_notify = na->na_notify;
1643 			} else {
1644 				kring->ckr_na_notify = NULL;
1645 			}
1646 			(void) snprintf(kring->ckr_name,
1647 			    sizeof(kring->ckr_name) - 1,
1648 			    "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1649 			    ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1650 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1651 			    "kr \"%s\" (%p) krflags 0x%x rh %u rt %u",
1652 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1653 			    kring->ckr_rhead, kring->ckr_rtail);
1654 			kring->ckr_state = KR_READY;
1655 			q_lck_grp = na_kr_q_lck_grp(t);
1656 			s_lck_grp = na_kr_s_lck_grp(t);
1657 			kring->ckr_qlock_group = q_lck_grp;
1658 			lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1659 			    &channel_lock_attr);
1660 			kring->ckr_slock_group = s_lck_grp;
1661 			lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1662 			    &channel_lock_attr);
1663 			csi_init(&kring->ckr_si,
1664 			    (kring->ckr_flags & CKRF_MITIGATION),
1665 			    na->na_ch_mit_ival);
1666 		}
1667 		csi_init(&na->na_si[t],
1668 		    (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1669 		    na->na_ch_mit_ival);
1670 	}
1671 	ASSERT(c == 0);
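	/*
	 * At this point the contiguous krings array is laid out as
	 * [ TX | RX | ALLOC | FREE | EVENT | LARGE_BUF_ALLOC ] (sections may
	 * be empty), and na_tail points one past the last kring.
	 */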
1672 	na->na_tail = rings + n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] +
1673 	    n[NR_EV] + n[NR_LBA];
1674 
1675 	if (na->na_type == NA_NETIF_DEV) {
1676 		na_kr_setup_netif_svc_map(na);
1677 	}
1678 
1679 	/* validate now for cases where we create only krings */
1680 	na_krings_verify(na);
1681 	return 0;
1682 
1683 error:
1684 	ASSERT(err != 0);
1685 	if (rings != NULL) {
1686 		sk_free_type_array_counted_by(struct __kern_channel_ring,
1687 		    na->na_all_rings_cnt, na->na_all_rings);
1688 		na->na_tx_rings = NULL;
1689 		na->na_tx_rings_cnt = 0;
1690 		na->na_rx_rings = NULL;
1691 		na->na_rx_rings_cnt = 0;
1692 		na->na_alloc_rings = NULL;
1693 		na->na_free_rings = NULL;
1694 		na->na_alloc_free_rings_cnt = 0;
1695 		na->na_event_rings = NULL;
1696 		na->na_event_rings_cnt = 0;
1697 		na->na_tail = NULL;
1698 	}
1699 	if (na->na_slot_ctxs != NULL) {
1700 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1701 		skn_free_type_array_counted_by(slot_ctxs, struct slot_ctx,
1702 		    na->na_slot_ctxs_cnt, na->na_slot_ctxs);
1703 		na->na_slot_ctxs = NULL;
1704 		na->na_slot_ctxs_cnt = 0;
1705 	}
1706 	if (na->na_scratch != NULL) {
1707 		skn_free_type_array_counted_by(scratch, kern_packet_t, na->na_scratch_cnt,
1708 		    na->na_scratch);
1709 		na->na_scratch = NULL;
1710 		na->na_scratch_cnt = 0;
1711 	}
1712 	return err;
1713 }
1714 
1715 /* undo the actions performed by na_kr_create() */
1716 /* call with SK_LOCK held */
1717 static void
1718 na_kr_delete(struct nexus_adapter *na)
1719 {
1720 	struct __kern_channel_ring *kring;
1721 	enum txrx t;
1722 
1723 	kring = na->na_all_rings;
1724 
1725 	ASSERT((kring != NULL) && (na->na_tail != NULL));
1726 	SK_LOCK_ASSERT_HELD();
1727 
1728 	for_all_rings(t) {
1729 		csi_destroy(&na->na_si[t]);
1730 	}
1731 	/* we rely on the krings layout described above */
1732 	for (; kring != na->na_tail; kring++) {
1733 		lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1734 		lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1735 		csi_destroy(&kring->ckr_si);
1736 		if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1737 			kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1738 			ASSERT(kring->ckr_slot_ctxs != NULL);
1739 			kring->ckr_slot_ctxs = NULL;
1740 			kring->ckr_slot_ctxs_cnt = 0;
1741 		}
1742 		kring->ckr_scratch = NULL;
1743 		kring->ckr_scratch_cnt = 0;
1744 	}
1745 	if (na->na_slot_ctxs != NULL) {
1746 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1747 		os_atomic_andnot(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
1748 		skn_free_type_array_counted_by(na->na_slot_ctxs,
1749 		    struct slot_ctx, na->na_slot_ctxs_cnt,
1750 		    na->na_slot_ctxs);
1751 		na->na_slot_ctxs = NULL;
1752 		na->na_slot_ctxs_cnt = 0;
1753 	}
1754 	if (na->na_scratch != NULL) {
1755 		skn_free_type_array_counted_by(na->na_scratch,
1756 		    kern_packet_t, na->na_scratch_cnt,
1757 		    na->na_scratch);
1758 		na->na_scratch = NULL;
1759 		na->na_scratch_cnt = 0;
1760 	}
1761 	ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1762 	sk_free_type_array_counted_by(struct __kern_channel_ring,
1763 	    na->na_all_rings_cnt, na->na_all_rings);
1764 	na->na_tx_rings = NULL;
1765 	na->na_tx_rings_cnt = 0;
1766 	na->na_rx_rings = NULL;
1767 	na->na_rx_rings_cnt = 0;
1768 	na->na_alloc_rings = NULL;
1769 	na->na_free_rings = NULL;
1770 	na->na_alloc_free_rings_cnt = 0;
1771 	na->na_event_rings = NULL;
1772 	na->na_event_rings_cnt = 0;
1773 	na->na_tail = NULL;
1774 	na->na_all_rings = NULL;
1775 	na->na_all_rings_cnt = 0;
1776 }
1777 
1778 /*
1779  * -fbounds-safety: If kernel_only, usds is NULL, so marking it
1780  * __counted_by(ndesc) would fail bounds check. We could use __sized_by_or_null
1781  * when it's ready: rdar://75598414
1782  * If usds != NULL, then ksds_cnt == usds_cnt
1783  */
1784 static void
1785 na_kr_slot_desc_init(struct __slot_desc *__counted_by(ksds_cnt)ksds,
1786     boolean_t kernel_only, struct __slot_desc *__counted_by(usds_cnt)usds,
1787     size_t ksds_cnt, size_t usds_cnt)
1788 {
1789 	size_t i;
1790 
1791 	bzero(ksds, ksds_cnt * SLOT_DESC_SZ);
1792 	if (usds != NULL) {
1793 		ASSERT(!kernel_only);
1794 		ASSERT(ksds_cnt == usds_cnt);
1795 		bzero(usds, usds_cnt * SLOT_DESC_SZ);
1796 	} else {
1797 		ASSERT(kernel_only);
1798 		ASSERT(usds_cnt == 0);
1799 	}
1800 
1801 	for (i = 0; i < ksds_cnt; i++) {
1802 		KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1803 		if (!kernel_only) {
1804 			USD_INIT(SLOT_DESC_USD(&usds[i]));
1805 		}
1806 	}
1807 }
1808 
1809 /* call with SK_LOCK held */
1810 static int
1811 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1812 {
1813 	struct skmem_arena *ar = na->na_arena;
1814 	struct skmem_arena_nexus *arn;
1815 	mach_vm_offset_t roff[SKMEM_REGIONS];
1816 	enum txrx t;
1817 	uint32_t i;
1818 	struct __slot_desc *ksds;
1819 
1820 	SK_LOCK_ASSERT_HELD();
1821 	ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1822 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1823 	arn = skmem_arena_nexus(ar);
1824 	ASSERT(arn != NULL);
1825 
1826 	bzero(&roff, sizeof(roff));
1827 	for (i = 0; i < SKMEM_REGIONS; i++) {
1828 		if (ar->ar_regions[i] == NULL) {
1829 			continue;
1830 		}
1831 
1832 		/* not for nexus */
1833 		ASSERT(i != SKMEM_REGION_SYSCTLS);
1834 
1835 		/*
1836 		 * Get region offsets from base of mmap span; the arena
1837 		 * doesn't need to be mmap'd at this point, since we
1838 		 * simply compute the relative offset.
1839 		 */
1840 		roff[i] = skmem_arena_get_region_offset(ar, i);
1841 	}
1842 
1843 	for_all_rings(t) {
1844 		for (i = 0; i < na_get_nrings(na, t); i++) {
1845 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1846 			struct __user_channel_ring *__single ring = kring->ckr_ring;
1847 			mach_vm_offset_t ring_off, usd_roff;
1848 			struct skmem_obj_info oi, oim;
1849 			uint32_t ndesc;
1850 
1851 			if (ring != NULL) {
1852 				SK_DF(SK_VERB_NA | SK_VERB_RING,
1853 				    "kr %p (\"%s\") is already "
1854 				    "initialized", SK_KVA(kring),
1855 				    kring->ckr_name);
1856 				continue; /* already created by somebody else */
1857 			}
1858 
1859 			if (!KR_KERNEL_ONLY(kring) &&
1860 			    (ring = skmem_cache_alloc(arn->arn_ring_cache,
1861 			    SKMEM_NOSLEEP)) == NULL) {
1862 				SK_ERR("Cannot allocate %s_ring for kr "
1863 				    "%p (\"%s\")", sk_ring2str(t),
1864 				    SK_KVA(kring), kring->ckr_name);
1865 				goto cleanup;
1866 			}
1867 			kring->ckr_flags |= CKRF_MEM_RING_INITED;
1868 			kring->ckr_ring = ring;
1869 			ndesc = kring->ckr_num_slots;
1870 
1871 			if (ring == NULL) {
1872 				goto skip_user_ring_setup;
1873 			}
1874 
1875 			*(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1876 
1877 			/* offset of current ring in mmap span */
1878 			skmem_cache_get_obj_info(arn->arn_ring_cache,
1879 			    ring, &oi, NULL);
1880 			ring_off = (roff[SKMEM_REGION_RING] +
1881 			    SKMEM_OBJ_ROFF(&oi));
1882 
1883 			/*
1884 			 * ring_{buf,md,sd}_ofs offsets are relative to the
1885 			 * current ring, and not to the base of mmap span.
1886 			 */
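			/*
			 * Conceptually, once the arena is mmap'd into the
			 * process, user space can locate a region from its
			 * ring; e.g. the default buffer region starts at
			 * (uintptr_t)ring + ring->ring_def_buf_base.
			 */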
1887 			*(mach_vm_offset_t *)(uintptr_t)
1888 			&ring->ring_def_buf_base =
1889 			    (roff[SKMEM_REGION_BUF_DEF] - ring_off);
1890 			*(mach_vm_offset_t *)(uintptr_t)
1891 			&ring->ring_large_buf_base =
1892 			    (roff[SKMEM_REGION_BUF_LARGE] - ring_off);
1893 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1894 			    (roff[SKMEM_REGION_UMD] - ring_off);
1895 			static_assert(sizeof(uint16_t) == sizeof(ring->ring_bft_size));
1896 			if (roff[SKMEM_REGION_UBFT] != 0) {
1897 				ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1898 				    NULL);
1899 				*(mach_vm_offset_t *)(uintptr_t)
1900 				&ring->ring_bft_base =
1901 				    (roff[SKMEM_REGION_UBFT] - ring_off);
1902 				*(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1903 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1904 				    skr_c_obj_size;
1905 				ASSERT(ring->ring_bft_size ==
1906 				    ar->ar_regions[SKMEM_REGION_KBFT]->
1907 				    skr_c_obj_size);
1908 			} else {
1909 				*(mach_vm_offset_t *)(uintptr_t)
1910 				&ring->ring_bft_base = 0;
1911 				*(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1912 			}
1913 
1914 			if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
1915 				usd_roff = roff[SKMEM_REGION_TXAUSD];
1916 			} else {
1917 				ASSERT(t == NR_RX || t == NR_F);
1918 				usd_roff = roff[SKMEM_REGION_RXFUSD];
1919 			}
1920 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1921 			    (usd_roff - ring_off);
1922 
1923 			/* copy values from kring */
1924 			ring->ring_head = kring->ckr_rhead;
1925 			*(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1926 			    kring->ckr_khead;
1927 			*(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1928 			    kring->ckr_rtail;
1929 
1930 			static_assert(sizeof(uint32_t) == sizeof(ring->ring_def_buf_size));
1931 			static_assert(sizeof(uint32_t) == sizeof(ring->ring_large_buf_size));
1932 			static_assert(sizeof(uint16_t) == sizeof(ring->ring_md_size));
1933 			*(uint32_t *)(uintptr_t)&ring->ring_def_buf_size =
1934 			    ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size;
1935 			if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) {
1936 				*(uint32_t *)(uintptr_t)&ring->ring_large_buf_size =
1937 				    ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size;
1938 			} else {
1939 				*(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0;
1940 			}
1941 			if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1942 				*(uint16_t *)(uintptr_t)&ring->ring_md_size =
1943 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1944 				    skr_c_obj_size;
1945 				ASSERT(ring->ring_md_size ==
1946 				    ar->ar_regions[SKMEM_REGION_KMD]->
1947 				    skr_c_obj_size);
1948 			} else {
1949 				*(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1950 				ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1951 				ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1952 			}
1953 
1954 			/* ring info */
1955 			static_assert(sizeof(uint16_t) == sizeof(ring->ring_id));
1956 			static_assert(sizeof(uint16_t) == sizeof(ring->ring_kind));
1957 			*(uint16_t *)(uintptr_t)&ring->ring_id =
1958 			    (uint16_t)kring->ckr_ring_id;
1959 			*(uint16_t *)(uintptr_t)&ring->ring_kind =
1960 			    (uint16_t)kring->ckr_tx;
1961 
1962 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1963 			    "%s_ring at %p kr %p (\"%s\")",
1964 			    sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1965 			    kring->ckr_name);
1966 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1967 			    "  num_slots:  %u", ring->ring_num_slots);
1968 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1969 			    "  def_buf_base:   0x%llx",
1970 			    (uint64_t)ring->ring_def_buf_base);
1971 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1972 			    "  large_buf_base:   0x%llx",
1973 			    (uint64_t)ring->ring_large_buf_base);
1974 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1975 			    "  md_base:    0x%llx",
1976 			    (uint64_t)ring->ring_md_base);
1977 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1978 			    "  sd_base:    0x%llx",
1979 			    (uint64_t)ring->ring_sd_base);
1980 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1981 			    "  h, t:    %u, %u", ring->ring_head,
1982 			    ring->ring_tail);
1983 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1984 			    "  md_size:    %llu",
1985 			    (uint64_t)ring->ring_md_size);
1986 
1987 			/* make sure they're in synch */
1988 			static_assert(NR_RX == CR_KIND_RX);
1989 			static_assert(NR_TX == CR_KIND_TX);
1990 			static_assert(NR_A == CR_KIND_ALLOC);
1991 			static_assert(NR_F == CR_KIND_FREE);
1992 			static_assert(NR_EV == CR_KIND_EVENT);
1993 			static_assert(NR_LBA == CR_KIND_LARGE_BUF_ALLOC);
1994 
1995 skip_user_ring_setup:
1996 			/*
1997 			 * This flag tells na_kr_teardown_all() that it should
1998 			 * go thru the checks to free up the slot maps.
1999 			 */
2000 			kring->ckr_flags |= CKRF_MEM_SD_INITED;
2001 			if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
2002 				kring->ckr_ksds_cache = arn->arn_txaksd_cache;
2003 			} else {
2004 				ASSERT(t == NR_RX || t == NR_F);
2005 				kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
2006 			}
2007 
2008 			ksds = skmem_cache_alloc(kring->ckr_ksds_cache,
2009 			    SKMEM_NOSLEEP);
2010 			if (ksds == NULL) {
2011 				SK_ERR("Cannot allocate %s_ksds for kr "
2012 				    "%p (\"%s\")", sk_ring2str(t),
2013 				    SK_KVA(kring), kring->ckr_name);
2014 				goto cleanup;
2015 			}
2016 			kring->ckr_ksds = ksds;
2017 			kring->ckr_ksds_cnt = kring->ckr_num_slots;
2018 			if (!KR_KERNEL_ONLY(kring)) {
2019 				skmem_cache_get_obj_info(kring->ckr_ksds_cache,
2020 				    kring->ckr_ksds, &oi, &oim);
2021 				kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
2022 				kring->ckr_usds_cnt = kring->ckr_num_slots;
2023 			}
2024 			na_kr_slot_desc_init(kring->ckr_ksds,
2025 			    KR_KERNEL_ONLY(kring), kring->ckr_usds,
2026 			    kring->ckr_ksds_cnt, kring->ckr_usds_cnt);
2027 
2028 			/* cache last slot descriptor address */
2029 			ASSERT(kring->ckr_lim == (ndesc - 1));
2030 			kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
2031 
2032 			if ((t < NR_TXRX) &&
2033 			    !(na->na_flags & NAF_USER_PKT_POOL) &&
2034 			    na_kr_populate_slots(kring) != 0) {
2035 				SK_ERR("Cannot allocate buffers for kr "
2036 				    "%p (\"%s\")", SK_KVA(kring),
2037 				    kring->ckr_name);
2038 				goto cleanup;
2039 			}
2040 		}
2041 	}
2042 
2043 	return 0;
2044 
2045 cleanup:
2046 	na_kr_teardown_all(na, ch, FALSE);
2047 
2048 	return ENOMEM;
2049 }
2050 
2051 static void
2052 na_kr_teardown_common(struct nexus_adapter *na,
2053     struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
2054     boolean_t defunct)
2055 {
2056 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
2057 	struct __user_channel_ring *ckr_ring;
2058 	boolean_t sd_idle, sd_inited;
2059 
2060 	ASSERT(arn != NULL);
2061 	kr_enter(kring, TRUE);
2062 	/*
2063 	 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
2064 	 * to make sure that the freeing needs to happen (else just
2065 	 * nullify the values).
2066 	 * If this adapter owns the memory for the slot descriptors,
2067 	 * check if the region is marked as busy (sd_idle is false)
2068 	 * and leave the kring's slot descriptor fields alone if so,
2069 	 * at defunct time.  At final teardown time, sd_idle must be
2070 	 * true else we assert; this indicates a missing call to
2071 	 * skmem_arena_nexus_sd_set_noidle().
2072 	 */
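	/*
	 * In short:
	 *   sd_inited && sd_idle  -> depopulate slots, free ksds/usds
	 *   sd_inited && !sd_idle -> depopulate only (defunct); keep ksds/usds
	 *   !sd_inited            -> no slot maps were set up; just clear
	 *                            any stale pointers below
	 */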
2073 	sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
2074 	if (sd_inited) {
2075 		/* callee will do KR_KSD(), so check */
2076 		if (((t < NR_TXRX) || (t == NR_EV)) &&
2077 		    (kring->ckr_ksds != NULL)) {
2078 			na_kr_depopulate_slots(kring, ch, defunct);
2079 		}
2080 		/* leave CKRF_MEM_SD_INITED flag alone until idle */
2081 		sd_idle = skmem_arena_nexus_sd_idle(arn);
2082 		VERIFY(sd_idle || defunct);
2083 	} else {
2084 		sd_idle = TRUE;
2085 	}
2086 
2087 	if (sd_idle) {
2088 		kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2089 		if (kring->ckr_ksds != NULL) {
2090 			if (sd_inited) {
2091 				skmem_cache_free(kring->ckr_ksds_cache,
2092 				    kring->ckr_ksds);
2093 			}
2094 			kring->ckr_ksds = NULL;
2095 			kring->ckr_ksds_cnt = 0;
2096 			kring->ckr_ksds_last = NULL;
2097 			kring->ckr_usds = NULL;
2098 			kring->ckr_usds_cnt = 0;
2099 		}
2100 		ASSERT(kring->ckr_ksds_last == NULL);
2101 		ASSERT(kring->ckr_usds == NULL);
2102 	}
2103 
2104 	if ((ckr_ring = kring->ckr_ring) != NULL) {
2105 		kring->ckr_ring = NULL;
2106 	}
2107 
2108 	if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2109 		ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2110 		if (ckr_ring != NULL) {
2111 			skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2112 		}
2113 		kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2114 	}
2115 
2116 	if (defunct) {
2117 		/* if defunct, drop everything; see KR_DROP() */
2118 		kring->ckr_flags |= CKRF_DEFUNCT;
2119 	}
2120 	kr_exit(kring);
2121 }
2122 
2123 /*
2124  * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event,large buf alloc}
2125  */
2126 static void
2127 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2128     boolean_t defunct)
2129 {
2130 	enum txrx t;
2131 
2132 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2133 
2134 	/* skip if this adapter has no allocated rings */
2135 	if (na->na_tx_rings == NULL) {
2136 		return;
2137 	}
2138 
2139 	for_all_rings(t) {
2140 		for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2141 			na_kr_teardown_common(na, &NAKR(na, t)[i],
2142 			    t, ch, defunct);
2143 		}
2144 	}
2145 }
2146 
2147 /*
2148  * Teardown only {tx,rx} rings assigned to the channel.
2149  */
2150 static void
2151 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2152     boolean_t defunct, struct proc *p)
2153 {
2154 	enum txrx t;
2155 
2156 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2157 
2158 	for_rx_tx(t) {
2159 		ring_id_t qfirst = ch->ch_first[t];
2160 		ring_id_t qlast = ch->ch_last[t];
2161 		uint32_t i;
2162 
2163 		for (i = qfirst; i < qlast; i++) {
2164 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2165 			na_kr_teardown_common(na, kring, t, ch, defunct);
2166 
2167 			/*
2168 			 * Issue a notify to wake up anyone sleeping in kqueue
2169 			 * so that they notice the newly defuncted channels and
2170 			 * return an error.
2171 			 */
2172 			kring->ckr_na_notify(kring, p, 0);
2173 		}
2174 	}
2175 }
2176 
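/*
 * For kring types that require it (see the switch below), pre-attach a
 * packet from the kring's packet pool to every slot.  Only used when the
 * channel does not manage its own user packet pool (NAF_USER_PKT_POOL).
 */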
2177 static int
2178 na_kr_populate_slots(struct __kern_channel_ring *kring)
2179 {
2180 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2181 	struct nexus_adapter *na = KRNA(kring);
2182 	kern_pbufpool_t pp = kring->ckr_pp;
2183 	uint32_t nslots = kring->ckr_num_slots;
2184 	uint32_t start_idx, i;
2185 	uint32_t sidx = 0;      /* slot counter */
2186 	struct __kern_slot_desc *ksd;
2187 	struct __user_slot_desc *usd;
2188 	struct __kern_quantum *kqum;
2189 	nexus_type_t nexus_type;
2190 	int err = 0;
2191 
2192 	ASSERT(kring->ckr_tx < NR_TXRX);
2193 	ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2194 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2195 	ASSERT(pp != NULL);
2196 
2197 	/*
2198 	 * xxx_ppool: remove this special case
2199 	 */
2200 	nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2201 
2202 	switch (nexus_type) {
2203 	case NEXUS_TYPE_FLOW_SWITCH:
2204 	case NEXUS_TYPE_KERNEL_PIPE:
2205 		/*
2206 		 * xxx_ppool: This is temporary code until we come up with a
2207 		 * scheme for user space to alloc & attach packets to tx ring.
2208 		 */
2209 		if (kernel_only || kring->ckr_tx == NR_RX) {
2210 			return 0;
2211 		}
2212 		break;
2213 
2214 	case NEXUS_TYPE_NET_IF:
2215 		if (((na->na_type == NA_NETIF_DEV) ||
2216 		    (na->na_type == NA_NETIF_HOST)) &&
2217 		    (kernel_only || (kring->ckr_tx == NR_RX))) {
2218 			return 0;
2219 		}
2220 
2221 		ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2222 		    (na->na_type == NA_NETIF_COMPAT_HOST) ||
2223 		    (na->na_type == NA_NETIF_DEV) ||
2224 		    (na->na_type == NA_NETIF_VP));
2225 
2226 		if (!kernel_only) {
2227 			if (kring->ckr_tx == NR_RX) {
2228 				return 0;
2229 			} else {
2230 				break;
2231 			}
2232 		}
2233 
2234 		ASSERT(kernel_only);
2235 
2236 		if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2237 		    (na->na_type == NA_NETIF_COMPAT_HOST)) {
2238 			return 0;
2239 		}
2240 		VERIFY(0);
2241 		/* NOTREACHED */
2242 		__builtin_unreachable();
2243 
2244 	case NEXUS_TYPE_USER_PIPE:
2245 		break;
2246 
2247 	default:
2248 		VERIFY(0);
2249 		/* NOTREACHED */
2250 		__builtin_unreachable();
2251 	}
2252 
2253 	/* Fill the ring with packets */
2254 	sidx = start_idx = 0;
2255 	for (i = 0; i < nslots; i++) {
2256 		kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2257 		    SKMEM_NOSLEEP));
2258 		if (kqum == NULL) {
2259 			err = ENOMEM;
2260 			SK_ERR("ar %p (\"%s\") no more buffers "
2261 			    "after %u of %u, err %d", SK_KVA(na->na_arena),
2262 			    na->na_arena->ar_name, i, nslots, err);
2263 			goto cleanup;
2264 		}
2265 		ksd = KR_KSD(kring, i);
2266 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2267 
2268 		/* attach packet to slot */
2269 		kqum->qum_ksd = ksd;
2270 		ASSERT(!KSD_VALID_METADATA(ksd));
2271 		KSD_ATTACH_METADATA(ksd, kqum);
2272 		if (usd != NULL) {
2273 			USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2274 			kr_externalize_metadata(kring, pp->pp_max_frags,
2275 			    kqum, current_proc());
2276 		}
2277 
2278 		SK_DF(SK_VERB_MEM, " C ksd [%-3d, %p] kqum [%-3u, %p] "
2279 		    " kbuf[%-3u, %p]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2280 		    SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2281 		    SK_KVA(&kqum->qum_buf[0]));
2282 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2283 			SK_DF(SK_VERB_MEM, " C usd [%-3d, %p] "
2284 			    "uqum [%-3u, %p]  ubuf[%-3u, %p]",
2285 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2286 			    SK_KVA(usd), METADATA_IDX(kqum),
2287 			    SK_KVA(kqum->qum_user),
2288 			    kqum->qum_user->qum_buf[0].buf_idx,
2289 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2290 		}
2291 
2292 		sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2293 	}
2294 
2295 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar %p (\"%s\") populated %u slots from idx %u",
2296 	    SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2297 
2298 cleanup:
2299 	if (err != 0) {
2300 		sidx = start_idx;
2301 		while (i-- > 0) {
2302 			ksd = KR_KSD(kring, i);
2303 			usd = (kernel_only ? NULL : KR_USD(kring, i));
2304 			kqum = ksd->sd_qum;
2305 
2306 			ASSERT(ksd == kqum->qum_ksd);
2307 			KSD_RESET(ksd);
2308 			if (usd != NULL) {
2309 				USD_RESET(usd);
2310 			}
2311 			/* detach packet from slot */
2312 			kqum->qum_ksd = NULL;
2313 			pp_free_packet(pp, SK_PTR_ADDR(kqum));
2314 
2315 			sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2316 		}
2317 	}
2318 	return err;
2319 }
2320 
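/*
 * Detach and free the packets currently attached to the kring's slots;
 * when the adapter uses a user packet pool, packets loaned to user space
 * are first removed from the pool's hash table.  Called via
 * na_kr_teardown_common().
 */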
2321 static void
2322 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2323     struct kern_channel *ch, boolean_t defunct)
2324 {
2325 #pragma unused(ch)
2326 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2327 	uint32_t i, j, n = kring->ckr_num_slots;
2328 	struct nexus_adapter *na = KRNA(kring);
2329 	struct kern_pbufpool *pp = kring->ckr_pp;
2330 	boolean_t upp = FALSE;
2331 	obj_idx_t midx;
2332 
2333 	ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2334 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2335 
2336 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2337 
2338 	if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2339 	    (kring->ckr_tx != NR_EV)) {
2340 		upp = TRUE;
2341 	}
2342 	for (i = 0, j = 0; i < n; i++) {
2343 		struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2344 		struct __user_slot_desc *usd;
2345 		struct __kern_quantum *qum, *kqum;
2346 		boolean_t free_packet = FALSE;
2347 		int err;
2348 
2349 		if (!KSD_VALID_METADATA(ksd)) {
2350 			continue;
2351 		}
2352 
2353 		kqum = ksd->sd_qum;
2354 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2355 		midx = METADATA_IDX(kqum);
2356 
2357 		/*
2358 		 * if the packet is internalized it should not be in the
2359 		 * hash table of packets loaned to user space.
2360 		 */
2361 		if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2362 			if ((qum = pp_find_upp(pp, midx)) != NULL) {
2363 				panic("internalized packet %p in htbl",
2364 				    SK_KVA(qum));
2365 				/* NOTREACHED */
2366 				__builtin_unreachable();
2367 			}
2368 			free_packet = TRUE;
2369 		} else if (upp) {
2370 			/*
2371 			 * if the packet is not internalized check if it is
2372 			 * in the list of packets loaned to user-space.
2373 			 * Remove from the list before freeing.
2374 			 */
2375 			ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2376 			qum = pp_remove_upp(pp, midx, &err);
2377 			if (err != 0) {
2378 				SK_ERR("un-allocated packet or buflet %d %p",
2379 				    midx, SK_KVA(qum));
2380 				if (qum != NULL) {
2381 					free_packet = TRUE;
2382 				}
2383 			}
2384 		} else {
2385 			free_packet = TRUE;
2386 		}
2387 
2388 		/*
2389 		 * Clear the user and kernel slot descriptors.  Note that
2390 		 * if we are depopulating the slots due to defunct (and not
2391 		 * due to normal deallocation/teardown), we leave the user
2392 		 * slot descriptor alone.  At that point the process may
2393 		 * be suspended, and later when it resumes it would just
2394 		 * pick up the original contents and move forward with
2395 		 * whatever it was doing.
2396 		 */
2397 		KSD_RESET(ksd);
2398 		if (usd != NULL && !defunct) {
2399 			USD_RESET(usd);
2400 		}
2401 
2402 		/* detach packet from slot */
2403 		kqum->qum_ksd = NULL;
2404 
2405 		SK_DF(SK_VERB_MEM, " D ksd [%-3d, %p] kqum [%-3u, %p] "
2406 		    " kbuf[%-3u, %p]", i, SK_KVA(ksd),
2407 		    METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2408 		    SK_KVA(&kqum->qum_buf[0]));
2409 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2410 			SK_DF(SK_VERB_MEM, " D usd [%-3u, %p] "
2411 			    "uqum [%-3u, %p]  ubuf[%-3u, %p]",
2412 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2413 			    SK_KVA(usd), METADATA_IDX(kqum),
2414 			    SK_KVA(kqum->qum_user),
2415 			    kqum->qum_user->qum_buf[0].buf_idx,
2416 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2417 		}
2418 
2419 		if (free_packet) {
2420 			pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2421 		}
2422 	}
2423 
2424 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar %p (\"%s\") depopulated %u of %u slots",
2425 	    SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2426 	    j, n);
2427 }
2428 
2429 int
2430 na_rings_mem_setup(struct nexus_adapter *na,
2431     boolean_t alloc_ctx, struct kern_channel *ch)
2432 {
2433 	boolean_t kronly;
2434 	int err;
2435 
2436 	SK_LOCK_ASSERT_HELD();
2437 	ASSERT(na->na_channels == 0);
2438 	/*
2439 	 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2440 	 * the backing memory regions for the adapter.
2441 	 */
2442 	kronly = (na->na_flags & NAF_MEM_NO_INIT);
2443 	ASSERT(!kronly || NA_KERNEL_ONLY(na));
2444 
2445 	/*
2446 	 * Create and initialize the common fields of the krings array,
2447 	 * using the information that must already be available in the na.
2448 	 */
2449 	if ((err = na_kr_create(na, alloc_ctx)) == 0 && !kronly) {
2450 		err = na_kr_setup(na, ch);
2451 		if (err != 0) {
2452 			na_kr_delete(na);
2453 		}
2454 	}
2455 
2456 	return err;
2457 }
2458 
2459 void
2460 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2461     boolean_t defunct)
2462 {
2463 	SK_LOCK_ASSERT_HELD();
2464 	ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2465 
2466 	/*
2467 	 * Deletes the kring and ring array of the adapter. They
2468 	 * must have been created using na_rings_mem_setup().
2469 	 *
2470 	 * XXX: [email protected] -- the parameter "ch" should not be
2471 	 * needed here; however na_kr_depopulate_slots() needs to
2472 	 * go thru the channel's user packet pool hash, and so for
2473 	 * now we leave it here.
2474 	 */
2475 	na_kr_teardown_all(na, ch, defunct);
2476 	if (!defunct) {
2477 		na_kr_delete(na);
2478 	}
2479 }
2480 
2481 void
2482 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2483 {
2484 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2485 
2486 	/*
2487 	 * Depopulate slots on the TX and RX rings of this channel,
2488 	 * but don't touch other rings owned by other channels if
2489 	 * this adapter is being shared.
2490 	 */
2491 	na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2492 }
2493 
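/*
 * Mark (or unmark) all TX/RX rings of the adapter with CKRF_DROP.  Each
 * ring is entered via kr_enter() where possible, so that any sync already
 * in progress drains before the flag change is observed.
 */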
2494 void
2495 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2496 {
2497 	enum txrx t;
2498 	uint32_t i;
2499 
2500 	for_rx_tx(t) {
2501 		for (i = 0; i < na_get_nrings(na, t); i++) {
2502 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2503 			int error;
2504 			error = kr_enter(kring, TRUE);
2505 			if (drop) {
2506 				kring->ckr_flags |= CKRF_DROP;
2507 			} else {
2508 				kring->ckr_flags &= ~CKRF_DROP;
2509 			}
2510 
2511 			if (error != 0) {
2512 				SK_ERR("na \"%s\" (%p) kr \"%s\" (%p) "
2513 				    "kr_enter failed %d",
2514 				    na->na_name, SK_KVA(na),
2515 				    kring->ckr_name, SK_KVA(kring),
2516 				    error);
2517 			} else {
2518 				kr_exit(kring);
2519 			}
2520 			SK_DF(SK_VERB_NA, "na \"%s\" (%p) kr \"%s\" (%p) "
2521 			    "krflags 0x%x", na->na_name, SK_KVA(na),
2522 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags);
2523 		}
2524 	}
2525 }
2526 
2527 /*
2528  * Set the stopped/enabled status of a ring.  When stopping, this also waits
2529  * for all current activity on the ring to terminate.  The status change
2530  * is then notified using the na_notify callback of the na.
2531  */
2532 static void
2533 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2534     uint32_t state)
2535 {
2536 	struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2537 
2538 	/*
2539 	 * Mark the ring as stopped/enabled, and run through the
2540 	 * locks to make sure other users get to see it.
2541 	 */
2542 	if (state == KR_READY) {
2543 		kr_start(kr);
2544 	} else {
2545 		kr_stop(kr, state);
2546 	}
2547 }
2548 
2549 
2550 /* stop or enable all the rings of na */
2551 static void
2552 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2553 {
2554 	uint32_t i;
2555 	enum txrx t;
2556 
2557 	SK_LOCK_ASSERT_HELD();
2558 
2559 	if (!NA_IS_ACTIVE(na)) {
2560 		return;
2561 	}
2562 
2563 	for_rx_tx(t) {
2564 		for (i = 0; i < na_get_nrings(na, t); i++) {
2565 			na_set_ring(na, i, t, state);
2566 		}
2567 	}
2568 }
2569 
2570 /*
2571  * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
2572  * to finish and prevents any new one from starting.  Call this before turning
2573  * Skywalk mode off, or before removing the hardware rings (e.g., on module
2574  * unload).  As a rule of thumb for Linux drivers, this should be placed near
2575  * each napi_disable().
2576  */
2577 void
2578 na_disable_all_rings(struct nexus_adapter *na)
2579 {
2580 	na_set_all_rings(na, KR_STOPPED);
2581 }
2582 
2583 /*
2584  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
2585  * adapter's rings.  In Linux drivers, this should be placed near each
2586  * napi_enable().
2587  */
2588 void
2589 na_enable_all_rings(struct nexus_adapter *na)
2590 {
2591 	na_set_all_rings(na, KR_READY /* enabled */);
2592 }
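/*
 * A typical (hypothetical) driver sequence around a hardware reconfiguration
 * might be:
 *
 *	na_disable_all_rings(na);	// quiesce and block tx/rx syncs
 *	// ... remove or reprogram the hardware rings ...
 *	na_enable_all_rings(na);	// resume syncs
 */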
2593 
2594 void
2595 na_lock_all_rings(struct nexus_adapter *na)
2596 {
2597 	na_set_all_rings(na, KR_LOCKED);
2598 }
2599 
2600 void
2601 na_unlock_all_rings(struct nexus_adapter *na)
2602 {
2603 	na_enable_all_rings(na);
2604 }
2605 
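/*
 * Connect a user channel to the nexus adapter described by 'chr': look up
 * (or create) the adapter, bind the requested rings to the channel, map the
 * adapter's arena into the process, and mark the channel schema active.
 */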
2606 int
2607 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2608     struct nxbind *nxb, struct proc *p)
2609 {
2610 	struct nexus_adapter *__single na = NULL;
2611 	mach_vm_size_t memsize = 0;
2612 	int err = 0;
2613 	enum txrx t;
2614 
2615 	ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2616 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2617 
2618 	SK_LOCK_ASSERT_HELD();
2619 
2620 	/* find the nexus adapter and return the reference */
2621 	err = na_find(ch, nx, chr, nxb, p, &na, TRUE /* create */);
2622 	if (err != 0) {
2623 		ASSERT(na == NULL);
2624 		goto done;
2625 	}
2626 
2627 	if (NA_KERNEL_ONLY(na)) {
2628 		err = EBUSY;
2629 		goto done;
2630 	}
2631 
2632 	/* reject if the adapter is defunct or non-permissive */
2633 	if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2634 		err = ENXIO;
2635 		goto done;
2636 	}
2637 
2638 	err = na_bind_channel(na, ch, chr);
2639 	if (err != 0) {
2640 		goto done;
2641 	}
2642 
2643 	ASSERT(ch->ch_schema != NULL);
2644 	ASSERT(na == ch->ch_na);
2645 
2646 	for_all_rings(t) {
2647 		if (na_get_nrings(na, t) == 0) {
2648 			ch->ch_si[t] = NULL;
2649 			continue;
2650 		}
2651 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2652 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2653 	}
2654 
2655 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2656 
2657 	if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2658 	    AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2659 		os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags),
2660 		    CSM_PRIV_MEM, relaxed);
2661 	}
2662 
2663 	err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2664 	if (err != 0) {
2665 		goto done;
2666 	}
2667 
2668 	os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_ACTIVE, relaxed);
2669 	chr->cr_memsize = memsize;
2670 	chr->cr_memoffset = ch->ch_schema_offset;
2671 
2672 	SK_DF(SK_VERB_NA, "%s(%d) ch %p <-> nx %p (%s:\"%s\":%d:%d) na %p naflags 0x%x",
2673 	    sk_proc_name(p), sk_proc_pid(p), SK_KVA(ch), SK_KVA(nx),
2674 	    NX_DOM_PROV(nx)->nxdom_prov_name, na->na_name, (int)chr->cr_port,
2675 	    (int)chr->cr_ring_id, SK_KVA(na), na->na_flags);
2676 
2677 done:
2678 	if (err != 0) {
2679 		if (ch->ch_schema != NULL || na != NULL) {
2680 			if (ch->ch_schema != NULL) {
2681 				ASSERT(na == ch->ch_na);
2682 				/*
2683 				 * Callee will unmap memory region if needed,
2684 				 * as well as release reference held on 'na'.
2685 				 */
2686 				na_disconnect(nx, ch);
2687 				na = NULL;
2688 			}
2689 			if (na != NULL) {
2690 				(void) na_release_locked(na);
2691 				na = NULL;
2692 			}
2693 		}
2694 	}
2695 
2696 	return err;
2697 }
2698 
2699 void
2700 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2701 {
2702 #pragma unused(nx)
2703 	enum txrx t;
2704 
2705 	SK_LOCK_ASSERT_HELD();
2706 
2707 	SK_DF(SK_VERB_NA, "ch %p -!- nx %p (%s:\"%s\":%u:%d) na %p naflags 0x%x",
2708 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2709 	    ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2710 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2711 	    ch->ch_na->na_flags);
2712 
2713 	/* destroy mapping and release references */
2714 	na_unbind_channel(ch);
2715 	ASSERT(ch->ch_na == NULL);
2716 	ASSERT(ch->ch_schema == NULL);
2717 	for_all_rings(t) {
2718 		ch->ch_si[t] = NULL;
2719 	}
2720 }
2721 
2722 void
2723 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2724     struct nexus_adapter *na, boolean_t locked)
2725 {
2726 #pragma unused(nx)
2727 	SK_LOCK_ASSERT_HELD();
2728 	if (!locked) {
2729 		lck_mtx_lock(&ch->ch_lock);
2730 	}
2731 
2732 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2733 
2734 	if (!(na->na_flags & NAF_DEFUNCT)) {
2735 		/*
2736 		 * Mark this adapter as defunct to inform nexus-specific
2737 		 * teardown handler called by na_teardown() below.
2738 		 */
2739 		os_atomic_or(&na->na_flags, NAF_DEFUNCT, relaxed);
2740 
2741 		/*
2742 		 * Depopulate slots.
2743 		 */
2744 		na_teardown(na, ch, TRUE);
2745 
2746 		/*
2747 		 * And finally destroy any already-defunct memory regions.
2748 		 * Do this only if the nexus adapter owns the arena, i.e.
2749 		 * NAF_MEM_LOANED is not set.  Otherwise, we'd expect
2750 		 * that this routine be called again for the real owner.
2751 		 */
2752 		if (!(na->na_flags & NAF_MEM_LOANED)) {
2753 			skmem_arena_defunct(na->na_arena);
2754 		}
2755 	}
2756 
2757 	SK_DF(SK_VERB_NA, "%s(%d): ch %p -/- nx %p (%s:\"%s\":%u:%d) na %p naflags 0x%x",
2758 	    ch->ch_name, ch->ch_pid, SK_KVA(ch), SK_KVA(nx),
2759 	    NX_DOM_PROV(nx)->nxdom_prov_name, na->na_name,
2760 	    ch->ch_info->cinfo_nx_port, (int)ch->ch_info->cinfo_ch_ring_id,
2761 	    SK_KVA(na), na->na_flags);
2762 
2763 	if (!locked) {
2764 		lck_mtx_unlock(&ch->ch_lock);
2765 	}
2766 }
2767 
2768 /*
2769  * TODO: [email protected] -- merge this into na_connect()
2770  */
2771 int
2772 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2773     struct chreq *chr, struct proc *p)
2774 {
2775 #pragma unused(p)
2776 	struct nexus_adapter *__single na = NULL;
2777 	mach_vm_size_t memsize = 0;
2778 	int error = 0;
2779 	enum txrx t;
2780 
2781 	ASSERT(chr->cr_mode & CHMODE_KERNEL);
2782 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2783 	ASSERT(ch->ch_na == NULL);
2784 	ASSERT(ch->ch_schema == NULL);
2785 
2786 	SK_LOCK_ASSERT_HELD();
2787 
2788 	error = na_find(ch, nx, chr, NULL, kernproc, &na, TRUE);
2789 	if (error != 0) {
2790 		goto done;
2791 	}
2792 
2793 	if (na == NULL) {
2794 		error = EINVAL;
2795 		goto done;
2796 	}
2797 
2798 	if (na->na_channels > 0) {
2799 		error = EBUSY;
2800 		goto done;
2801 	}
2802 
2803 	if (na->na_flags & NAF_DEFUNCT) {
2804 		error = ENXIO;
2805 		goto done;
2806 	}
2807 
2808 	/*
2809 	 * Special connect requires the nexus adapter to handle its
2810 	 * own channel binding and unbinding via na_special(); bail
2811 	 * if this adapter doesn't support it.
2812 	 */
2813 	if (na->na_special == NULL) {
2814 		error = ENOTSUP;
2815 		goto done;
2816 	}
2817 
2818 	/* upon success, "ch->ch_na" will point to "na" */
2819 	error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2820 	if (error != 0) {
2821 		ASSERT(ch->ch_na == NULL);
2822 		goto done;
2823 	}
2824 
2825 	ASSERT(na->na_flags & NAF_SPEC_INIT);
2826 	ASSERT(na == ch->ch_na);
2827 	/* make sure this is still the case */
2828 	ASSERT(ch->ch_schema == NULL);
2829 
2830 	for_rx_tx(t) {
2831 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2832 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2833 	}
2834 
2835 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2836 	chr->cr_memsize = memsize;
2837 
2838 	SK_DF(SK_VERB_NA, "%s(%d) ch %p <-> nx %p (%s:\"%s\":%d:%d) na %p naflags 0x%x",
2839 	    sk_proc_name(p), sk_proc_pid(p), SK_KVA(ch), SK_KVA(nx),
2840 	    NX_DOM_PROV(nx)->nxdom_prov_name, na->na_name, (int)chr->cr_port,
2841 	    (int)chr->cr_ring_id, SK_KVA(na), na->na_flags);
2842 
2843 done:
2844 	if (error != 0) {
2845 		if (ch->ch_na != NULL || na != NULL) {
2846 			if (ch->ch_na != NULL) {
2847 				ASSERT(na == ch->ch_na);
2848 				/* callee will release reference on 'na' */
2849 				na_disconnect_spec(nx, ch);
2850 				na = NULL;
2851 			}
2852 			if (na != NULL) {
2853 				(void) na_release_locked(na);
2854 				na = NULL;
2855 			}
2856 		}
2857 	}
2858 
2859 	return error;
2860 }
2861 
2862 /*
2863  * TODO: [email protected] -- merge this into na_disconnect()
2864  */
2865 void
2866 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2867 {
2868 #pragma unused(nx)
2869 	struct nexus_adapter *na = ch->ch_na;
2870 	enum txrx t;
2871 	int error;
2872 
2873 	SK_LOCK_ASSERT_HELD();
2874 	ASSERT(na != NULL);
2875 	ASSERT(na->na_flags & NAF_SPEC_INIT);   /* has been bound */
2876 
2877 	SK_DF(SK_VERB_NA, "ch %p -!- nx %p (%s:\"%s\":%u:%d) na %p naflags 0x%x",
2878 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2879 	    ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2880 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2881 	    ch->ch_na->na_flags);
2882 
2883 	/* take a reference for this routine */
2884 	na_retain_locked(na);
2885 
2886 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2887 	ASSERT(ch->ch_schema == NULL);
2888 	ASSERT(na->na_special != NULL);
2889 	/* unbind this channel */
2890 	error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2891 	ASSERT(error == 0);
2892 	ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2893 
2894 	/* now release our reference; this may be the last */
2895 	na_release_locked(na);
2896 	na = NULL;
2897 
2898 	ASSERT(ch->ch_na == NULL);
2899 	for_rx_tx(t) {
2900 		ch->ch_si[t] = NULL;
2901 	}
2902 }
2903 
2904 void
2905 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2906 {
2907 #pragma unused(nx)
2908 	struct nexus_adapter *na = ch->ch_na;
2909 
2910 	SK_LOCK_ASSERT_HELD();
2911 
2912 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2913 	ASSERT(NA_KERNEL_ONLY(na));
2914 	ASSERT(na->na_special != NULL);
2915 
2916 	na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2917 }
2918 
2919 void
2920 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2921 {
2922 #pragma unused(nx)
2923 	struct nexus_adapter *na = ch->ch_na;
2924 
2925 	SK_LOCK_ASSERT_HELD();
2926 
2927 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2928 	ASSERT(NA_KERNEL_ONLY(na));
2929 	ASSERT(na->na_special != NULL);
2930 
2931 	na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2932 }
2933 
2934 /*
2935  * MUST BE CALLED UNDER SK_LOCK()
2936  *
2937  * Get a refcounted reference to a nexus adapter attached
2938  * to the interface specified by chr.
2939  * This is always called in the execution of an ioctl().
2940  *
2941  * Return ENXIO if the interface specified by the request does
2942  * not exist, ENOTSUP if Skywalk is not supported by the interface,
2943  * EINVAL if parameters are invalid, ENOMEM if needed resources
2944  * could not be allocated.
2945  * If successful, hold a reference to the nexus adapter.
2946  *
2947  * No reference is kept on the real interface, which may then
2948  * disappear at any time.
2949  */
2950 int
2951 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2952     struct nxbind *nxb, struct proc *p, struct nexus_adapter **na,
2953     boolean_t create)
2954 {
2955 	int error = 0;
2956 
2957 	static_assert(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2958 
2959 	*na = NULL;     /* default return value */
2960 
2961 	SK_LOCK_ASSERT_HELD();
2962 
2963 	/*
2964 	 * We cascade through all possible types of nexus adapter.
2965 	 * All nx_*_na_find() functions return an error and an na,
2966 	 * with the following combinations:
2967 	 *
2968 	 * error    na
2969 	 *   0	   NULL		type doesn't match
2970 	 *  !0	   NULL		type matches, but na creation/lookup failed
2971 	 *   0	  !NULL		type matches and na created/found
2972 	 *  !0    !NULL		impossible
2973 	 */
2974 
2975 #if CONFIG_NEXUS_USER_PIPE
2976 	/* try to see if this is a pipe port */
2977 	error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2978 	if (error != 0 || *na != NULL) {
2979 		return error;
2980 	}
2981 #endif /* CONFIG_NEXUS_USER_PIPE */
2982 #if CONFIG_NEXUS_KERNEL_PIPE
2983 	/* try to see if this is a kernel pipe port */
2984 	error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2985 	if (error != 0 || *na != NULL) {
2986 		return error;
2987 	}
2988 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
2989 #if CONFIG_NEXUS_FLOWSWITCH
2990 	/* try to see if this is a flowswitch port */
2991 	error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2992 	if (error != 0 || *na != NULL) {
2993 		return error;
2994 	}
2995 #endif /* CONFIG_NEXUS_FLOWSWITCH */
2996 #if CONFIG_NEXUS_NETIF
2997 	error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2998 	if (error != 0 || *na != NULL) {
2999 		return error;
3000 	}
3001 #endif /* CONFIG_NEXUS_NETIF */
3002 
3003 	ASSERT(*na == NULL);
3004 	return ENXIO;
3005 }
3006 
3007 void
3008 na_retain_locked(struct nexus_adapter *na)
3009 {
3010 	SK_LOCK_ASSERT_HELD();
3011 
3012 	if (na != NULL) {
3013 #if SK_LOG
3014 		uint32_t oref = os_atomic_inc_orig(&na->na_refcount, relaxed);
3015 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (%p) refcnt %u chcnt %u",
3016 		    na->na_name, SK_KVA(na), oref + 1, na->na_channels);
3017 #else /* !SK_LOG */
3018 		os_atomic_inc(&na->na_refcount, relaxed);
3019 #endif /* !SK_LOG */
3020 	}
3021 }
3022 
3023 /* returns 1 iff the nexus_adapter is destroyed */
3024 int
3025 na_release_locked(struct nexus_adapter *na)
3026 {
3027 	uint32_t oref;
3028 
3029 	SK_LOCK_ASSERT_HELD();
3030 
3031 	ASSERT(na->na_refcount > 0);
3032 	oref = os_atomic_dec_orig(&na->na_refcount, relaxed);
3033 	if (oref > 1) {
3034 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (%p) refcnt %u chcnt %u",
3035 		    na->na_name, SK_KVA(na), oref - 1, na->na_channels);
3036 		return 0;
3037 	}
3038 	ASSERT(na->na_channels == 0);
3039 
3040 	if (na->na_dtor != NULL) {
3041 		na->na_dtor(na);
3042 	}
3043 
3044 	ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
3045 	ASSERT(na->na_slot_ctxs == NULL);
3046 	ASSERT(na->na_scratch == NULL);
3047 
3048 #if CONFIG_NEXUS_USER_PIPE
3049 	nx_upipe_na_dealloc(na);
3050 #endif /* CONFIG_NEXUS_USER_PIPE */
3051 	if (na->na_arena != NULL) {
3052 		skmem_arena_release(na->na_arena);
3053 		na->na_arena = NULL;
3054 	}
3055 
3056 	SK_DF(SK_VERB_MEM, "na \"%s\" (%p) being freed",
3057 	    na->na_name, SK_KVA(na));
3058 
3059 	NA_FREE(na);
3060 	return 1;
3061 }
3062 
3063 static struct nexus_adapter *
3064 na_pseudo_alloc(zalloc_flags_t how)
3065 {
3066 	struct nexus_adapter *na;
3067 
3068 	na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
3069 	if (na) {
3070 		na->na_type = NA_PSEUDO;
3071 		na->na_free = na_pseudo_free;
3072 	}
3073 	return na;
3074 }
3075 
3076 static void
3077 na_pseudo_free(struct nexus_adapter *na)
3078 {
3079 	ASSERT(na->na_refcount == 0);
3080 	SK_DF(SK_VERB_MEM, "na %p FREE", SK_KVA(na));
3081 	bzero(na, sizeof(*na));
3082 	zfree(na_pseudo_zone, na);
3083 }
3084 
3085 static int
3086 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3087     uint32_t flags)
3088 {
3089 #pragma unused(kring, p, flags)
3090 	SK_DF(SK_VERB_SYNC | SK_VERB_TX,
3091 	    "%s(%d) kr \"%s\" (%p) krflags 0x%x ring %u flags 0%x",
3092 	    sk_proc_name(p), sk_proc_pid(p), kring->ckr_name,
3093 	    SK_KVA(kring), kring->ckr_flags, kring->ckr_ring_id,
3094 	    flags);
3095 
3096 	return 0;
3097 }
3098 
3099 static int
3100 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3101     uint32_t flags)
3102 {
3103 #pragma unused(kring, p, flags)
3104 	SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3105 	    "%s(%d) kr \"%s\" (%p) krflags 0x%x ring %u flags 0%x",
3106 	    sk_proc_name(p), sk_proc_pid(p), kring->ckr_name,
3107 	    SK_KVA(kring), kring->ckr_flags, kring->ckr_ring_id, flags);
3108 
3109 	ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3110 
3111 	return 0;
3112 }
3113 
3114 static int
3115 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3116 {
3117 	SK_DF(SK_VERB_NA, "na \"%s\" (%p) %s", na->na_name,
3118 	    SK_KVA(na), na_activate_mode2str(mode));
3119 
3120 	switch (mode) {
3121 	case NA_ACTIVATE_MODE_ON:
3122 		os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
3123 		break;
3124 
3125 	case NA_ACTIVATE_MODE_DEFUNCT:
3126 		break;
3127 
3128 	case NA_ACTIVATE_MODE_OFF:
3129 		os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);
3130 		break;
3131 
3132 	default:
3133 		VERIFY(0);
3134 		/* NOTREACHED */
3135 		__builtin_unreachable();
3136 	}
3137 
3138 	return 0;
3139 }
3140 
3141 static void
3142 na_pseudo_dtor(struct nexus_adapter *na)
3143 {
3144 #pragma unused(na)
3145 }
3146 
3147 static int
3148 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3149 {
3150 	return na_rings_mem_setup(na, FALSE, ch);
3151 }
3152 
3153 static void
3154 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3155     boolean_t defunct)
3156 {
3157 	na_rings_mem_teardown(na, ch, defunct);
3158 }
3159 
3160 /*
3161  * Pseudo nexus adapter; typically used as a generic parent adapter.
3162  */
3163 int
3164 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3165     struct nexus_adapter **ret)
3166 {
3167 	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3168 	struct nexus_adapter *na;
3169 	int error;
3170 
3171 	SK_LOCK_ASSERT_HELD();
3172 	*ret = NULL;
3173 
3174 	na = na_pseudo_alloc(Z_WAITOK);
3175 
3176 	ASSERT(na->na_type == NA_PSEUDO);
3177 	ASSERT(na->na_free == na_pseudo_free);
3178 
3179 	(void) strbufcpy(na->na_name, chr->cr_name);
3180 	uuid_generate_random(na->na_uuid);
3181 
3182 	/*
3183 	 * Verify upper bounds; for all cases including user pipe nexus,
3184 	 * the parameters must have already been validated by corresponding
3185 	 * nxdom_prov_params() function defined by each domain.
3186 	 */
3187 	na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3188 	na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3189 	na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3190 	na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3191 	ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3192 	ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3193 	ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3194 	ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3195 
3196 	na->na_txsync = na_pseudo_txsync;
3197 	na->na_rxsync = na_pseudo_rxsync;
3198 	na->na_activate = na_pseudo_activate;
3199 	na->na_dtor = na_pseudo_dtor;
3200 	na->na_krings_create = na_pseudo_krings_create;
3201 	na->na_krings_delete = na_pseudo_krings_delete;
3202 
3203 	*(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3204 	    NEXUS_STATS_TYPE_INVALID;
3205 
3206 	/* other fields are set in the common routine */
3207 	na_attach_common(na, nx, NX_DOM_PROV(nx));
3208 
3209 	if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3210 	    nx, na)) != 0) {
3211 		ASSERT(na->na_arena == NULL);
3212 		goto err;
3213 	}
3214 	ASSERT(na->na_arena != NULL);
3215 
3216 	*(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3217 	ASSERT(na->na_flowadv_max == 0 ||
3218 	    skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3219 
3220 #if SK_LOG
3221 	uuid_string_t uuidstr;
3222 	SK_DF(SK_VERB_NA, "na_name: \"%s\"", na->na_name);
3223 	SK_DF(SK_VERB_NA, "  UUID:        %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3224 	SK_DF(SK_VERB_NA, "  nx:          %p (\"%s\":\"%s\")",
3225 	    SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3226 	    NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3227 	SK_DF(SK_VERB_NA, "  flags:       0x%x", na->na_flags);
3228 	SK_DF(SK_VERB_NA, "  flowadv_max: %u", na->na_flowadv_max);
3229 	SK_DF(SK_VERB_NA, "  rings:       tx %u rx %u",
3230 	    na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3231 	SK_DF(SK_VERB_NA, "  slots:       tx %u rx %u",
3232 	    na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3233 #if CONFIG_NEXUS_USER_PIPE
3234 	SK_DF(SK_VERB_NA, "  next_pipe:   %u", na->na_next_pipe);
3235 	SK_DF(SK_VERB_NA, "  max_pipes:   %u", na->na_max_pipes);
3236 #endif /* CONFIG_NEXUS_USER_PIPE */
3237 #endif /* SK_LOG */
3238 
3239 	*ret = na;
3240 	na_retain_locked(na);
3241 
3242 	return 0;
3243 
3244 err:
3245 	if (na != NULL) {
3246 		if (na->na_arena != NULL) {
3247 			skmem_arena_release(na->na_arena);
3248 			na->na_arena = NULL;
3249 		}
3250 		NA_FREE(na);
3251 	}
3252 	return error;
3253 }
3254 
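/*
 * Illustrative sketch (not compiled into the kernel): how a caller of
 * na_pseudo_create() might manage the adapter reference it receives.
 * The helper name below is hypothetical, and na_release_locked() is
 * assumed to be the counterpart of the na_retain_locked() reference
 * taken on success.
 */
#if 0
static int
example_pseudo_attach(struct kern_nexus *nx, struct chreq *chr)
{
	struct nexus_adapter *na = NULL;
	int error;

	SK_LOCK_ASSERT_HELD();

	error = na_pseudo_create(nx, chr, &na);
	if (error != 0) {
		return error;	/* *ret is left NULL on failure */
	}

	/* ... use the adapter ... */

	na_release_locked(na);	/* drop the reference taken for us above */
	return 0;
}
#endif
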
3255 void
3256 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3257     const flowadv_idx_t fe_idx, const uint32_t flowid)
3258 {
3259 	struct skmem_arena *ar = na->na_arena;
3260 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3261 	struct __flowadv_entry *__single fae;
3262 
3263 	ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3264 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3265 
3266 	AR_LOCK(ar);
3267 
3268 	/* we must not get here if arena is defunct; this must be valid */
3269 	ASSERT(arn->arn_flowadv_obj != NULL);
3270 
3271 	VERIFY(fe_idx < na->na_flowadv_max);
3272 	fae = &arn->arn_flowadv_obj[fe_idx];
3273 	uuid_copy(fae->fae_id, fae_id);
3274 	fae->fae_flowid = flowid;
3275 	fae->fae_flags = FLOWADVF_VALID;
3276 
3277 	AR_UNLOCK(ar);
3278 }
3279 
3280 void
3281 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3282     const flowadv_idx_t fe_idx, const uint32_t flowid)
3283 {
3284 #pragma unused(fae_id)
3285 	struct skmem_arena *ar = na->na_arena;
3286 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3287 
3288 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3289 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3290 
3291 	AR_LOCK(ar);
3292 
3293 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3294 	if (arn->arn_flowadv_obj != NULL) {
3295 		struct __flowadv_entry *__single fae;
3296 
3297 		VERIFY(fe_idx < na->na_flowadv_max);
3298 		fae = &arn->arn_flowadv_obj[fe_idx];
3299 		ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3300 		uuid_clear(fae->fae_id);
3301 		VERIFY(fae->fae_flowid == flowid);
3302 		fae->fae_flowid = 0;
3303 		fae->fae_flags = 0;
3304 	}
3305 
3306 	AR_UNLOCK(ar);
3307 }
3308 
3309 bool
3310 na_flowadv_set(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3311     const flowadv_token_t flow_token)
3312 {
3313 	struct nexus_adapter *na = ch->ch_na;
3314 	struct skmem_arena *ar = na->na_arena;
3315 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3316 	uuid_string_t fae_uuid_str;
3317 	bool suspend = false;
3318 
3319 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3320 	ASSERT(fe_idx < na->na_flowadv_max);
3321 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3322 
3323 	AR_LOCK(ar);
3324 
3325 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3326 
3327 	if (arn->arn_flowadv_obj != NULL) {
3328 		struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3329 
3330 		static_assert(sizeof(fae->fae_token) == sizeof(flow_token));
3331 		/*
3332 		 * We cannot guarantee that the flow is still around by now,
3333 		 * so check if that's the case and let the caller know.
3334 		 */
3335 		if ((suspend = (fae->fae_token == flow_token))) {
3336 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3337 			fae->fae_flags |= FLOWADVF_SUSPENDED;
3338 			uuid_unparse(fae->fae_id, fae_uuid_str);
3339 		}
3340 	} else {
3341 		suspend = false;
3342 	}
3343 	if (suspend) {
3344 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x fidx %u "
3345 		    "SUSPEND", sk_proc_name(current_proc()),
3346 		    sk_proc_pid(current_proc()), fae_uuid_str, flow_token, fe_idx);
3347 	} else {
3348 		SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around",
3349 		    sk_proc_name(current_proc()),
3350 		    sk_proc_pid(current_proc()), flow_token, fe_idx);
3351 	}
3352 
3353 	AR_UNLOCK(ar);
3354 
3355 	return suspend;
3356 }
3357 
3358 bool
3359 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3360     const flowadv_token_t flow_token)
3361 {
3362 	struct nexus_adapter *na = ch->ch_na;
3363 	struct skmem_arena *ar = na->na_arena;
3364 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3365 	uuid_string_t fae_uuid_str;
3366 	boolean_t resume = false;
3367 
3368 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3369 	ASSERT(fe_idx < na->na_flowadv_max);
3370 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3371 
3372 	AR_LOCK(ar);
3373 
3374 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3375 
3376 	if (arn->arn_flowadv_obj != NULL) {
3377 		struct __flowadv_entry *__single fae = &arn->arn_flowadv_obj[fe_idx];
3378 
3379 		static_assert(sizeof(fae->fae_token) == sizeof(flow_token));
3380 		/*
3381 		 * We cannot guarantee that the flow is still around by now,
3382 		 * so check if that's the case and let the caller know.
3383 		 */
3384 		if ((resume = (fae->fae_token == flow_token))) {
3385 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3386 			fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3387 			uuid_unparse(fae->fae_id, fae_uuid_str);
3388 		}
3389 	} else {
3390 		resume = FALSE;
3391 	}
3392 	if (resume) {
3393 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x "
3394 		    "fidx %u RESUME", ch->ch_name, ch->ch_pid, fae_uuid_str,
3395 		    flow_token, fe_idx);
3396 	} else {
3397 		SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3398 		    ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3399 	}
3400 
3401 	AR_UNLOCK(ar);
3402 
3403 	return resume;
3404 }
3405 
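/*
 * Illustrative sketch (not compiled): na_flowadv_set() and
 * na_flowadv_clear() validate the caller's (index, token) pair against
 * the entry, so a stale pair is reported back as "no longer around"
 * instead of touching a reused slot.  The helper below is hypothetical.
 */
#if 0
static void
example_flow_pause_resume(struct kern_channel *ch, flowadv_idx_t fidx,
    flowadv_token_t token, bool pause)
{
	bool hit;

	hit = pause ? na_flowadv_set(ch, fidx, token) :
	    na_flowadv_clear(ch, fidx, token);
	if (!hit) {
		/* the flow is gone; forget the cached (fidx, token) pair */
	}
}
#endif
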
3406 int
3407 na_flowadv_report_congestion_event(const struct kern_channel *ch,
3408     const flowadv_idx_t fe_idx, const flowadv_token_t flow_token,
3409     uint32_t congestion_cnt, __unused uint32_t l4s_ce_cnt, uint32_t total_pkt_cnt)
3410 {
3411 	struct nexus_adapter *na = ch->ch_na;
3412 	struct skmem_arena *ar = na->na_arena;
3413 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3414 	uuid_string_t fae_uuid_str;
3415 	boolean_t added;
3416 
3417 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3418 	ASSERT(fe_idx < na->na_flowadv_max);
3419 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3420 
3421 	AR_LOCK(ar);
3422 
3423 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3424 
3425 	if (arn->arn_flowadv_obj != NULL) {
3426 		struct __flowadv_entry *__single fae = &arn->arn_flowadv_obj[fe_idx];
3427 
3428 		static_assert(sizeof(fae->fae_token) == sizeof(flow_token));
3429 		/*
3430 		 * We cannot guarantee that the flow is still around by now,
3431 		 * so check if that's the case and let the caller know.
3432 		 */
3433 		if ((added = (fae->fae_token == flow_token))) {
3434 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3435 			fae->fae_congestion_cnt += congestion_cnt;
3436 			fae->fae_pkt_cnt += total_pkt_cnt;
3437 			uuid_unparse(fae->fae_id, fae_uuid_str);
3438 		}
3439 	} else {
3440 		added = FALSE;
3441 	}
3442 	if (added) {
3443 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x "
3444 		    "fidx %u ce cnt incremented", ch->ch_name,
3445 		    ch->ch_pid, fae_uuid_str, flow_token, fe_idx);
3446 	} else {
3447 		SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3448 		    ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3449 	}
3450 
3451 	AR_UNLOCK(ar);
3452 
3453 	return added;
3454 }
3455 
3456 void
3457 na_flowadv_event(struct __kern_channel_ring *kring)
3458 {
3459 	ASSERT(kring->ckr_tx == NR_TX);
3460 
3461 	SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (%p) kr %p",
3462 	    sk_proc_name(current_proc()), sk_proc_pid(current_proc()),
3463 	    KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3464 
3465 	na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3466 }
3467 
3468 static int
3469 na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3470     uint32_t flags)
3471 {
3472 #pragma unused(flags, p)
3473 	int n, ret = 0;
3474 	slot_idx_t j;
3475 	struct __kern_slot_desc *ksd;
3476 	struct __user_slot_desc *usd;
3477 	struct __kern_quantum *kqum;
3478 	struct kern_pbufpool *pp = kring->ckr_pp;
3479 	uint32_t nfree = 0;
3480 
3481 	/* packet pool list is protected by channel lock */
3482 	ASSERT(!KR_KERNEL_ONLY(kring));
3483 
3484 	/* # of new slots */
3485 	n = kring->ckr_rhead - kring->ckr_khead;
3486 	if (n < 0) {
3487 		n += kring->ckr_num_slots;
3488 	}
3489 
3490 	/* nothing to free */
3491 	if (__improbable(n == 0)) {
3492 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3493 		    sk_proc_name(p), sk_proc_pid(p), kring->ckr_name,
3494 		    "nothing to free");
3495 		goto done;
3496 	}
3497 
3498 	j = kring->ckr_khead;
3499 	PP_LOCK(pp);
3500 	while (n--) {
3501 		int err;
3502 
3503 		ksd = KR_KSD(kring, j);
3504 		usd = KR_USD(kring, j);
3505 
3506 		if (__improbable(!SD_VALID_METADATA(usd))) {
3507 			SK_ERR("bad slot %d %p", j, SK_KVA(ksd));
3508 			ret = EINVAL;
3509 			break;
3510 		}
3511 
3512 		kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3513 		if (__improbable(err != 0)) {
3514 			SK_ERR("un-allocated packet or buflet %d %p",
3515 			    usd->sd_md_idx, SK_KVA(kqum));
3516 			ret = EINVAL;
3517 			break;
3518 		}
3519 
3520 		/* detach and free the packet */
3521 		kqum->qum_qflags &= ~QUM_F_FINALIZED;
3522 		kqum->qum_ksd = NULL;
3523 		ASSERT(!KSD_VALID_METADATA(ksd));
3524 		USD_DETACH_METADATA(usd);
3525 		ASSERT(pp == kqum->qum_pp);
3526 		ASSERT(nfree < kring->ckr_num_slots);
3527 		kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3528 		j = SLOT_NEXT(j, kring->ckr_lim);
3529 	}
3530 	PP_UNLOCK(pp);
3531 
3532 	if (__probable(nfree > 0)) {
3533 		pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3534 	}
3535 
3536 	kring->ckr_khead = j;
3537 	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3538 
3539 done:
3540 	return ret;
3541 }
3542 
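/*
 * Worked example for the slot accounting above (illustrative only): with
 * ckr_num_slots == 8, ckr_khead == 6 and ckr_rhead == 2, the raw
 * difference is 2 - 6 == -4, so n becomes -4 + 8 == 4 and the loop walks
 * slots 6, 7, 0 and 1 before ckr_khead catches up with ckr_rhead.
 */
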
3543 #define MAX_BUFLETS 64
3544 static int
3545 alloc_packets(kern_pbufpool_t pp, uint64_t *__counted_by(*ph_cnt)buf_arr, bool large,
3546     uint32_t *ph_cnt)
3547 {
3548 	int err;
3549 	uint32_t need, need_orig, remain, alloced, i;
3550 	uint64_t buflets[MAX_BUFLETS];
3551 	uint64_t *__indexable pkts;
3552 
3553 	need_orig = *ph_cnt;
3554 	err = kern_pbufpool_alloc_batch_nosleep(pp, large ? 0 : 1, buf_arr, ph_cnt);
3555 	if (!large) {
3556 		return err;
3557 	}
3558 	if (*ph_cnt == 0) {
3559 		SK_ERR("failed to alloc %d packets for alloc ring: err %d",
3560 		    need_orig, err);
3561 		DTRACE_SKYWALK2(alloc__pkts__fail, uint32_t, need_orig, int, err);
3562 		return err;
3563 	}
3564 	need = remain = *ph_cnt;
3565 	alloced = 0;
3566 	pkts = buf_arr;
3567 	while (remain > 0) {
3568 		uint32_t cnt, cnt_orig;
3569 
3570 		cnt = MIN(remain, MAX_BUFLETS);
3571 		cnt_orig = cnt;
3572 		err = pp_alloc_buflet_batch(pp, buflets, &cnt, SKMEM_NOSLEEP, true);
3573 		if (cnt == 0) {
3574 			SK_ERR("failed to alloc %d buflets for alloc ring: "
3575 			    "remain %d, err %d", cnt_orig, remain, err);
3576 			DTRACE_SKYWALK3(alloc__bufs__fail, uint32_t, cnt_orig,
3577 			    uint32_t, remain, int, err);
3578 			break;
3579 		}
3580 		for (i = 0; i < cnt; i++) {
3581 			kern_packet_t ph = (kern_packet_t)pkts[i];
3582 			kern_buflet_t __single buf = __unsafe_forge_single(
3583 				kern_buflet_t, buflets[i]);
3584 			kern_buflet_t pbuf = kern_packet_get_next_buflet(ph, NULL);
3585 			VERIFY(kern_packet_add_buflet(ph, pbuf, buf) == 0);
3586 			buflets[i] = 0;
3587 		}
3588 		DTRACE_SKYWALK3(alloc__bufs, uint32_t, remain, uint32_t, cnt,
3589 		    uint32_t, cnt_orig);
3590 		pkts += cnt;
3591 		alloced += cnt;
3592 		remain -= cnt;
3593 	}
3594 	/* free packets without attached buffers */
3595 	if (remain > 0) {
3596 		DTRACE_SKYWALK1(remaining__pkts, uint32_t, remain);
3597 		ASSERT(remain + alloced == need);
3598 		pp_free_packet_batch(pp, pkts, remain);
3599 
3600 		/* pp_free_packet_batch() should clear the pkts array */
3601 		for (i = 0; i < remain; i++) {
3602 			ASSERT(pkts[i] == 0);
3603 		}
3604 	}
3605 	*ph_cnt = alloced;
3606 	if (*ph_cnt == 0) {
3607 		err = ENOMEM;
3608 	} else if (*ph_cnt < need_orig) {
3609 		err = EAGAIN;
3610 	} else {
3611 		err = 0;
3612 	}
3613 	DTRACE_SKYWALK3(alloc__packets, uint32_t, need_orig, uint32_t, *ph_cnt, int, err);
3614 	return err;
3615 }
3616 
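/*
 * Illustrative sketch (not compiled): alloc_packets() is best-effort.
 * EAGAIN means a partial batch was produced and *ph_cnt now holds the
 * number of usable handles in buf_arr; ENOMEM means none were produced.
 * The helper below is hypothetical.
 */
#if 0
static void
example_alloc(kern_pbufpool_t pp, uint64_t *scratch, uint32_t want)
{
	uint32_t got = want;
	int err = alloc_packets(pp, scratch, true, &got);

	if (err == 0 || err == EAGAIN) {
		/* consume scratch[0 .. got - 1] */
	}
}
#endif
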
3617 static int
3618 na_packet_pool_alloc_sync_common(struct __kern_channel_ring *kring, struct proc *p,
3619     uint32_t flags, bool large)
3620 {
3621 	int b, err;
3622 	uint32_t n = 0;
3623 	slot_idx_t j;
3624 	uint64_t now;
3625 	uint32_t curr_ws, ph_needed, ph_cnt;
3626 	struct __kern_slot_desc *ksd;
3627 	struct __user_slot_desc *usd;
3628 	struct __kern_quantum *kqum;
3629 	kern_pbufpool_t pp = kring->ckr_pp;
3630 	pid_t pid = proc_pid(p);
3631 
3632 	/* packet pool list is protected by channel lock */
3633 	ASSERT(!KR_KERNEL_ONLY(kring));
3634 	ASSERT(!PP_KERNEL_ONLY(pp));
3635 
3636 	now = net_uptime();
3637 	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3638 		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3639 			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3640 		}
3641 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3642 		    "%s: purged curr_ws(%d)", kring->ckr_name,
3643 		    kring->ckr_alloc_ws);
3644 		return 0;
3645 	}
3646 	/* reclaim the completed slots */
3647 	kring->ckr_khead = kring->ckr_rhead;
3648 
3649 	/* # of busy (unclaimed) slots */
3650 	b = kring->ckr_ktail - kring->ckr_khead;
3651 	if (b < 0) {
3652 		b += kring->ckr_num_slots;
3653 	}
3654 
3655 	curr_ws = kring->ckr_alloc_ws;
3656 	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3657 		/* increment the working set by 50% */
3658 		curr_ws += (curr_ws >> 1);
3659 		curr_ws = MIN(curr_ws, kring->ckr_lim);
3660 	} else {
3661 		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3662 		    (uint32_t)b >= (curr_ws >> 2)) {
3663 			/* decrease the working set by 25% */
3664 			curr_ws -= (curr_ws >> 2);
3665 		}
3666 	}
3667 	curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
3668 	if (curr_ws > (uint32_t)b) {
3669 		n = curr_ws - b;
3670 	}
3671 	kring->ckr_alloc_ws = curr_ws;
3672 	kring->ckr_sync_time = now;
3673 
3674 	/* min with # of avail free slots (subtract busy from max) */
3675 	n = ph_needed = MIN(n, kring->ckr_lim - b);
3676 	j = kring->ckr_ktail;
3677 	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3678 	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3679 
3680 	if ((ph_cnt = ph_needed) == 0) {
3681 		goto done;
3682 	}
3683 
3684 	err = alloc_packets(pp, kring->ckr_scratch,
3685 	    PP_HAS_BUFFER_ON_DEMAND(pp) && large, &ph_cnt);
3686 	if (__improbable(ph_cnt == 0)) {
3687 		SK_ERR("kr %p failed to alloc %u packets(%d)",
3688 		    SK_KVA(kring), ph_needed, err);
3689 		kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
3690 	} else {
3691 		/*
3692 		 * Add packets to the allocated list of user packet pool.
3693 		 */
3694 		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
3695 	}
3696 
3697 	for (n = 0; n < ph_cnt; n++) {
3698 		ksd = KR_KSD(kring, j);
3699 		usd = KR_USD(kring, j);
3700 
3701 		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
3702 		kring->ckr_scratch[n] = 0;
3703 		ASSERT(kqum != NULL);
3704 
3705 		/* cleanup any stale slot mapping */
3706 		KSD_RESET(ksd);
3707 		ASSERT(usd != NULL);
3708 		USD_RESET(usd);
3709 
3710 		/*
3711 		 * Since this packet is freshly allocated and we need to
3712 		 * have the flag set for the attach to succeed, just set
3713 		 * it here rather than calling __packet_finalize().
3714 		 */
3715 		kqum->qum_qflags |= QUM_F_FINALIZED;
3716 
3717 		/* Attach packet to slot */
3718 		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
3719 		/*
3720 		 * externalize the packet as it is being transferred to
3721 		 * user space.
3722 		 */
3723 		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);
3724 
3725 		j = SLOT_NEXT(j, kring->ckr_lim);
3726 	}
3727 done:
3728 	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3729 	kring->ckr_ktail = j;
3730 	return 0;
3731 }
3732 
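/*
 * Worked example for the working-set logic above (illustrative only):
 * with ckr_alloc_ws == 64, a forced sync grows it by 50% to 96 (capped
 * at ckr_lim); otherwise, once na_upp_ws_hold_time has elapsed and at
 * least 16 slots (a quarter of the set) are still busy, it shrinks by
 * 25% to 48.  The result never drops below na_upp_alloc_lowat.
 */
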
3733 static int
3734 na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
3735     uint32_t flags)
3736 {
3737 	return na_packet_pool_alloc_sync_common(kring, p, flags, false);
3738 }
3739 
3740 static int
3741 na_packet_pool_alloc_large_sync(struct __kern_channel_ring *kring, struct proc *p,
3742     uint32_t flags)
3743 {
3744 	return na_packet_pool_alloc_sync_common(kring, p, flags, true);
3745 }
3746 
3747 static int
3748 na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3749     uint32_t flags)
3750 {
3751 #pragma unused(flags, p)
3752 	int n, ret = 0;
3753 	slot_idx_t j;
3754 	struct __kern_slot_desc *ksd;
3755 	struct __user_slot_desc *usd;
3756 	struct __kern_buflet *kbft;
3757 	struct kern_pbufpool *pp = kring->ckr_pp;
3758 
3759 	/* packet pool list is protected by channel lock */
3760 	ASSERT(!KR_KERNEL_ONLY(kring));
3761 
3762 	/* # of new slots */
3763 	n = kring->ckr_rhead - kring->ckr_khead;
3764 	if (n < 0) {
3765 		n += kring->ckr_num_slots;
3766 	}
3767 
3768 	/* nothing to free */
3769 	if (__improbable(n == 0)) {
3770 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3771 		    sk_proc_name(p), sk_proc_pid(p), kring->ckr_name,
3772 		    "nothing to free");
3773 		goto done;
3774 	}
3775 
3776 	j = kring->ckr_khead;
3777 	while (n--) {
3778 		int err;
3779 
3780 		ksd = KR_KSD(kring, j);
3781 		usd = KR_USD(kring, j);
3782 
3783 		if (__improbable(!SD_VALID_METADATA(usd))) {
3784 			SK_ERR("bad slot %d %p", j, SK_KVA(ksd));
3785 			ret = EINVAL;
3786 			break;
3787 		}
3788 
3789 		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
3790 		if (__improbable(err != 0)) {
3791 			SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
3792 			    SK_KVA(kbft));
3793 			ret = EINVAL;
3794 			break;
3795 		}
3796 
3797 		/* detach and free the packet */
3798 		ASSERT(!KSD_VALID_METADATA(ksd));
3799 		USD_DETACH_METADATA(usd);
3800 		pp_free_buflet(pp, kbft);
3801 		j = SLOT_NEXT(j, kring->ckr_lim);
3802 	}
3803 	kring->ckr_khead = j;
3804 	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3805 
3806 done:
3807 	return ret;
3808 }
3809 
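/*
 * Note (illustrative): unlike na_packet_pool_free_sync() above, which
 * batches completed packets through ckr_scratch and frees them with
 * pp_free_packet_batch(), the buflet variant above returns each detached
 * buflet to the pool individually via pp_free_buflet().
 */
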
3810 static int
3811 na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3812     uint32_t flags)
3813 {
3814 	int b, err;
3815 	uint32_t n = 0;
3816 	slot_idx_t j;
3817 	uint64_t now;
3818 	uint32_t curr_ws, bh_needed, bh_cnt;
3819 	struct __kern_slot_desc *ksd;
3820 	struct __user_slot_desc *usd;
3821 	struct __kern_buflet *kbft;
3822 	struct __kern_buflet_ext *kbe;
3823 	kern_pbufpool_t pp = kring->ckr_pp;
3824 	pid_t pid = proc_pid(p);
3825 
3826 	/* packet pool list is protected by channel lock */
3827 	ASSERT(!KR_KERNEL_ONLY(kring));
3828 	ASSERT(!PP_KERNEL_ONLY(pp));
3829 
3830 	now = net_uptime();
3831 	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3832 		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3833 			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3834 		}
3835 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3836 		    "%s: purged curr_ws(%d)", kring->ckr_name,
3837 		    kring->ckr_alloc_ws);
3838 		return 0;
3839 	}
3840 	/* reclaim the completed slots */
3841 	kring->ckr_khead = kring->ckr_rhead;
3842 
3843 	/* # of busy (unclaimed) slots */
3844 	b = kring->ckr_ktail - kring->ckr_khead;
3845 	if (b < 0) {
3846 		b += kring->ckr_num_slots;
3847 	}
3848 
3849 	curr_ws = kring->ckr_alloc_ws;
3850 	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3851 		/* increment the working set by 50% */
3852 		curr_ws += (curr_ws >> 1);
3853 		curr_ws = MIN(curr_ws, kring->ckr_lim);
3854 	} else {
3855 		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3856 		    (uint32_t)b >= (curr_ws >> 2)) {
3857 			/* decrease the working set by 25% */
3858 			curr_ws -= (curr_ws >> 2);
3859 		}
3860 	}
3861 	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
3862 	if (curr_ws > (uint32_t)b) {
3863 		n = curr_ws - b;
3864 	}
3865 	kring->ckr_alloc_ws = curr_ws;
3866 	kring->ckr_sync_time = now;
3867 
3868 	/* min with # of avail free slots (subtract busy from max) */
3869 	n = bh_needed = MIN(n, kring->ckr_lim - b);
3870 	j = kring->ckr_ktail;
3871 	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3872 	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3873 
3874 	if ((bh_cnt = bh_needed) == 0) {
3875 		goto done;
3876 	}
3877 
3878 	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
3879 	    SKMEM_NOSLEEP, false);
3880 
3881 	if (bh_cnt == 0) {
3882 		SK_ERR("kr %p failed to alloc %u buflets(%d)",
3883 		    SK_KVA(kring), bh_needed, err);
3884 		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
3885 	}
3886 
3887 	for (n = 0; n < bh_cnt; n++) {
3888 		struct __user_buflet *ubft;
3889 
3890 		ksd = KR_KSD(kring, j);
3891 		usd = KR_USD(kring, j);
3892 
3893 		kbe = __unsafe_forge_single(struct __kern_buflet_ext *,
3894 		    (kring->ckr_scratch[n]));
3895 		kbft = &kbe->kbe_overlay;
3896 
3897 		kring->ckr_scratch[n] = 0;
3898 		ASSERT(kbft != NULL);
3899 
3900 		/*
3901 		 * Add buflet to the allocated list of user packet pool.
3902 		 */
3903 		pp_insert_upp_bft(pp, kbft, pid);
3904 
3905 		/*
3906 		 * externalize the buflet as it is being transferred to
3907 		 * user space.
3908 		 */
3909 		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
3910 		KBUF_EXTERNALIZE(kbft, ubft, pp);
3911 
3912 		/* cleanup any stale slot mapping */
3913 		KSD_RESET(ksd);
3914 		ASSERT(usd != NULL);
3915 		USD_RESET(usd);
3916 
3917 		/* Attach buflet to slot */
3918 		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);
3919 
3920 		j = SLOT_NEXT(j, kring->ckr_lim);
3921 	}
3922 done:
3923 	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3924 	kring->ckr_ktail = j;
3925 	return 0;
3926 }
3927 
3928 /* The caller needs to ensure that the NA stays intact */
3929 void
3930 na_drain(struct nexus_adapter *na, boolean_t purge)
3931 {
3932 	/* will be cleared on next channel sync */
3933 	if (!(os_atomic_or_orig(&na->na_flags, NAF_DRAINING, relaxed) &
3934 	    NAF_DRAINING) && NA_IS_ACTIVE(na)) {
3935 		SK_DF(SK_VERB_NA, "%s: %s na %p flags 0x%x",
3936 		    na->na_name, (purge ? "purging" : "pruning"),
3937 		    SK_KVA(na), na->na_flags);
3938 
3939 		/* reap (purge/prune) caches in the arena */
3940 		skmem_arena_reap(na->na_arena, purge);
3941 	}
3942 }
3943 
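/*
 * Note (illustrative): os_atomic_or_orig() returns the flag word as it
 * was before the OR, so the arena reap runs only on the first na_drain()
 * call after NAF_DRAINING was last cleared; subsequent calls are no-ops
 * until a channel sync clears the flag again.
 */
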
3944 #if SK_LOG
3945 SK_NO_INLINE_ATTRIBUTE
3946 char *
3947 na2str(const struct nexus_adapter *na, char *__counted_by(dsz)dst,
3948     size_t dsz)
3949 {
3950 	(void) sk_snprintf(dst, dsz, "%p %s flags 0x%b",
3951 	    SK_KVA(na), na->na_name, na->na_flags, NAF_BITS);
3952 
3953 	return dst;
3954 }
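/*
 * Illustrative sketch (not compiled): na2str() formats into the caller
 * supplied buffer and returns that same buffer, so it composes directly
 * with the logging macros.  The buffer size below is an arbitrary choice
 * for the sketch.
 */
#if 0
static void
example_log_na(const struct nexus_adapter *na)
{
	char buf[128];

	SK_DF(SK_VERB_NA, "na %s", na2str(na, buf, sizeof(buf)));
}
#endif
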
3955 #endif
3956