/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
 * All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/systm.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/monitor/nx_monitor.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/upipe/nx_user_pipe.h>
#include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
#include <kern/thread.h>

static int na_krings_use(struct kern_channel *);
static void na_krings_unuse(struct kern_channel *);
static void na_krings_verify(struct nexus_adapter *);
static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
static void na_set_all_rings(struct nexus_adapter *, uint32_t);
static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
static void na_unset_ringid(struct kern_channel *);
static void na_teardown(struct nexus_adapter *, struct kern_channel *,
    boolean_t);

static int na_kr_create(struct nexus_adapter *, uint32_t, boolean_t);
static void na_kr_delete(struct nexus_adapter *);
static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
    boolean_t);
static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
    boolean_t, struct proc *);
static int na_kr_populate_slots(struct __kern_channel_ring *);
static void na_kr_depopulate_slots(struct __kern_channel_ring *,
    struct kern_channel *, boolean_t defunct);

static int na_schema_alloc(struct kern_channel *);

static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
static void na_pseudo_free(struct nexus_adapter *);
static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
static void na_pseudo_dtor(struct nexus_adapter *);
static int na_pseudo_krings_create(struct nexus_adapter *,
    struct kern_channel *);
static void na_pseudo_krings_delete(struct nexus_adapter *,
    struct kern_channel *, boolean_t);
static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);

#define NA_KRING_IDLE_TIMEOUT   (NSEC_PER_SEC * 30) /* 30 seconds */

static ZONE_DEFINE(na_pseudo_zone, SKMEM_ZONE_PREFIX ".na.pseudo",
    sizeof(struct nexus_adapter), ZC_ZFREE_CLEARMEM);

static int __na_inited = 0;

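/*
 * WMM service-class mapping helpers, used by na_kr_setup_netif_svc_map()
 * below: NAKR_WMM_SC2RINGID maps a kernel packet service class to one of
 * the NA_NUM_WMM_CLASSES TX ring indices, NAKR_SET_SVC_LUT records that
 * mapping in the adapter's service class lookup table (indexed by
 * MBUF_SCIDX), and NAKR_SET_KR_SVC stamps the chosen TX kring with its
 * service class.
 */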
#define NA_NUM_WMM_CLASSES      4
#define NAKR_WMM_SC2RINGID(_s)  PKT_SC2TC(_s)
#define NAKR_SET_SVC_LUT(_n, _s)                                        \
	(_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
#define NAKR_SET_KR_SVC(_n, _s)                                         \
	NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)

#define NA_UPP_ALLOC_LOWAT      8
static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;

#define NA_UPP_REAP_INTERVAL    10 /* seconds */
static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;

#define NA_UPP_WS_HOLD_TIME     2 /* seconds */
static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;

#define NA_UPP_REAP_MIN_PKTS    0
static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;

#define NA_UPP_ALLOC_BUF_LOWAT     64
static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;

#if (DEVELOPMENT || DEBUG)
static uint64_t _na_inject_error = 0;
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
	_SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)

SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
    NA_UPP_WS_HOLD_TIME, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
    NA_UPP_REAP_INTERVAL, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
    NA_UPP_REAP_MIN_PKTS, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
    NA_UPP_ALLOC_LOWAT, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
    NA_UPP_ALLOC_BUF_LOWAT, "");
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
    CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
#else
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
#endif /* !DEVELOPMENT && !DEBUG */

#define SKMEM_TAG_NX_RINGS      "com.apple.skywalk.nexus.rings"
static kern_allocation_name_t skmem_tag_nx_rings;

#define SKMEM_TAG_NX_CONTEXTS   "com.apple.skywalk.nexus.contexts"
static kern_allocation_name_t skmem_tag_nx_contexts;

#define SKMEM_TAG_NX_SCRATCH    "com.apple.skywalk.nexus.scratch"
static kern_allocation_name_t skmem_tag_nx_scratch;

#if !XNU_TARGET_OS_OSX
/* see KLDBootstrap::readPrelinkedExtensions() for details */
extern uuid_t kernelcache_uuid;
#else /* XNU_TARGET_OS_OSX */
/* see panic_init() for details */
extern unsigned char *kernel_uuid;
#endif /* XNU_TARGET_OS_OSX */

void
na_init(void)
{
	/*
	 * Changing the size of the nexus_mdata structure won't break the
	 * ABI, but we need to be mindful of memory consumption; thus we
	 * add a compile-time check here to make sure the size is within
	 * the expected limit and that it's properly aligned.  This check
	 * may be adjusted in the future as needed.
	 */
	_CASSERT(sizeof(struct nexus_mdata) <= 32 &&
	    IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
	_CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));

	/* see comments on nexus_meta_type_t */
	_CASSERT(NEXUS_META_TYPE_MAX == 3);
	_CASSERT(NEXUS_META_SUBTYPE_MAX == 3);

	ASSERT(!__na_inited);

	ASSERT(skmem_tag_nx_rings == NULL);
	skmem_tag_nx_rings =
	    kern_allocation_name_allocate(SKMEM_TAG_NX_RINGS, 0);
	ASSERT(skmem_tag_nx_rings != NULL);

	ASSERT(skmem_tag_nx_contexts == NULL);
	skmem_tag_nx_contexts =
	    kern_allocation_name_allocate(SKMEM_TAG_NX_CONTEXTS, 0);
	ASSERT(skmem_tag_nx_contexts != NULL);

	ASSERT(skmem_tag_nx_scratch == NULL);
	skmem_tag_nx_scratch =
	    kern_allocation_name_allocate(SKMEM_TAG_NX_SCRATCH, 0);
	ASSERT(skmem_tag_nx_scratch != NULL);

	__na_inited = 1;
}

void
na_fini(void)
{
	if (__na_inited) {
		if (skmem_tag_nx_rings != NULL) {
			kern_allocation_name_release(skmem_tag_nx_rings);
			skmem_tag_nx_rings = NULL;
		}
		if (skmem_tag_nx_contexts != NULL) {
			kern_allocation_name_release(skmem_tag_nx_contexts);
			skmem_tag_nx_contexts = NULL;
		}
		if (skmem_tag_nx_scratch != NULL) {
			kern_allocation_name_release(skmem_tag_nx_scratch);
			skmem_tag_nx_scratch = NULL;
		}

		__na_inited = 0;
	}
}

/*
 * Interpret the ringid of a chreq by translating it into a pair
 * of intervals of ring indices:
 *
 * [txfirst, txlast) and [rxfirst, rxlast)
 *
 * For example, with RING_SET_ALL, a specific ring_id i yields the
 * singleton intervals [i, i+1) on both TX and RX, while
 * CHANNEL_RING_ID_ANY yields [0, nrings) on each.
 */
int
na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
    ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
{
	enum txrx t;

	switch (ring_set) {
	case RING_SET_ALL:
		/*
		 * Ring pair eligibility: all ring(s).
		 */
		if (ring_id != CHANNEL_RING_ID_ANY &&
		    ring_id >= na_get_nrings(na, NR_TX) &&
		    ring_id >= na_get_nrings(na, NR_RX)) {
			SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
			    na->na_name, (int)ring_id, ring_set);
			return EINVAL;
		}
		for_rx_tx(t) {
			if (ring_id == CHANNEL_RING_ID_ANY) {
				first[t] = 0;
				last[t] = na_get_nrings(na, t);
			} else {
				first[t] = ring_id;
				last[t] = ring_id + 1;
			}
		}
		break;

	default:
		SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
		return EINVAL;
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING,
	    "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
	    na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
	    first[NR_RX], last[NR_RX]);

	return 0;
}

/*
 * Set the ring ID. For devices with a single queue, a request
 * for all rings is the same as a single ring.
 */
static int
na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
{
	struct nexus_adapter *na = ch->ch_na;
	int error;
	enum txrx t;
	uint32_t n_alloc_rings;

	if ((error = na_interp_ringid(na, ring_id, ring_set,
	    ch->ch_first, ch->ch_last)) != 0) {
		return error;
	}

	n_alloc_rings = na_get_nrings(na, NR_A);
	if (n_alloc_rings != 0) {
		ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
		ch->ch_last[NR_A] = ch->ch_last[NR_F] =
		    ch->ch_first[NR_A] + n_alloc_rings;
	} else {
		ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
		ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
	}
	ch->ch_first[NR_EV] = 0;
	ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
	/* XXX: should we initialize na_si_users for the event ring? */

	/*
	 * Optimization: count the users registered for more than
	 * one ring, which are the ones sleeping on the global queue.
	 * The default na_notify() callback will then avoid signaling
	 * the global queue if nobody is using it.
	 */
	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			na->na_si_users[t]++;
			ASSERT(na->na_si_users[t] != 0);
		}
	}
	return 0;
}

static void
na_unset_ringid(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	enum txrx t;

	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			ASSERT(na->na_si_users[t] != 0);
			na->na_si_users[t]--;
		}
		ch->ch_first[t] = ch->ch_last[t] = 0;
	}
}

/*
 * Check that the rings we want to bind are not exclusively owned by a previous
 * bind.  If exclusive ownership has been requested, we also mark the rings.
 */
/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
na_krings_use(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct __kern_channel_ring *kring;
	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
	enum txrx t;
	uint32_t i;

	SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

	/*
	 * First round: check that none of the requested rings is
	 * already exclusively owned, and that we aren't requesting
	 * exclusive ownership of rings that are already in use.
	 */
	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
			    (kring->ckr_users && excl)) {
				SK_DF(SK_VERB_NA | SK_VERB_RING,
				    "kr \"%s\" (0x%llx) krflags 0x%b is busy",
				    kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS);
				return EBUSY;
			}
		}
	}

	/*
	 * Second round: increment usage count and possibly
	 * mark as exclusive.
	 */

	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			kring->ckr_users++;
			if (excl) {
				kring->ckr_flags |= CKRF_EXCLUSIVE;
			}
		}
	}

	return 0;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_unuse(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct __kern_channel_ring *kring;
	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
	enum txrx t;
	uint32_t i;

	SK_DF(SK_VERB_NA | SK_VERB_RING,
	    "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			if (excl) {
				kring->ckr_flags &= ~CKRF_EXCLUSIVE;
			}
			kring->ckr_users--;
		}
	}
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_verify(struct nexus_adapter *na)
{
	struct __kern_channel_ring *kring;
	enum txrx t;
	uint32_t i;

	for_all_rings(t) {
		for (i = 0; i < na_get_nrings(na, t); i++) {
			kring = &NAKR(na, t)[i];
			/* na_kr_create() validations */
			ASSERT(kring->ckr_num_slots > 0);
			ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
			ASSERT(kring->ckr_pp != NULL);

			if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
				continue;
			}
			/* na_kr_setup() validations */
			if (KR_KERNEL_ONLY(kring)) {
				ASSERT(kring->ckr_ring == NULL);
			} else {
				ASSERT(kring->ckr_ring != NULL);
			}
			ASSERT(kring->ckr_ksds_last ==
			    &kring->ckr_ksds[kring->ckr_lim]);
		}
	}
}

int
na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr)
{
	struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
	struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
	uint32_t ch_mode = chr->cr_mode;
	int err = 0;

	SK_LOCK_ASSERT_HELD();
	ASSERT(ch->ch_schema == NULL);
	ASSERT(ch->ch_na == NULL);

	/* ring configuration may have changed, fetch from the card */
	na_update_config(na);
	ch->ch_na = na; /* store the reference */
	err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
	if (err != 0) {
		goto err;
	}

	atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
	    CHANF_USER_PACKET_POOL | CHANF_EVENT_RING));
	if (ch_mode & CHMODE_EXCLUSIVE) {
		atomic_bitset_32(&ch->ch_flags, CHANF_EXCLUSIVE);
	}
	/*
	 * Disallow automatic sync for monitor mode, since TX
	 * direction is disabled.
	 */
	if (ch_mode & CHMODE_MONITOR) {
		atomic_bitset_32(&ch->ch_flags, CHANF_RXONLY);
	}

	if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
	    !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
		SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
		    ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
		err = EINVAL;
		goto err;
	}

	if (na->na_arena->ar_flags & ARF_DEFUNCT) {
		err = ENXIO;
		goto err;
	}

	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
		ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
		atomic_bitset_32(&ch->ch_flags, CHANF_USER_PACKET_POOL);
	}

	if (ch_mode & CHMODE_EVENT_RING) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(na->na_flags & NAF_EVENT_RING);
		ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
		atomic_bitset_32(&ch->ch_flags, CHANF_EVENT_RING);
	}

	/*
	 * If this is the first channel of the adapter, create
	 * the rings and their in-kernel view, the krings.
	 */
	if (na->na_channels == 0) {
		err = na->na_krings_create(na, ch);
		if (err != 0) {
			goto err;
		}

		/*
		 * Sanity check; this is already done in na_kr_create(),
		 * but we do it here as well to validate na_kr_setup().
		 */
		na_krings_verify(na);
		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
	}

	/*
	 * Validate ownership and usability of the krings; take into account
	 * whether some previous bind has exclusive ownership on them.
	 */
	err = na_krings_use(ch);
	if (err != 0) {
		goto err_del_rings;
	}

	/* for a user-facing channel, create a new channel schema */
	if (!(ch->ch_flags & CHANF_KERNEL)) {
		err = na_schema_alloc(ch);
		if (err != 0) {
			goto err_rel_excl;
		}

		ASSERT(ch->ch_schema != NULL);
		ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
	} else {
		ASSERT(ch->ch_schema == NULL);
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* update our work timestamp */
	na->na_work_ts = net_uptime();

	na->na_channels++;

	/*
	 * If a user packet pool is desired, initialize the allocated
	 * object hash table in the pool, if not already.  This also
	 * retains a refcnt on the pool which the caller must release.
	 */
	ASSERT(ch->ch_pp == NULL);
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
#pragma unused(tx_pp)
		ASSERT(rx_pp == tx_pp);
		err = pp_init_upp(rx_pp, TRUE);
		if (err != 0) {
			goto err_free_schema;
		}
		ch->ch_pp = rx_pp;
	}

	if (!NA_IS_ACTIVE(na)) {
		err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
		if (err != 0) {
			goto err_release_pp;
		}

		SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
		    SK_KVA(na));
		SK_D("  na_md_type:    %u", na->na_md_type);
		SK_D("  na_md_subtype: %u", na->na_md_subtype);
	}

	SK_D("ch 0x%llx", SK_KVA(ch));
	SK_D("  ch_flags:     0x%b", ch->ch_flags, CHANF_BITS);
	if (ch->ch_schema != NULL) {
		SK_D("  ch_schema:    0x%llx", SK_KVA(ch->ch_schema));
	}
	SK_D("  ch_na:        0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
	    ch->ch_na->na_channels);
	SK_D("  ch_tx_rings:  [%u,%u)", ch->ch_first[NR_TX],
	    ch->ch_last[NR_TX]);
	SK_D("  ch_rx_rings:  [%u,%u)", ch->ch_first[NR_RX],
	    ch->ch_last[NR_RX]);
	SK_D("  ch_alloc_rings:  [%u,%u)", ch->ch_first[NR_A],
	    ch->ch_last[NR_A]);
	SK_D("  ch_free_rings:  [%u,%u)", ch->ch_first[NR_F],
	    ch->ch_last[NR_F]);
	SK_D("  ch_ev_rings:  [%u,%u)", ch->ch_first[NR_EV],
	    ch->ch_last[NR_EV]);

	return 0;

err_release_pp:
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(ch->ch_pp != NULL);
		pp_release(rx_pp);
		ch->ch_pp = NULL;
	}
err_free_schema:
	*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
	    NEXUS_META_TYPE_INVALID;
	*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
	    NEXUS_META_SUBTYPE_INVALID;
	ASSERT(na->na_channels != 0);
	na->na_channels--;
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}
err_rel_excl:
	na_krings_unuse(ch);
err_del_rings:
	if (na->na_channels == 0) {
		na->na_krings_delete(na, ch, FALSE);
	}
err:
	ch->ch_na = NULL;
	ASSERT(err != 0);

	return err;
}

/*
 * Undo everything that was done in na_bind_channel().
 */
/* call with SK_LOCK held */
void
na_unbind_channel(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;

	SK_LOCK_ASSERT_HELD();

	ASSERT(na->na_channels != 0);
	na->na_channels--;

	/* release exclusive use if it was requested at bind time */
	na_krings_unuse(ch);

	if (na->na_channels == 0) {     /* last instance */
		SK_D("%s(%d): deleting last channel instance for %s",
		    ch->ch_name, ch->ch_pid, na->na_name);

		/*
		 * Free any remaining allocated packets attached to
		 * the slots, followed by a teardown of the arena.
		 */
		na_teardown(na, ch, FALSE);

		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    NEXUS_META_TYPE_INVALID;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    NEXUS_META_SUBTYPE_INVALID;
	} else {
		SK_D("%s(%d): %s has %u remaining channel instance(s)",
		    ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
	}

	/*
	 * Free any allocated packets (for the process) attached to the slots;
	 * note that na_teardown() could have done this there as well.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		pp_release(ch->ch_pp);
		ch->ch_pp = NULL;
	}

	/* possibly decrement counter of tx_si/rx_si users */
	na_unset_ringid(ch);

	/* reap the caches now (purge if adapter is idle) */
	skmem_arena_reap(na->na_arena, (na->na_channels == 0));

	/* delete the channel schema */
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* destroy the memory map */
	skmem_arena_munmap_channel(na->na_arena, ch);

	/* mark the channel as unbound */
	atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE));
	ch->ch_na = NULL;

	/* and finally release the nexus adapter; this might free it */
	(void) na_release_locked(na);
}

static void
na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

#if CONFIG_NEXUS_MONITOR
	/*
	 * Walk through all the rings and tell any monitor
	 * that the port is going to exit Skywalk mode.
	 */
	nx_mon_stop(na);
#endif /* CONFIG_NEXUS_MONITOR */

	/*
	 * Deactivate the adapter.
	 */
	(void) na->na_activate(na,
	    (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));

	/*
	 * Free any remaining allocated packets for this process.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		if (!defunct) {
			pp_release(ch->ch_pp);
			ch->ch_pp = NULL;
		}
	}

	/*
	 * Delete rings and buffers.
	 */
	na->na_krings_delete(na, ch, defunct);
}

/* call with SK_LOCK held */
/*
 * Allocate the per-fd structure __user_channel_schema.
 */
static int
na_schema_alloc(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn;
	mach_vm_offset_t roff[SKMEM_REGIONS];
	struct __kern_channel_ring *kr;
	struct __user_channel_schema *csm;
	struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
	mach_vm_offset_t base;
	uint32_t i, j, k, n[NR_ALL];
	enum txrx t;

	/* see comments for struct __user_channel_schema */
	_CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
	_CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
	    sizeof(csm->csm_ver));
	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
	    sizeof(csm->csm_kern_name));

	SK_LOCK_ASSERT_HELD();

	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
	arn = skmem_arena_nexus(ar);
	ASSERT(arn != NULL);
	for_all_rings(t) {
		n[t] = 0;
	}

	csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
	if (csm == NULL) {
		return ENOMEM;
	}

	skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
	bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));

	*(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;

	/* kernel version and executable UUID */
	_CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
	(void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
	    version, sizeof(csm->csm_kern_name) - 1);
#if !XNU_TARGET_OS_OSX
	(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
	    kernelcache_uuid, sizeof(csm->csm_kern_uuid));
#else /* XNU_TARGET_OS_OSX */
	if (kernel_uuid != NULL) {
		(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
		    kernel_uuid, sizeof(csm->csm_kern_uuid));
	}
#endif /* XNU_TARGET_OS_OSX */

	for_rx_tx(t) {
		ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
		n[t] = ch->ch_last[t] - ch->ch_first[t];
		ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
	}

	/* return total number of tx and rx rings for this channel */
	*(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
	*(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];

	if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
		*(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
		    na->na_num_allocator_ring_pairs;
		n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
		ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
		ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
		ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
	}

	if (ch->ch_flags & CHANF_EVENT_RING) {
		n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
		ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
		*(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
	}

	bzero(&roff, sizeof(roff));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_SCHEMA ||
			    i == SKMEM_REGION_RXBUF ||
			    i == SKMEM_REGION_TXBUF ||
			    i == SKMEM_REGION_RXKMD ||
			    i == SKMEM_REGION_TXKMD ||
			    i == SKMEM_REGION_UMD ||
			    i == SKMEM_REGION_UBFT ||
			    i == SKMEM_REGION_KBFT ||
			    i == SKMEM_REGION_RXKBFT ||
			    i == SKMEM_REGION_TXKBFT ||
			    i == SKMEM_REGION_TXAUSD ||
			    i == SKMEM_REGION_RXFUSD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		/* not for nexus */
		ASSERT(i != SKMEM_REGION_SYSCTLS);

		/*
		 * Get region offsets from base of mmap span; the arena
		 * doesn't need to be mmap'd at this point, since we
		 * simply compute the relative offset.
		 */
		roff[i] = skmem_arena_get_region_offset(ar, i);
	}

	/*
	 * The schema is made up of the descriptor followed inline by an array
	 * of offsets to the tx, rx, allocator and event rings in the mmap span.
	 * They contain the offset between the ring and schema, so the
	 * information is usable in userspace to reach the ring from
	 * the schema.
	 */
	base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
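	/*
	 * Illustrative note (not an ABI statement): since each ring_off and
	 * sd_off below is stored relative to "base", userspace can locate
	 * ring i as (char *)csm + csm->csm_ring_ofs[i].ring_off once the
	 * schema itself has been mapped.
	 */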

	/* initialize schema with tx ring info */
	for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
		kr = &na->na_tx_rings[j];
		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
			continue;
		}

		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}
	/* initialize schema with rx ring info */
	for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
		kr = &na->na_rx_rings[j];
		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
			continue;
		}

		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
		    (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}
	/* initialize schema with allocator ring info */
	for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
	    i < n[NR_A]; i++, j++) {
		mach_vm_offset_t usd_roff;

		usd_roff = roff[SKMEM_REGION_TXAUSD];
		kr = &na->na_alloc_rings[j];
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
		    &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
		    &ksd_oi, &usd_oi);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
	}
	/* initialize schema with free ring info */
	for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
	    i < n[NR_F]; i++, j++) {
		mach_vm_offset_t usd_roff;

		usd_roff = roff[SKMEM_REGION_RXFUSD];
		kr = &na->na_free_rings[j];
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
		    &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
		    &ksd_oi, &usd_oi);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
	}
	/* initialize schema with event ring info */
	for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
	    n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
		ASSERT(csm->csm_num_event_rings != 0);
		kr = &na->na_event_rings[j];
		ASSERT(!KR_KERNEL_ONLY(kr));
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}

	*(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
	    __ch_umd_redzone_cookie;
	*(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
	    na->na_md_subtype;

	if (arn->arn_stats_obj != NULL) {
		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
		ASSERT(roff[SKMEM_REGION_USTATS] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
		    roff[SKMEM_REGION_USTATS];
		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
		    na->na_stats_type;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
		    NEXUS_STATS_TYPE_INVALID;
	}

	if (arn->arn_flowadv_obj != NULL) {
		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
		ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
		    roff[SKMEM_REGION_FLOWADV];
		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
		    na->na_flowadv_max;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
	}

	if (arn->arn_nexusadv_obj != NULL) {
		struct __kern_nexus_adv_metadata *adv_md;

		adv_md = arn->arn_nexusadv_obj;
		ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
		ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
		    roff[SKMEM_REGION_NEXUSADV];
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
	}

	ch->ch_schema = csm;
	ch->ch_schema_offset = base;

	return 0;
}

/*
 * Called by all routines that create nexus_adapters.
 * Attach na to the ifp (if any) and provide defaults
 * for optional callbacks.  Defaults assume that we
 * are creating a hardware nexus_adapter.
 */
void
na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
    struct kern_nexus_domain_provider *nxdom_prov)
{
	SK_LOCK_ASSERT_HELD();

	ASSERT(nx != NULL);
	ASSERT(nxdom_prov != NULL);
	ASSERT(na->na_krings_create != NULL);
	ASSERT(na->na_krings_delete != NULL);
	if (na->na_type != NA_NETIF_COMPAT_DEV) {
		ASSERT(na_get_nrings(na, NR_TX) != 0);
	}
	if (na->na_type != NA_NETIF_COMPAT_HOST) {
		ASSERT(na_get_nrings(na, NR_RX) != 0);
	}
	ASSERT(na->na_channels == 0);

	if (na->na_notify == NULL) {
		na->na_notify = na_notify;
	}

	na->na_nx = nx;
	na->na_nxdom_prov = nxdom_prov;

	SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
	    SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
	    SK_KVA(na->na_arena));
}

void
na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
    boolean_t within_kevent, boolean_t selwake, uint32_t hint)
{
	struct nexus_adapter *na = KRNA(kring);
	enum txrx t = kring->ckr_tx;

	SK_DF(SK_VERB_EVENTS,
	    "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
	    hint, CHAN_FILT_HINT_BITS);

	csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
	/*
	 * optimization: avoid a wake up on the global
	 * queue if nobody has registered for more
	 * than one ring
	 */
	if (na->na_si_users[t] > 0) {
		csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
	}
}

/* default notify callback */
static int
na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
{
#pragma unused(p)
	SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
	    SK_VERB_TX : SK_VERB_RX),
	    "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
	    "flags 0x%x, kh %u kt %u | h %u t %u",
	    sk_proc_name_address(p), sk_proc_pid(p),
	    (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
	    SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
	    kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
	    kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);

	na_post_event(kring, (flags & NA_NOTEF_PUSH),
	    (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);

	return 0;
}

/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
/* call with SK_LOCK held */
int
na_update_config(struct nexus_adapter *na)
{
	uint32_t txr, txd, rxr, rxd;

	SK_LOCK_ASSERT_HELD();

	txr = txd = rxr = rxd = 0;
	if (na->na_config == NULL ||
	    na->na_config(na, &txr, &txd, &rxr, &rxd)) {
		/* take whatever we had at init time */
		txr = na_get_nrings(na, NR_TX);
		txd = na_get_nslots(na, NR_TX);
		rxr = na_get_nrings(na, NR_RX);
		rxd = na_get_nslots(na, NR_RX);
	}

	if (na_get_nrings(na, NR_TX) == txr &&
	    na_get_nslots(na, NR_TX) == txd &&
	    na_get_nrings(na, NR_RX) == rxr &&
	    na_get_nslots(na, NR_RX) == rxd) {
		return 0; /* nothing changed */
	}
	SK_D("stored config %s: txring %u x %u, rxring %u x %u",
	    na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
	    na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
	SK_D("new config %s: txring %u x %u, rxring %u x %u",
	    na->na_name, txr, txd, rxr, rxd);

	if (na->na_channels == 0) {
		SK_D("configuration changed (but fine)");
		na_set_nrings(na, NR_TX, txr);
		na_set_nslots(na, NR_TX, txd);
		na_set_nrings(na, NR_RX, rxr);
		na_set_nslots(na, NR_RX, rxd);
		return 0;
	}
	SK_ERR("configuration changed while active, this is bad...");
	return 1;
}

static void
na_kr_setup_netif_svc_map(struct nexus_adapter *na)
{
	uint32_t i;
	uint32_t num_tx_rings;

	ASSERT(na->na_type == NA_NETIF_DEV);
	num_tx_rings = na_get_nrings(na, NR_TX);

	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_BK));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RD));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RV));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_VI));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_CTL));

	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);

	_CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);

	/*
	 * We support the following two configurations:
	 * 1. packets from all service classes map to a single ring.
	 * 2. a many-to-4 mapping between service classes and rings,
	 *    where the 4 rings correspond to the 4 WMM access categories.
	 */
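	/*
	 * For example, per the _CASSERTs above, under the WMM mapping
	 * KPKT_SC_BK_SYS and KPKT_SC_BK share one TX ring, while
	 * KPKT_SC_VO and KPKT_SC_CTL share another.
	 */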
	if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
		ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
		/* set up the adapter's service class LUT */
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
		NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
		NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
		NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
		NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);

		/* initialize the service class for each of the 4 rings */
		NAKR_SET_KR_SVC(na, KPKT_SC_BK);
		NAKR_SET_KR_SVC(na, KPKT_SC_BE);
		NAKR_SET_KR_SVC(na, KPKT_SC_VI);
		NAKR_SET_KR_SVC(na, KPKT_SC_VO);
	} else {
		ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
		    NEXUS_QMAP_TYPE_DEFAULT);
		/* many-to-1 mapping */
		for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
			na->na_kring_svc_lut[i] = 0;
		}
		for (i = 0; i < num_tx_rings; i++) {
			NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
		}
	}
}

static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");

static lck_grp_t *
na_kr_q_lck_grp(enum txrx t)
{
	switch (t) {
	case NR_TX:
		return &channel_txq_lock_group;
	case NR_RX:
		return &channel_rxq_lock_group;
	case NR_A:
	case NR_F:
		return &channel_alloc_lock_group;
	case NR_EV:
		return &channel_evq_lock_group;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static lck_grp_t *
na_kr_s_lck_grp(enum txrx t)
{
	switch (t) {
	case NR_TX:
		return &channel_txs_lock_group;
	case NR_RX:
		return &channel_rxs_lock_group;
	case NR_A:
	case NR_F:
		return &channel_alloc_lock_group;
	case NR_EV:
		return &channel_evs_lock_group;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static void
kr_init_tbr(struct __kern_channel_ring *r)
{
	r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
	r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
	r->ckr_tbr_last = 0;
}

struct kern_pbufpool *
na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
{
	struct kern_pbufpool *pp = NULL;
	switch (t) {
	case NR_RX:
	case NR_F:
	case NR_EV:
		pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
		break;
	case NR_TX:
	case NR_A:
		pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	return pp;
}

/*
 * Create the krings array and initialize the fields common to all adapters.
 * The array layout is this:
 *
 *                       +----------+
 * na->na_tx_rings ----->|          | \
 *                       |          |  } na->na_num_tx_rings
 *                       |          | /
 * na->na_rx_rings ----> +----------+
 *                       |          | \
 *                       |          |  } na->na_num_rx_rings
 *                       |          | /
 * na->na_alloc_rings -> +----------+
 *                       |          | \
 * na->na_free_rings --> +----------+  } na->na_num_allocator_ring_pairs
 *                       |          | /
 * na->na_event_rings -> +----------+
 *                       |          | \
 *                       |          |  } na->na_num_event_rings
 *                       |          | /
 *                       +----------+
 * na->na_tailroom ----->|          | \
 *                       |          |  } tailroom bytes
 *                       |          | /
 *                       +----------+
 *
 * The tailroom space is currently used by flow switch ports for allocating
 * leases.
 */
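/*
 * Since all five ring classes live in this single contiguous allocation,
 * NAKR(na, t) simply selects the sub-array for ring class t; e.g.
 * NAKR(na, NR_RX)[i] denotes the i-th RX kring, as used throughout this
 * file.
 */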
/* call with SK_LOCK held */
static int
na_kr_create(struct nexus_adapter *na, uint32_t tailroom, boolean_t alloc_ctx)
{
	lck_grp_t *q_lck_grp, *s_lck_grp;
	uint32_t i, len, ndesc;
	struct kern_pbufpool *pp = NULL;
	struct __kern_channel_ring *kring;
	uint32_t n[NR_ALL];
	int c, tot_slots, err = 0;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	n[NR_TX] = na_get_nrings(na, NR_TX);
	n[NR_RX] = na_get_nrings(na, NR_RX);
	n[NR_A] = na_get_nrings(na, NR_A);
	n[NR_F] = na_get_nrings(na, NR_F);
	n[NR_EV] = na_get_nrings(na, NR_EV);

	len = ((n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV]) *
	    sizeof(struct __kern_channel_ring)) + tailroom;

	na->na_rings_mem_sz = (size_t)len;
	na->na_tx_rings = sk_alloc((size_t)len, Z_WAITOK, skmem_tag_nx_rings);
	if (__improbable(na->na_tx_rings == NULL)) {
		SK_ERR("Cannot allocate krings");
		err = ENOMEM;
		goto error;
	}
	na->na_rx_rings = na->na_tx_rings + n[NR_TX];
	if (n[NR_A] != 0) {
		na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
		na->na_free_rings = na->na_alloc_rings + n[NR_A];
	} else {
		na->na_alloc_rings = na->na_free_rings = NULL;
	}
	if (n[NR_EV] != 0) {
		if (na->na_free_rings != NULL) {
			na->na_event_rings = na->na_free_rings + n[NR_F];
		} else {
			na->na_event_rings = na->na_rx_rings + n[NR_RX];
		}
	}

	/* total number of slots for TX/RX adapter rings */
	c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
	    (n[NR_RX] * na_get_nslots(na, NR_RX));

	/* for scratch space on alloc and free rings */
	if (n[NR_A] != 0) {
		tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
		tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
		c = tot_slots;
	}
	na->na_total_slots = tot_slots;

	/* slot context (optional) for all TX/RX ring slots of this adapter */
	if (alloc_ctx) {
		na->na_slot_ctxs =
		    skn_alloc_type_array(slot_ctxs, struct slot_ctx,
		    na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
		if (na->na_slot_ctxs == NULL) {
			SK_ERR("Cannot allocate slot contexts");
			err = ENOMEM;
			goto error;
		}
		atomic_bitset_32(&na->na_flags, NAF_SLOT_CONTEXT);
	}

	/*
	 * packet handle array storage for all TX/RX ring slots of this
	 * adapter.
	 */
	na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
	    na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
	if (na->na_scratch == NULL) {
		SK_ERR("Cannot allocate scratch space");
		err = ENOMEM;
		goto error;
	}

	/*
	 * All fields in krings are 0 except the ones initialized below;
	 * even so, it's better to be explicit on important kring fields.
	 */
1465 	for_all_rings(t) {
1466 		ndesc = na_get_nslots(na, t);
1467 		pp = na_kr_get_pp(na, t);
1468 		for (i = 0; i < n[t]; i++) {
1469 			kring = &NAKR(na, t)[i];
1470 			bzero(kring, sizeof(*kring));
1471 			kring->ckr_na = na;
1472 			kring->ckr_pp = pp;
1473 			kring->ckr_max_pkt_len = pp->pp_buflet_size *
1474 			    pp->pp_max_frags;
1475 			kring->ckr_ring_id = i;
1476 			kring->ckr_tx = t;
1477 			kr_init_to_mhints(kring, ndesc);
1478 			kr_init_tbr(kring);
1479 			if (NA_KERNEL_ONLY(na)) {
1480 				kring->ckr_flags |= CKRF_KERNEL_ONLY;
1481 			}
1482 			if (na->na_flags & NAF_HOST_ONLY) {
1483 				kring->ckr_flags |= CKRF_HOST;
1484 			}
1485 			ASSERT((t >= NR_TXRX) || (c > 0));
1486 			if ((t < NR_TXRX) &&
1487 			    (na->na_flags & NAF_SLOT_CONTEXT)) {
1488 				ASSERT(na->na_slot_ctxs != NULL);
1489 				kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1490 				kring->ckr_slot_ctxs =
1491 				    na->na_slot_ctxs + (tot_slots - c);
1492 			}
1493 			ASSERT(na->na_scratch != NULL);
1494 			if (t < NR_TXRXAF) {
1495 				kring->ckr_scratch =
1496 				    na->na_scratch + (tot_slots - c);
1497 			}
1498 			if (t < NR_TXRXAF) {
1499 				c -= ndesc;
1500 			}
1501 			switch (t) {
1502 			case NR_A:
1503 				if (i == 0) {
1504 					kring->ckr_na_sync =
1505 					    na_packet_pool_alloc_sync;
1506 					kring->ckr_alloc_ws =
1507 					    na_upp_alloc_lowat;
1508 				} else {
1509 					ASSERT(i == 1);
1510 					kring->ckr_na_sync =
1511 					    na_packet_pool_alloc_buf_sync;
1512 					kring->ckr_alloc_ws =
1513 					    na_upp_alloc_buf_lowat;
1514 				}
1515 				break;
1516 			case NR_F:
1517 				if (i == 0) {
1518 					kring->ckr_na_sync =
1519 					    na_packet_pool_free_sync;
1520 				} else {
1521 					ASSERT(i == 1);
1522 					kring->ckr_na_sync =
1523 					    na_packet_pool_free_buf_sync;
1524 				}
1525 				break;
1526 			case NR_TX:
1527 				kring->ckr_na_sync = na->na_txsync;
1528 				if (na->na_flags & NAF_TX_MITIGATION) {
1529 					kring->ckr_flags |= CKRF_MITIGATION;
1530 				}
1531 				switch (na->na_type) {
1532 #if CONFIG_NEXUS_USER_PIPE
1533 				case NA_USER_PIPE:
1534 					ASSERT(!(na->na_flags &
1535 					    NAF_USER_PKT_POOL));
1536 					kring->ckr_prologue = kr_txprologue;
1537 					kring->ckr_finalize = NULL;
1538 					break;
1539 #endif /* CONFIG_NEXUS_USER_PIPE */
1540 #if CONFIG_NEXUS_MONITOR
1541 				case NA_MONITOR:
1542 					ASSERT(!(na->na_flags &
1543 					    NAF_USER_PKT_POOL));
1544 					kring->ckr_prologue = kr_txprologue;
1545 					kring->ckr_finalize = NULL;
1546 					break;
1547 #endif /* CONFIG_NEXUS_MONITOR */
1548 				default:
1549 					if (na->na_flags & NAF_USER_PKT_POOL) {
1550 						kring->ckr_prologue =
1551 						    kr_txprologue_upp;
1552 						kring->ckr_finalize =
1553 						    kr_txfinalize_upp;
1554 					} else {
1555 						kring->ckr_prologue =
1556 						    kr_txprologue;
1557 						kring->ckr_finalize =
1558 						    kr_txfinalize;
1559 					}
1560 					break;
1561 				}
1562 				break;
1563 			case NR_RX:
1564 				kring->ckr_na_sync = na->na_rxsync;
1565 				if (na->na_flags & NAF_RX_MITIGATION) {
1566 					kring->ckr_flags |= CKRF_MITIGATION;
1567 				}
1568 				switch (na->na_type) {
1569 #if CONFIG_NEXUS_USER_PIPE
1570 				case NA_USER_PIPE:
1571 					ASSERT(!(na->na_flags &
1572 					    NAF_USER_PKT_POOL));
1573 					kring->ckr_prologue =
1574 					    kr_rxprologue_nodetach;
1575 					kring->ckr_finalize = kr_rxfinalize;
1576 					break;
1577 #endif /* CONFIG_NEXUS_USER_PIPE */
1578 #if CONFIG_NEXUS_MONITOR
1579 				case NA_MONITOR:
1580 					ASSERT(!(na->na_flags &
1581 					    NAF_USER_PKT_POOL));
1582 					kring->ckr_prologue =
1583 					    kr_rxprologue_nodetach;
1584 					kring->ckr_finalize = kr_rxfinalize;
1585 					break;
1586 #endif /* CONFIG_NEXUS_MONITOR */
1587 				default:
1588 					if (na->na_flags & NAF_USER_PKT_POOL) {
1589 						kring->ckr_prologue =
1590 						    kr_rxprologue_upp;
1591 						kring->ckr_finalize =
1592 						    kr_rxfinalize_upp;
1593 					} else {
1594 						kring->ckr_prologue =
1595 						    kr_rxprologue;
1596 						kring->ckr_finalize =
1597 						    kr_rxfinalize;
1598 					}
1599 					break;
1600 				}
1601 				break;
1602 			case NR_EV:
1603 				kring->ckr_na_sync = kern_channel_event_sync;
1604 				break;
1605 			default:
1606 				VERIFY(0);
1607 				/* NOTREACHED */
1608 				__builtin_unreachable();
1609 			}
1610 			if (t != NR_EV) {
1611 				kring->ckr_na_notify = na->na_notify;
1612 			} else {
1613 				kring->ckr_na_notify = NULL;
1614 			}
1615 			(void) snprintf(kring->ckr_name,
1616 			    sizeof(kring->ckr_name) - 1,
1617 			    "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1618 			    ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1619 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1620 			    "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1621 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1622 			    CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1623 			kring->ckr_state = KR_READY;
1624 			q_lck_grp = na_kr_q_lck_grp(t);
1625 			s_lck_grp = na_kr_s_lck_grp(t);
1626 			kring->ckr_qlock_group = q_lck_grp;
1627 			lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1628 			    &channel_lock_attr);
1629 			kring->ckr_slock_group = s_lck_grp;
1630 			lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1631 			    &channel_lock_attr);
1632 			csi_init(&kring->ckr_si,
1633 			    (kring->ckr_flags & CKRF_MITIGATION),
1634 			    na->na_ch_mit_ival);
1635 		}
1636 		csi_init(&na->na_si[t],
1637 		    (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1638 		    na->na_ch_mit_ival);
1639 	}
1640 	ASSERT(c == 0);
1641 	na->na_tailroom = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV];
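	/*
	 * Resulting krings layout (sketch; alloc/free/event rings and the
	 * tailroom exist only when requested):
	 *
	 *	na_tx_rings    -> [ TX rings    ]
	 *	na_rx_rings    -> [ RX rings    ]
	 *	na_alloc_rings -> [ ALLOC rings ]
	 *	na_free_rings  -> [ FREE rings  ]
	 *	na_event_rings -> [ EVENT rings ]
	 *	na_tailroom    -> [ tailroom, e.g. leasing structures ]
	 *
	 * na_kr_delete() below walks [na_tx_rings, na_tailroom) and relies
	 * on this contiguity.
	 */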
1642 
1643 	if (na->na_type == NA_NETIF_DEV) {
1644 		na_kr_setup_netif_svc_map(na);
1645 	}
1646 
1647 	/* validate now for cases where we create only krings */
1648 	na_krings_verify(na);
1649 	return 0;
1650 
1651 error:
1652 	ASSERT(err != 0);
1653 	if (na->na_tx_rings != NULL) {
1654 		sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1655 		na->na_tx_rings = NULL;
1656 	}
1657 	if (na->na_slot_ctxs != NULL) {
1658 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1659 		skn_free_type_array(slot_ctxs,
1660 		    struct slot_ctx, na->na_total_slots,
1661 		    na->na_slot_ctxs);
1662 		na->na_slot_ctxs = NULL;
1663 	}
1664 	if (na->na_scratch != NULL) {
1665 		skn_free_type_array(scratch,
1666 		    kern_packet_t, na->na_total_slots,
1667 		    na->na_scratch);
1668 		na->na_scratch = NULL;
1669 	}
1670 	return err;
1671 }
1672 
1673 /* undo the actions performed by na_kr_create() */
1674 /* call with SK_LOCK held */
1675 static void
1676 na_kr_delete(struct nexus_adapter *na)
1677 {
1678 	struct __kern_channel_ring *kring = na->na_tx_rings;
1679 	enum txrx t;
1680 
1681 	ASSERT((kring != NULL) && (na->na_tailroom != NULL));
1682 	SK_LOCK_ASSERT_HELD();
1683 
1684 	for_all_rings(t) {
1685 		csi_destroy(&na->na_si[t]);
1686 	}
1687 	/* we rely on the krings layout described above */
1688 	for (; kring != na->na_tailroom; kring++) {
1689 		lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1690 		lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1691 		csi_destroy(&kring->ckr_si);
1692 		if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1693 			kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1694 			ASSERT(kring->ckr_slot_ctxs != NULL);
1695 			kring->ckr_slot_ctxs = NULL;
1696 		}
1697 	}
1698 	if (na->na_slot_ctxs != NULL) {
1699 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1700 		atomic_bitclear_32(&na->na_flags, NAF_SLOT_CONTEXT);
1701 		skn_free_type_array(slot_ctxs,
1702 		    struct slot_ctx, na->na_total_slots,
1703 		    na->na_slot_ctxs);
1704 		na->na_slot_ctxs = NULL;
1705 	}
1706 	if (na->na_scratch != NULL) {
1707 		skn_free_type_array(scratch,
1708 		    kern_packet_t, na->na_total_slots,
1709 		    na->na_scratch);
1710 		na->na_scratch = NULL;
1711 	}
1712 	ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1713 	sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1714 	na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1715 	    na->na_free_rings = na->na_event_rings = na->na_tailroom = NULL;
1716 }
1717 
1718 static void
1719 na_kr_slot_desc_init(struct __slot_desc *ksds,
1720     boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1721 {
1722 	size_t i;
1723 
1724 	bzero(ksds, ndesc * SLOT_DESC_SZ);
1725 	if (usds != NULL) {
1726 		ASSERT(!kernel_only);
1727 		bzero(usds, ndesc * SLOT_DESC_SZ);
1728 	} else {
1729 		ASSERT(kernel_only);
1730 	}
1731 
1732 	for (i = 0; i < ndesc; i++) {
1733 		KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1734 		if (!kernel_only) {
1735 			USD_INIT(SLOT_DESC_USD(&usds[i]));
1736 		}
1737 	}
1738 }
1739 
1740 /* call with SK_LOCK held */
1741 static int
1742 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1743 {
1744 	struct skmem_arena *ar = na->na_arena;
1745 	struct skmem_arena_nexus *arn;
1746 	mach_vm_offset_t roff[SKMEM_REGIONS];
1747 	enum txrx t;
1748 	uint32_t i;
1749 
1750 	SK_LOCK_ASSERT_HELD();
1751 	ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1752 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1753 	arn = skmem_arena_nexus(ar);
1754 	ASSERT(arn != NULL);
1755 
1756 	bzero(&roff, sizeof(roff));
1757 	for (i = 0; i < SKMEM_REGIONS; i++) {
1758 		if (ar->ar_regions[i] == NULL) {
1759 			continue;
1760 		}
1761 
1762 		/* not for nexus */
1763 		ASSERT(i != SKMEM_REGION_SYSCTLS);
1764 
1765 		/*
1766 		 * Get region offsets from base of mmap span; the arena
1767 		 * doesn't need to be mmap'd at this point, since we
1768 		 * simply compute the relative offset.
1769 		 */
1770 		roff[i] = skmem_arena_get_region_offset(ar, i);
1771 	}
1772 
1773 	for_all_rings(t) {
1774 		for (i = 0; i < na_get_nrings(na, t); i++) {
1775 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1776 			struct __user_channel_ring *ring = kring->ckr_ring;
1777 			mach_vm_offset_t ring_off, usd_roff;
1778 			struct skmem_obj_info oi, oim;
1779 			uint32_t ndesc;
1780 
1781 			if (ring != NULL) {
1782 				SK_DF(SK_VERB_NA | SK_VERB_RING,
1783 				    "kr 0x%llx (\"%s\") is already "
1784 				    "initialized", SK_KVA(kring),
1785 				    kring->ckr_name);
1786 				continue; /* already created by somebody else */
1787 			}
1788 
1789 			if (!KR_KERNEL_ONLY(kring) &&
1790 			    (ring = skmem_cache_alloc(arn->arn_ring_cache,
1791 			    SKMEM_NOSLEEP)) == NULL) {
1792 				SK_ERR("Cannot allocate %s_ring for kr "
1793 				    "0x%llx (\"%s\")", sk_ring2str(t),
1794 				    SK_KVA(kring), kring->ckr_name);
1795 				goto cleanup;
1796 			}
1797 			kring->ckr_flags |= CKRF_MEM_RING_INITED;
1798 			kring->ckr_ring = ring;
1799 			ndesc = kring->ckr_num_slots;
1800 
1801 			if (ring == NULL) {
1802 				goto skip_user_ring_setup;
1803 			}
1804 
1805 			*(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1806 
1807 			/* offset of current ring in mmap span */
1808 			skmem_cache_get_obj_info(arn->arn_ring_cache,
1809 			    ring, &oi, NULL);
1810 			ring_off = (roff[SKMEM_REGION_RING] +
1811 			    SKMEM_OBJ_ROFF(&oi));
1812 
1813 			/*
1814 			 * ring_{buf,md,sd}_ofs offsets are relative to the
1815 			 * current ring, and not to the base of mmap span.
1816 			 */
1817 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_buf_base =
1818 			    (roff[SKMEM_REGION_BUF] - ring_off);
1819 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1820 			    (roff[SKMEM_REGION_UMD] - ring_off);
1821 			_CASSERT(sizeof(uint16_t) ==
1822 			    sizeof(ring->ring_bft_size));
1823 			if (roff[SKMEM_REGION_UBFT] != 0) {
1824 				ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1825 				    NULL);
1826 				*(mach_vm_offset_t *)(uintptr_t)
1827 				&ring->ring_bft_base =
1828 				    (roff[SKMEM_REGION_UBFT] - ring_off);
1829 				*(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1830 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1831 				    skr_c_obj_size;
1832 				ASSERT(ring->ring_bft_size ==
1833 				    ar->ar_regions[SKMEM_REGION_KBFT]->
1834 				    skr_c_obj_size);
1835 			} else {
1836 				*(mach_vm_offset_t *)(uintptr_t)
1837 				&ring->ring_bft_base = 0;
1838 				*(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1839 			}
1840 
1841 			if (t == NR_TX || t == NR_A || t == NR_EV) {
1842 				usd_roff = roff[SKMEM_REGION_TXAUSD];
1843 			} else {
1844 				ASSERT(t == NR_RX || t == NR_F);
1845 				usd_roff = roff[SKMEM_REGION_RXFUSD];
1846 			}
1847 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1848 			    (usd_roff - ring_off);
1849 
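			/*
			 * Illustrative sketch (not kernel code): with the
			 * arena mmap'd at `base' in user space, a consumer
			 * would resolve these ring-relative offsets as:
			 *
			 *	ring = (void *)(base + ring_off);
			 *	bufs = (char *)ring + ring->ring_buf_base;
			 *	sds  = (char *)ring + ring->ring_sd_base;
			 *
			 * i.e., each *_base value is added back to the
			 * ring's own address to recover an absolute one.
			 */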
1850 			/* copy values from kring */
1851 			ring->ring_head = kring->ckr_rhead;
1852 			*(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1853 			    kring->ckr_khead;
1854 			*(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1855 			    kring->ckr_rtail;
1856 
1857 			_CASSERT(sizeof(uint32_t) ==
1858 			    sizeof(ring->ring_buf_size));
1859 			_CASSERT(sizeof(uint16_t) ==
1860 			    sizeof(ring->ring_md_size));
1861 			*(uint32_t *)(uintptr_t)&ring->ring_buf_size =
1862 			    ar->ar_regions[SKMEM_REGION_BUF]->skr_c_obj_size;
1863 			if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1864 				*(uint16_t *)(uintptr_t)&ring->ring_md_size =
1865 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1866 				    skr_c_obj_size;
1867 				ASSERT(ring->ring_md_size ==
1868 				    ar->ar_regions[SKMEM_REGION_KMD]->
1869 				    skr_c_obj_size);
1870 			} else {
1871 				*(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1872 				ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1873 				ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1874 			}
1875 
1876 			/* ring info */
1877 			_CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1878 			_CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1879 			*(uint16_t *)(uintptr_t)&ring->ring_id =
1880 			    (uint16_t)kring->ckr_ring_id;
1881 			*(uint16_t *)(uintptr_t)&ring->ring_kind =
1882 			    (uint16_t)kring->ckr_tx;
1883 
1884 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1885 			    "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1886 			    sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1887 			    kring->ckr_name);
1888 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1889 			    "  num_slots:  %u", ring->ring_num_slots);
1890 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1891 			    "  buf_base:   0x%llx",
1892 			    (uint64_t)ring->ring_buf_base);
1893 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1894 			    "  md_base:    0x%llx",
1895 			    (uint64_t)ring->ring_md_base);
1896 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1897 			    "  sd_base:    0x%llx",
1898 			    (uint64_t)ring->ring_sd_base);
1899 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1900 			    "  h, t:      %u, %u", ring->ring_head,
1901 			    ring->ring_tail);
1902 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1903 			    "  md_size:    %llu",
1904 			    (uint64_t)ring->ring_md_size);
1905 
1906 			/* make sure they're in synch */
1907 			_CASSERT(NR_RX == CR_KIND_RX);
1908 			_CASSERT(NR_TX == CR_KIND_TX);
1909 			_CASSERT(NR_A == CR_KIND_ALLOC);
1910 			_CASSERT(NR_F == CR_KIND_FREE);
1911 			_CASSERT(NR_EV == CR_KIND_EVENT);
1912 
1913 skip_user_ring_setup:
1914 			/*
1915 			 * This flag tells na_kr_teardown_all() that it should
1916 			 * go thru the checks to free up the slot maps.
1917 			 */
1918 			kring->ckr_flags |= CKRF_MEM_SD_INITED;
1919 			if (t == NR_TX || t == NR_A || t == NR_EV) {
1920 				kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1921 			} else {
1922 				ASSERT(t == NR_RX || t == NR_F);
1923 				kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1924 			}
1925 			kring->ckr_ksds =
1926 			    skmem_cache_alloc(kring->ckr_ksds_cache,
1927 			    SKMEM_NOSLEEP);
1928 			if (kring->ckr_ksds == NULL) {
1929 				SK_ERR("Cannot allocate %s_ksds for kr "
1930 				    "0x%llx (\"%s\")", sk_ring2str(t),
1931 				    SK_KVA(kring), kring->ckr_name);
1932 				goto cleanup;
1933 			}
1934 			if (!KR_KERNEL_ONLY(kring)) {
1935 				skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1936 				    kring->ckr_ksds, &oi, &oim);
1937 				kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1938 			}
1939 			na_kr_slot_desc_init(kring->ckr_ksds,
1940 			    KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1941 
1942 			/* cache last slot descriptor address */
1943 			ASSERT(kring->ckr_lim == (ndesc - 1));
1944 			kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1945 
1946 			if ((t < NR_TXRX) &&
1947 			    !(na->na_flags & NAF_USER_PKT_POOL) &&
1948 			    na_kr_populate_slots(kring) != 0) {
1949 				SK_ERR("Cannot allocate buffers for kr "
1950 				    "0x%llx (\"%s\")", SK_KVA(kring),
1951 				    kring->ckr_name);
1952 				goto cleanup;
1953 			}
1954 		}
1955 	}
1956 
1957 	return 0;
1958 
1959 cleanup:
1960 	na_kr_teardown_all(na, ch, FALSE);
1961 
1962 	return ENOMEM;
1963 }
1964 
1965 static void
1966 na_kr_teardown_common(struct nexus_adapter *na,
1967     struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
1968     boolean_t defunct)
1969 {
1970 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
1971 	struct __user_channel_ring *ckr_ring;
1972 	boolean_t sd_idle, sd_inited;
1973 
1974 	ASSERT(arn != NULL);
1975 	kr_enter(kring, TRUE);
1976 	/*
1977 	 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
1978 	 * to make sure that the freeing needs to happen (else just
1979 	 * nullify the values).
1980 	 * If this adapter owns the memory for the slot descriptors,
1981 	 * check if the region is marked as busy (sd_idle is false)
1982 	 * and leave the kring's slot descriptor fields alone if so,
1983 	 * at defunct time.  At final teardown time, sd_idle must be
1984 	 * true else we assert; this indicates a missing call to
1985 	 * skmem_arena_nexus_sd_set_noidle().
1986 	 */
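	/*
	 * Summary of the cases handled below (sketch):
	 *
	 *	sd_inited  sd_idle  action
	 *	-------------------------------------------------------
	 *	FALSE      TRUE     nullify the kring's sd fields only
	 *	TRUE       TRUE     depopulate, free ksds, clear flag
	 *	TRUE       FALSE    depopulate only (defunct); keep ksds
	 */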
1987 	sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
1988 	if (sd_inited) {
1989 		/* callee will do KR_KSD(), so check */
1990 		if (((t < NR_TXRX) || (t == NR_EV)) &&
1991 		    (kring->ckr_ksds != NULL)) {
1992 			na_kr_depopulate_slots(kring, ch, defunct);
1993 		}
1994 		/* leave CKRF_MEM_SD_INITED flag alone until idle */
1995 		sd_idle = skmem_arena_nexus_sd_idle(arn);
1996 		VERIFY(sd_idle || defunct);
1997 	} else {
1998 		sd_idle = TRUE;
1999 	}
2000 
2001 	if (sd_idle) {
2002 		kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2003 		if (kring->ckr_ksds != NULL) {
2004 			if (sd_inited) {
2005 				skmem_cache_free(kring->ckr_ksds_cache,
2006 				    kring->ckr_ksds);
2007 			}
2008 			kring->ckr_ksds = NULL;
2009 			kring->ckr_ksds_last = NULL;
2010 			kring->ckr_usds = NULL;
2011 		}
2012 		ASSERT(kring->ckr_ksds_last == NULL);
2013 		ASSERT(kring->ckr_usds == NULL);
2014 	}
2015 
2016 	if ((ckr_ring = kring->ckr_ring) != NULL) {
2017 		kring->ckr_ring = NULL;
2018 	}
2019 
2020 	if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2021 		ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2022 		if (ckr_ring != NULL) {
2023 			skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2024 		}
2025 		kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2026 	}
2027 
2028 	if (defunct) {
2029 		/* if defunct, drop everything; see KR_DROP() */
2030 		kring->ckr_flags |= CKRF_DEFUNCT;
2031 	}
2032 	kr_exit(kring);
2033 }
2034 
2035 /*
2036  * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2037  */
2038 static void
2039 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2040     boolean_t defunct)
2041 {
2042 	enum txrx t;
2043 
2044 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2045 
2046 	/* skip if this adapter has no allocated rings */
2047 	if (na->na_tx_rings == NULL) {
2048 		return;
2049 	}
2050 
2051 	for_all_rings(t) {
2052 		for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2053 			na_kr_teardown_common(na, &NAKR(na, t)[i],
2054 			    t, ch, defunct);
2055 		}
2056 	}
2057 }
2058 
2059 /*
2060  * Teardown only {tx,rx} rings assigned to the channel.
2061  */
2062 static void
2063 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2064     boolean_t defunct, struct proc *p)
2065 {
2066 	enum txrx t;
2067 
2068 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2069 
2070 	for_rx_tx(t) {
2071 		ring_id_t qfirst = ch->ch_first[t];
2072 		ring_id_t qlast = ch->ch_last[t];
2073 		uint32_t i;
2074 
2075 		for (i = qfirst; i < qlast; i++) {
2076 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2077 			na_kr_teardown_common(na, kring, t, ch, defunct);
2078 
2079 			/*
2080 			 * Issue a notify to wake up anyone sleeping in kqueue
2081 			 * so that they notice the newly defuncted channels and
2082 			 * return an error
2083 			 */
2084 			kring->ckr_na_notify(kring, p, 0);
2085 		}
2086 	}
2087 }
2088 
2089 static int
2090 na_kr_populate_slots(struct __kern_channel_ring *kring)
2091 {
2092 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2093 	struct nexus_adapter *na = KRNA(kring);
2094 	kern_pbufpool_t pp = kring->ckr_pp;
2095 	uint32_t nslots = kring->ckr_num_slots;
2096 	uint32_t start_idx, i;
2097 	uint32_t sidx = 0;      /* slot counter */
2098 	struct __kern_slot_desc *ksd;
2099 	struct __user_slot_desc *usd;
2100 	struct __kern_quantum *kqum;
2101 	nexus_type_t nexus_type;
2102 	int err = 0;
2103 
2104 	ASSERT(kring->ckr_tx < NR_TXRX);
2105 	ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2106 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2107 	ASSERT(pp != NULL);
2108 
2109 	/*
2110 	 * xxx_ppool: remove this special case
2111 	 */
2112 	nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2113 
2114 	switch (nexus_type) {
2115 	case NEXUS_TYPE_FLOW_SWITCH:
2116 	case NEXUS_TYPE_KERNEL_PIPE:
2117 		/*
2118 		 * xxx_ppool: This is temporary code until we come up with a
2119 		 * scheme for user space to alloc & attach packets to tx ring.
2120 		 */
2121 		if (kernel_only || kring->ckr_tx == NR_RX) {
2122 			return 0;
2123 		}
2124 		break;
2125 
2126 	case NEXUS_TYPE_NET_IF:
2127 		if (((na->na_type == NA_NETIF_DEV) ||
2128 		    (na->na_type == NA_NETIF_HOST)) &&
2129 		    (kernel_only || (kring->ckr_tx == NR_RX))) {
2130 			return 0;
2131 		}
2132 
2133 		ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2134 		    (na->na_type == NA_NETIF_COMPAT_HOST) ||
2135 		    (na->na_type == NA_NETIF_DEV) ||
2136 		    (na->na_type == NA_NETIF_VP));
2137 
2138 		if (!kernel_only) {
2139 			if (kring->ckr_tx == NR_RX) {
2140 				return 0;
2141 			} else {
2142 				break;
2143 			}
2144 		}
2145 
2146 		ASSERT(kernel_only);
2147 
2148 		if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2149 		    (na->na_type == NA_NETIF_COMPAT_HOST)) {
2150 			return 0;
2151 		}
2152 		VERIFY(0);
2153 		/* NOTREACHED */
2154 		__builtin_unreachable();
2155 
2156 	case NEXUS_TYPE_USER_PIPE:
2157 	case NEXUS_TYPE_MONITOR:
2158 		break;
2159 
2160 	default:
2161 		VERIFY(0);
2162 		/* NOTREACHED */
2163 		__builtin_unreachable();
2164 	}
2165 
2166 	/* Fill the ring with packets */
2167 	sidx = start_idx = 0;
2168 	for (i = 0; i < nslots; i++) {
2169 		kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2170 		    SKMEM_NOSLEEP));
2171 		if (kqum == NULL) {
2172 			err = ENOMEM;
2173 			SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2174 			    "after %u of %u, err %d", SK_KVA(na->na_arena),
2175 			    na->na_arena->ar_name, i, nslots, err);
2176 			goto cleanup;
2177 		}
2178 		ksd = KR_KSD(kring, i);
2179 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2180 
2181 		/* attach packet to slot */
2182 		kqum->qum_ksd = ksd;
2183 		ASSERT(!KSD_VALID_METADATA(ksd));
2184 		KSD_ATTACH_METADATA(ksd, kqum);
2185 		if (usd != NULL) {
2186 			USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2187 			kr_externalize_metadata(kring, pp->pp_max_frags,
2188 			    kqum, current_proc());
2189 		}
2190 
2191 		SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2192 		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2193 		    SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2194 		    SK_KVA(&kqum->qum_buf[0]));
2195 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2196 			SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2197 			    "uqum [%-3u, 0x%llx]  ubuf[%-3u, 0x%llx]",
2198 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2199 			    SK_KVA(usd), METADATA_IDX(kqum),
2200 			    SK_KVA(kqum->qum_user),
2201 			    kqum->qum_user->qum_buf[0].buf_idx,
2202 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2203 		}
2204 
2205 		sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2206 	}
2207 
2208 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2209 	    SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2210 
2211 cleanup:
2212 	if (err != 0) {
2213 		sidx = start_idx;
2214 		while (i-- > 0) {
2215 			ksd = KR_KSD(kring, i);
2216 			usd = (kernel_only ? NULL : KR_USD(kring, i));
2217 			kqum = ksd->sd_qum;
2218 
2219 			ASSERT(ksd == kqum->qum_ksd);
2220 			KSD_RESET(ksd);
2221 			if (usd != NULL) {
2222 				USD_RESET(usd);
2223 			}
2224 			/* detach packet from slot */
2225 			kqum->qum_ksd = NULL;
2226 			pp_free_packet(pp, SK_PTR_ADDR(kqum));
2227 
2228 			sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2229 		}
2230 	}
2231 	return err;
2232 }
2233 
2234 static void
2235 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2236     struct kern_channel *ch, boolean_t defunct)
2237 {
2238 #pragma unused(ch)
2239 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2240 	uint32_t i, j, n = kring->ckr_num_slots;
2241 	struct nexus_adapter *na = KRNA(kring);
2242 	struct kern_pbufpool *pp = kring->ckr_pp;
2243 	boolean_t upp = FALSE;
2244 	obj_idx_t midx;
2245 
2246 	ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2247 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2248 
2249 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2250 
2251 	if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2252 	    (kring->ckr_tx != NR_EV)) {
2253 		upp = TRUE;
2254 	}
2255 	for (i = 0, j = 0; i < n; i++) {
2256 		struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2257 		struct __user_slot_desc *usd;
2258 		struct __kern_quantum *qum, *kqum;
2259 		boolean_t free_packet = FALSE;
2260 		int err;
2261 
2262 		if (!KSD_VALID_METADATA(ksd)) {
2263 			continue;
2264 		}
2265 
2266 		kqum = ksd->sd_qum;
2267 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2268 		midx = METADATA_IDX(kqum);
2269 
2270 		/*
2271 		 * if the packet is internalized it should not be in the
2272 		 * hash table of packets loaned to user space.
2273 		 */
2274 		if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2275 			if ((qum = pp_find_upp(pp, midx)) != NULL) {
2276 				panic("internalized packet 0x%llx in htbl",
2277 				    SK_KVA(qum));
2278 				/* NOTREACHED */
2279 				__builtin_unreachable();
2280 			}
2281 			free_packet = TRUE;
2282 		} else if (upp) {
2283 			/*
2284 			 * if the packet is not internalized check if it is
2285 			 * in the list of packets loaned to user-space.
2286 			 * Remove from the list before freeing.
2287 			 */
2288 			ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2289 			qum = pp_remove_upp(pp, midx, &err);
2290 			if (err != 0) {
2291 				SK_ERR("un-allocated packet or buflet %d %p",
2292 				    midx, SK_KVA(qum));
2293 				if (qum != NULL) {
2294 					free_packet = TRUE;
2295 				}
2296 			}
2297 		} else {
2298 			free_packet = TRUE;
2299 		}
2300 
2301 		/*
2302 		 * Clear the user and kernel slot descriptors.  Note that
2303 		 * if we are depopulating the slots due to defunct (and not
2304 		 * due to normal deallocation/teardown), we leave the user
2305 		 * slot descriptor alone.  At that point the process may
2306 		 * be suspended, and later when it resumes it would just
2307 		 * pick up the original contents and move forward with
2308 		 * whatever it was doing.
2309 		 */
2310 		KSD_RESET(ksd);
2311 		if (usd != NULL && !defunct) {
2312 			USD_RESET(usd);
2313 		}
2314 
2315 		/* detach packet from slot */
2316 		kqum->qum_ksd = NULL;
2317 
2318 		SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2319 		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2320 		    METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2321 		    SK_KVA(&kqum->qum_buf[0]));
2322 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2323 			SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2324 			    "uqum [%-3u, 0x%llx]  ubuf[%-3u, 0x%llx]",
2325 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2326 			    SK_KVA(usd), METADATA_IDX(kqum),
2327 			    SK_KVA(kqum->qum_user),
2328 			    kqum->qum_user->qum_buf[0].buf_idx,
2329 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2330 		}
2331 
2332 		if (free_packet) {
2333 			pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2334 		}
2335 	}
2336 
2337 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2338 	    SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2339 	    j, n);
2340 }
2341 
2342 int
2343 na_rings_mem_setup(struct nexus_adapter *na, uint32_t tailroom,
2344     boolean_t alloc_ctx, struct kern_channel *ch)
2345 {
2346 	boolean_t kronly;
2347 	int err;
2348 
2349 	SK_LOCK_ASSERT_HELD();
2350 	ASSERT(na->na_channels == 0);
2351 	/*
2352 	 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2353 	 * the backing memory regions for the adapter.
2354 	 */
2355 	kronly = (na->na_flags & NAF_MEM_NO_INIT);
2356 	ASSERT(!kronly || NA_KERNEL_ONLY(na));
2357 
2358 	/*
2359 	 * Create and initialize the common fields of the krings array,
2360 	 * using the information that must already be available in the na.
2361 	 * tailroom can be used to request the allocation of additional
2362 	 * tailroom bytes after the krings array.  This is used by
2363 	 * nexus_vp_adapter's (i.e., flow switch ports) to make room
2364 	 * for leasing-related data structures.
2365 	 */
2366 	if ((err = na_kr_create(na, tailroom, alloc_ctx)) == 0 && !kronly) {
2367 		err = na_kr_setup(na, ch);
2368 		if (err != 0) {
2369 			na_kr_delete(na);
2370 		}
2371 	}
2372 
2373 	return err;
2374 }
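
/*
 * As an example of the expected pairing, a minimal na_krings_create
 * callback can simply forward here with no tailroom and no slot
 * contexts, the way na_pseudo_krings_create() below does:
 *
 *	return na_rings_mem_setup(na, 0, FALSE, ch);
 *
 * with na_rings_mem_teardown() as its na_krings_delete counterpart.
 */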
2375 
2376 void
2377 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2378     boolean_t defunct)
2379 {
2380 	SK_LOCK_ASSERT_HELD();
2381 	ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2382 
2383 	/*
2384 	 * Deletes the kring and ring array of the adapter. They
2385 	 * must have been created using na_rings_mem_setup().
2386 	 *
2387 	 * XXX: [email protected] -- the parameter "ch" should not be
2388 	 * needed here; however na_kr_depopulate_slots() needs to
2389 	 * go thru the channel's user packet pool hash, and so for
2390 	 * now we leave it here.
2391 	 */
2392 	na_kr_teardown_all(na, ch, defunct);
2393 	if (!defunct) {
2394 		na_kr_delete(na);
2395 	}
2396 }
2397 
2398 void
2399 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2400 {
2401 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2402 
2403 	/*
2404 	 * Depopulate slots on the TX and RX rings of this channel,
2405 	 * but don't touch other rings owned by other channels if
2406 	 * this adapter is being shared.
2407 	 */
2408 	na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2409 }
2410 
2411 void
2412 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2413 {
2414 	enum txrx t;
2415 	uint32_t i;
2416 
2417 	for_rx_tx(t) {
2418 		for (i = 0; i < na_get_nrings(na, t); i++) {
2419 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2420 			int error;
2421 			error = kr_enter(kring, TRUE);
2422 			if (drop) {
2423 				kring->ckr_flags |= CKRF_DROP;
2424 			} else {
2425 				kring->ckr_flags &= ~CKRF_DROP;
2426 			}
2427 
2428 			if (error != 0) {
2429 				SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2430 				    "kr_enter failed %d",
2431 				    na->na_name, SK_KVA(na),
2432 				    kring->ckr_name, SK_KVA(kring),
2433 				    error);
2434 			} else {
2435 				kr_exit(kring);
2436 			}
2437 			SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2438 			    "krflags 0x%b", na->na_name, SK_KVA(na),
2439 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2440 			    CKRF_BITS);
2441 		}
2442 	}
2443 }
2444 
2445 /*
2446  * Set the stopped/enabled status of a ring.  When stopping, we also wait
2447  * for all current activity on the ring to terminate.  The status change
2448  * is then notified using the adapter's na_notify callback.
2449  */
2450 static void
2451 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2452     uint32_t state)
2453 {
2454 	struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2455 
2456 	/*
2457 	 * Mark the ring as stopped/enabled, and run through the
2458 	 * locks to make sure other users get to see it.
2459 	 */
2460 	if (state == KR_READY) {
2461 		kr_start(kr);
2462 	} else {
2463 		kr_stop(kr, state);
2464 	}
2465 }
2466 
2467 
2468 /* stop or enable all the rings of na */
2469 static void
2470 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2471 {
2472 	uint32_t i;
2473 	enum txrx t;
2474 
2475 	SK_LOCK_ASSERT_HELD();
2476 
2477 	if (!NA_IS_ACTIVE(na)) {
2478 		return;
2479 	}
2480 
2481 	for_rx_tx(t) {
2482 		for (i = 0; i < na_get_nrings(na, t); i++) {
2483 			na_set_ring(na, i, t, state);
2484 		}
2485 	}
2486 }
2487 
2488 /*
2489  * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
2490  * to finish and prevents any new one from starting.  Call this before turning
2491  * Skywalk mode off, or before removing the hardware rings (e.g., on module
2492  * unload).  As a rule of thumb for linux drivers, this should be placed near
2493  * each napi_disable().
2494  */
2495 void
2496 na_disable_all_rings(struct nexus_adapter *na)
2497 {
2498 	na_set_all_rings(na, KR_STOPPED);
2499 }
2500 
2501 /*
2502  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
2503  * adapter's rings.  In linux drivers, this should be placed near each
2504  * napi_enable().
2505  */
2506 void
2507 na_enable_all_rings(struct nexus_adapter *na)
2508 {
2509 	na_set_all_rings(na, KR_READY /* enabled */);
2510 }
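
/*
 * Sketch of the intended driver-side pattern (hypothetical caller):
 *
 *	na_disable_all_rings(na);	-- wait out pending txsync/rxsync
 *	... reconfigure or remove hardware rings ...
 *	na_enable_all_rings(na);	-- resume syncs
 */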
2511 
2512 void
2513 na_lock_all_rings(struct nexus_adapter *na)
2514 {
2515 	na_set_all_rings(na, KR_LOCKED);
2516 }
2517 
2518 void
2519 na_unlock_all_rings(struct nexus_adapter *na)
2520 {
2521 	na_enable_all_rings(na);
2522 }
2523 
2524 int
2525 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2526     struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2527 {
2528 	struct nexus_adapter *na = NULL;
2529 	mach_vm_size_t memsize = 0;
2530 	int err = 0;
2531 	enum txrx t;
2532 
2533 	ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2534 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2535 
2536 	SK_LOCK_ASSERT_HELD();
2537 
2538 	/* find the nexus adapter and return the reference */
2539 	err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2540 	if (err != 0) {
2541 		ASSERT(na == NULL);
2542 		goto done;
2543 	}
2544 
2545 	if (NA_KERNEL_ONLY(na)) {
2546 		err = EBUSY;
2547 		goto done;
2548 	}
2549 
2550 	/* reject if the adapter is defunct or non-permissive */
2551 	if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2552 		err = ENXIO;
2553 		goto done;
2554 	}
2555 
2556 	err = na_bind_channel(na, ch, chr);
2557 	if (err != 0) {
2558 		goto done;
2559 	}
2560 
2561 	ASSERT(ch->ch_schema != NULL);
2562 	ASSERT(na == ch->ch_na);
2563 
2564 	for_all_rings(t) {
2565 		if (na_get_nrings(na, t) == 0) {
2566 			ch->ch_si[t] = NULL;
2567 			continue;
2568 		}
2569 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2570 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2571 	}
2572 
2573 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2574 
2575 	if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2576 	    AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2577 		atomic_bitset_32(__DECONST(uint32_t *,
2578 		    &ch->ch_schema->csm_flags), CSM_PRIV_MEM);
2579 	}
2580 
2581 	err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2582 	if (err != 0) {
2583 		goto done;
2584 	}
2585 
2586 	atomic_bitset_32(__DECONST(uint32_t *, &ch->ch_schema->csm_flags),
2587 	    CSM_ACTIVE);
2588 	chr->cr_memsize = memsize;
2589 	chr->cr_memoffset = ch->ch_schema_offset;
2590 
2591 	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2592 	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2593 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2594 	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2595 	    na->na_flags, NAF_BITS);
2596 
2597 done:
2598 	if (err != 0) {
2599 		if (ch->ch_schema != NULL || na != NULL) {
2600 			if (ch->ch_schema != NULL) {
2601 				ASSERT(na == ch->ch_na);
2602 				/*
2603 				 * Callee will unmap memory region if needed,
2604 				 * as well as release reference held on 'na'.
2605 				 */
2606 				na_disconnect(nx, ch);
2607 				na = NULL;
2608 			}
2609 			if (na != NULL) {
2610 				(void) na_release_locked(na);
2611 				na = NULL;
2612 			}
2613 		}
2614 	}
2615 
2616 	return err;
2617 }
2618 
2619 void
2620 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2621 {
2622 #pragma unused(nx)
2623 	enum txrx t;
2624 
2625 	SK_LOCK_ASSERT_HELD();
2626 
2627 	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2628 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2629 	    ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2630 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2631 	    ch->ch_na->na_flags, NAF_BITS);
2632 
2633 	/* destroy mapping and release references */
2634 	na_unbind_channel(ch);
2635 	ASSERT(ch->ch_na == NULL);
2636 	ASSERT(ch->ch_schema == NULL);
2637 	for_all_rings(t) {
2638 		ch->ch_si[t] = NULL;
2639 	}
2640 }
2641 
2642 void
2643 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2644     struct nexus_adapter *na, boolean_t locked)
2645 {
2646 #pragma unused(nx)
2647 	SK_LOCK_ASSERT_HELD();
2648 	if (!locked) {
2649 		lck_mtx_lock(&ch->ch_lock);
2650 	}
2651 
2652 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2653 
2654 	if (!(na->na_flags & NAF_DEFUNCT)) {
2655 		/*
2656 		 * Mark this adapter as defunct to inform nexus-specific
2657 		 * teardown handler called by na_teardown() below.
2658 		 */
2659 		atomic_bitset_32(&na->na_flags, NAF_DEFUNCT);
2660 
2661 		/*
2662 		 * Depopulate slots.
2663 		 */
2664 		na_teardown(na, ch, TRUE);
2665 
2666 		/*
2667 		 * And finally destroy any already-defunct memory regions.
2668 		 * Do this only if the nexus adapter owns the arena, i.e.
2669 		 * NAF_MEM_LOANED is not set.  Otherwise, we'd expect
2670 		 * that this routine be called again for the real owner.
2671 		 */
2672 		if (!(na->na_flags & NAF_MEM_LOANED)) {
2673 			skmem_arena_defunct(na->na_arena);
2674 		}
2675 	}
2676 
2677 	SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2678 	    "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2679 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2680 	    na->na_name, ch->ch_info->cinfo_nx_port,
2681 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2682 	    na->na_flags, NAF_BITS);
2683 
2684 	if (!locked) {
2685 		lck_mtx_unlock(&ch->ch_lock);
2686 	}
2687 }
2688 
2689 /*
2690  * TODO: [email protected] -- merge this into na_connect()
2691  */
2692 int
2693 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2694     struct chreq *chr, struct proc *p)
2695 {
2696 #pragma unused(p)
2697 	struct nexus_adapter *na = NULL;
2698 	mach_vm_size_t memsize = 0;
2699 	int error = 0;
2700 	enum txrx t;
2701 
2702 	ASSERT(chr->cr_mode & CHMODE_KERNEL);
2703 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2704 	ASSERT(ch->ch_na == NULL);
2705 	ASSERT(ch->ch_schema == NULL);
2706 
2707 	SK_LOCK_ASSERT_HELD();
2708 
2709 	error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2710 	if (error != 0) {
2711 		goto done;
2712 	}
2713 
2714 	if (na == NULL) {
2715 		error = EINVAL;
2716 		goto done;
2717 	}
2718 
2719 	if (na->na_channels > 0) {
2720 		error = EBUSY;
2721 		goto done;
2722 	}
2723 
2724 	if (na->na_flags & NAF_DEFUNCT) {
2725 		error = ENXIO;
2726 		goto done;
2727 	}
2728 
2729 	/*
2730 	 * Special connect requires the nexus adapter to handle its
2731 	 * own channel binding and unbinding via na_special(); bail
2732 	 * if this adapter doesn't support it.
2733 	 */
2734 	if (na->na_special == NULL) {
2735 		error = ENOTSUP;
2736 		goto done;
2737 	}
2738 
2739 	/* upon success, "ch->ch_na" will point to "na" */
2740 	error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2741 	if (error != 0) {
2742 		ASSERT(ch->ch_na == NULL);
2743 		goto done;
2744 	}
2745 
2746 	ASSERT(na->na_flags & NAF_SPEC_INIT);
2747 	ASSERT(na == ch->ch_na);
2748 	/* make sure this is still the case */
2749 	ASSERT(ch->ch_schema == NULL);
2750 
2751 	for_rx_tx(t) {
2752 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2753 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2754 	}
2755 
2756 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2757 	chr->cr_memsize = memsize;
2758 
2759 	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2760 	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2761 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2762 	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2763 	    na->na_flags, NAF_BITS);
2764 
2765 done:
2766 	if (error != 0) {
2767 		if (ch->ch_na != NULL || na != NULL) {
2768 			if (ch->ch_na != NULL) {
2769 				ASSERT(na == ch->ch_na);
2770 				/* callee will release reference on 'na' */
2771 				na_disconnect_spec(nx, ch);
2772 				na = NULL;
2773 			}
2774 			if (na != NULL) {
2775 				(void) na_release_locked(na);
2776 				na = NULL;
2777 			}
2778 		}
2779 	}
2780 
2781 	return error;
2782 }
2783 
2784 /*
2785  * TODO: [email protected] -- merge this into na_disconnect()
2786  */
2787 void
2788 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2789 {
2790 #pragma unused(nx)
2791 	struct nexus_adapter *na = ch->ch_na;
2792 	enum txrx t;
2793 	int error;
2794 
2795 	SK_LOCK_ASSERT_HELD();
2796 	ASSERT(na != NULL);
2797 	ASSERT(na->na_flags & NAF_SPEC_INIT);   /* has been bound */
2798 
2799 	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2800 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2801 	    na->na_name, ch->ch_info->cinfo_nx_port,
2802 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2803 	    na->na_flags, NAF_BITS);
2804 
2805 	/* take a reference for this routine */
2806 	na_retain_locked(na);
2807 
2808 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2809 	ASSERT(ch->ch_schema == NULL);
2810 	ASSERT(na->na_special != NULL);
2811 	/* unbind this channel */
2812 	error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2813 	ASSERT(error == 0);
2814 	ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2815 
2816 	/* now release our reference; this may be the last */
2817 	na_release_locked(na);
2818 	na = NULL;
2819 
2820 	ASSERT(ch->ch_na == NULL);
2821 	for_rx_tx(t) {
2822 		ch->ch_si[t] = NULL;
2823 	}
2824 }
2825 
2826 void
2827 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2828 {
2829 #pragma unused(nx)
2830 	struct nexus_adapter *na = ch->ch_na;
2831 
2832 	SK_LOCK_ASSERT_HELD();
2833 
2834 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2835 	ASSERT(NA_KERNEL_ONLY(na));
2836 	ASSERT(na->na_special != NULL);
2837 
2838 	na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2839 }
2840 
2841 void
2842 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2843 {
2844 #pragma unused(nx)
2845 	struct nexus_adapter *na = ch->ch_na;
2846 
2847 	SK_LOCK_ASSERT_HELD();
2848 
2849 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2850 	ASSERT(NA_KERNEL_ONLY(na));
2851 	ASSERT(na->na_special != NULL);
2852 
2853 	na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2854 }
2855 
2856 /*
2857  * MUST BE CALLED UNDER SK_LOCK()
2858  *
2859  * Get a refcounted reference to a nexus adapter attached
2860  * to the interface specified by chr.
2861  * This is always called in the execution of an ioctl().
2862  *
2863  * Return ENXIO if the interface specified by the request does
2864  * not exist, ENOTSUP if Skywalk is not supported by the interface,
2865  * EINVAL if parameters are invalid, ENOMEM if needed resources
2866  * could not be allocated.
2867  * If successful, hold a reference to the nexus adapter.
2868  *
2869  * No reference is kept on the real interface, which may then
2870  * disappear at any time.
2871  */
2872 int
2873 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2874     struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
2875     struct nexus_adapter **na, boolean_t create)
2876 {
2877 	int error = 0;
2878 
2879 	_CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2880 
2881 	*na = NULL;     /* default return value */
2882 
2883 	SK_LOCK_ASSERT_HELD();
2884 
2885 	/*
2886 	 * We cascade through all possible types of nexus adapter.
2887 	 * All nx_*_na_find() functions return an error and an na,
2888 	 * with the following combinations:
2889 	 *
2890 	 * error    na
2891 	 *   0	   NULL		type doesn't match
2892 	 *  !0	   NULL		type matches, but na creation/lookup failed
2893 	 *   0	  !NULL		type matches and na created/found
2894 	 *  !0    !NULL		impossible
2895 	 */
2896 
2897 #if CONFIG_NEXUS_MONITOR
2898 	/* try to see if this is a monitor port */
2899 	error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
2900 	if (error != 0 || *na != NULL) {
2901 		return error;
2902 	}
2903 #endif /* CONFIG_NEXUS_MONITOR */
2904 #if CONFIG_NEXUS_USER_PIPE
2905 	/* try to see if this is a pipe port */
2906 	error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2907 	if (error != 0 || *na != NULL) {
2908 		return error;
2909 	}
2910 #endif /* CONFIG_NEXUS_USER_PIPE */
2911 #if CONFIG_NEXUS_KERNEL_PIPE
2912 	/* try to see if this is a kernel pipe port */
2913 	error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2914 	if (error != 0 || *na != NULL) {
2915 		return error;
2916 	}
2917 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
2918 #if CONFIG_NEXUS_FLOWSWITCH
2919 	/* try to see if this is a flowswitch port */
2920 	error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2921 	if (error != 0 || *na != NULL) {
2922 		return error;
2923 	}
2924 #endif /* CONFIG_NEXUS_FLOWSWITCH */
2925 #if CONFIG_NEXUS_NETIF
2926 	error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2927 	if (error != 0 || *na != NULL) {
2928 		return error;
2929 	}
2930 #endif /* CONFIG_NEXUS_NETIF */
2931 
2932 	ASSERT(*na == NULL);
2933 	return ENXIO;
2934 }
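
/*
 * Caller-side sketch of the contract above: on a zero return, *na is
 * non-NULL and referenced; on failure it is left NULL, so there is no
 * reference to drop.
 *
 *	struct nexus_adapter *na = NULL;
 *	int err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE);
 *	if (err != 0)
 *		return err;
 *	... use na under SK_LOCK ...
 *	(void) na_release_locked(na);
 */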
2935 
2936 void
2937 na_retain_locked(struct nexus_adapter *na)
2938 {
2939 	SK_LOCK_ASSERT_HELD();
2940 
2941 	if (na != NULL) {
2942 #if SK_LOG
2943 		uint32_t oref = atomic_add_32_ov(&na->na_refcount, 1);
2944 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2945 		    na->na_name, SK_KVA(na), oref + 1, na->na_channels);
2946 #else /* !SK_LOG */
2947 		atomic_add_32(&na->na_refcount, 1);
2948 #endif /* !SK_LOG */
2949 	}
2950 }
2951 
2952 /* returns 1 iff the nexus_adapter is destroyed */
2953 int
2954 na_release_locked(struct nexus_adapter *na)
2955 {
2956 	uint32_t oref;
2957 
2958 	SK_LOCK_ASSERT_HELD();
2959 
2960 	ASSERT(na->na_refcount > 0);
2961 	oref = atomic_add_32_ov(&na->na_refcount, -1);
2962 	if (oref > 1) {
2963 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2964 		    na->na_name, SK_KVA(na), oref - 1, na->na_channels);
2965 		return 0;
2966 	}
2967 	ASSERT(na->na_channels == 0);
2968 
2969 	if (na->na_dtor != NULL) {
2970 		na->na_dtor(na);
2971 	}
2972 
2973 	ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
2974 	ASSERT(na->na_slot_ctxs == NULL);
2975 	ASSERT(na->na_scratch == NULL);
2976 
2977 #if CONFIG_NEXUS_USER_PIPE
2978 	nx_upipe_na_dealloc(na);
2979 #endif /* CONFIG_NEXUS_USER_PIPE */
2980 	if (na->na_arena != NULL) {
2981 		skmem_arena_release(na->na_arena);
2982 		na->na_arena = NULL;
2983 	}
2984 
2985 	SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
2986 	    na->na_name, SK_KVA(na));
2987 
2988 	NA_FREE(na);
2989 	return 1;
2990 }
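
/*
 * Retain/release pairing sketch, as used by na_disconnect_spec() above:
 *
 *	na_retain_locked(na);		-- +1, under SK_LOCK
 *	... operate on na ...
 *	if (na_release_locked(na) == 1) {
 *		-- that was the last reference; na has been freed
 *	}
 */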
2991 
2992 static struct nexus_adapter *
2993 na_pseudo_alloc(zalloc_flags_t how)
2994 {
2995 	struct nexus_adapter *na;
2996 
2997 	na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
2998 	if (na) {
2999 		na->na_type = NA_PSEUDO;
3000 		na->na_free = na_pseudo_free;
3001 	}
3002 	return na;
3003 }
3004 
3005 static void
3006 na_pseudo_free(struct nexus_adapter *na)
3007 {
3008 	ASSERT(na->na_refcount == 0);
3009 	SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3010 	bzero(na, sizeof(*na));
3011 	zfree(na_pseudo_zone, na);
3012 }
3013 
3014 static int
3015 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3016     uint32_t flags)
3017 {
3018 #pragma unused(kring, p, flags)
3019 	SK_DF(SK_VERB_SYNC | SK_VERB_TX,
3020 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3021 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3022 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3023 	    flags);
3024 
3025 	return 0;
3026 }
3027 
3028 static int
3029 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3030     uint32_t flags)
3031 {
3032 #pragma unused(kring, p, flags)
3033 	SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3034 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3035 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3036 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3037 	    flags);
3038 
3039 	ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3040 
3041 	return 0;
3042 }
3043 
3044 static int
3045 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3046 {
3047 	SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3048 	    SK_KVA(na), na_activate_mode2str(mode));
3049 
3050 	switch (mode) {
3051 	case NA_ACTIVATE_MODE_ON:
3052 		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
3053 		break;
3054 
3055 	case NA_ACTIVATE_MODE_DEFUNCT:
3056 		break;
3057 
3058 	case NA_ACTIVATE_MODE_OFF:
3059 		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
3060 		break;
3061 
3062 	default:
3063 		VERIFY(0);
3064 		/* NOTREACHED */
3065 		__builtin_unreachable();
3066 	}
3067 
3068 	return 0;
3069 }
3070 
3071 static void
3072 na_pseudo_dtor(struct nexus_adapter *na)
3073 {
3074 #pragma unused(na)
3075 }
3076 
3077 static int
3078 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3079 {
3080 	return na_rings_mem_setup(na, 0, FALSE, ch);
3081 }
3082 
3083 static void
3084 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3085     boolean_t defunct)
3086 {
3087 	na_rings_mem_teardown(na, ch, defunct);
3088 }
3089 
3090 /*
3091  * Pseudo nexus adapter; typically used as a generic parent adapter.
3092  */
3093 int
3094 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3095     struct nexus_adapter **ret)
3096 {
3097 	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3098 	struct nexus_adapter *na;
3099 	int error;
3100 
3101 	SK_LOCK_ASSERT_HELD();
3102 	*ret = NULL;
3103 
3104 	na = na_pseudo_alloc(Z_WAITOK);
3105 
3106 	ASSERT(na->na_type == NA_PSEUDO);
3107 	ASSERT(na->na_free == na_pseudo_free);
3108 
3109 	(void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3110 	na->na_name[sizeof(na->na_name) - 1] = '\0';
3111 	uuid_generate_random(na->na_uuid);
3112 
3113 	/*
3114 	 * Verify upper bounds; for all cases including user pipe nexus,
3115 	 * the parameters must have already been validated by corresponding
3116 	 * nxdom_prov_params() function defined by each domain.
3117 	 */
3118 	na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3119 	na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3120 	na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3121 	na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3122 	ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3123 	ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3124 	ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3125 	ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3126 
3127 	na->na_txsync = na_pseudo_txsync;
3128 	na->na_rxsync = na_pseudo_rxsync;
3129 	na->na_activate = na_pseudo_activate;
3130 	na->na_dtor = na_pseudo_dtor;
3131 	na->na_krings_create = na_pseudo_krings_create;
3132 	na->na_krings_delete = na_pseudo_krings_delete;
3133 
3134 	*(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3135 	    NEXUS_STATS_TYPE_INVALID;
3136 
3137 	/* other fields are set in the common routine */
3138 	na_attach_common(na, nx, NX_DOM_PROV(nx));
3139 
3140 	if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3141 	    nx, na)) != 0) {
3142 		ASSERT(na->na_arena == NULL);
3143 		goto err;
3144 	}
3145 	ASSERT(na->na_arena != NULL);
3146 
3147 	*(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3148 	ASSERT(na->na_flowadv_max == 0 ||
3149 	    skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3150 
3151 #if SK_LOG
3152 	uuid_string_t uuidstr;
3153 	SK_D("na_name: \"%s\"", na->na_name);
3154 	SK_D("  UUID:        %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3155 	SK_D("  nx:          0x%llx (\"%s\":\"%s\")",
3156 	    SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3157 	    NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3158 	SK_D("  flags:       %b", na->na_flags, NAF_BITS);
3159 	SK_D("  flowadv_max: %u", na->na_flowadv_max);
3160 	SK_D("  rings:       tx %u rx %u",
3161 	    na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3162 	SK_D("  slots:       tx %u rx %u",
3163 	    na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3164 #if CONFIG_NEXUS_USER_PIPE
3165 	SK_D("  next_pipe:   %u", na->na_next_pipe);
3166 	SK_D("  max_pipes:   %u", na->na_max_pipes);
3167 #endif /* CONFIG_NEXUS_USER_PIPE */
3168 #endif /* SK_LOG */
3169 
3170 	*ret = na;
3171 	na_retain_locked(na);
3172 
3173 	return 0;
3174 
3175 err:
3176 	if (na != NULL) {
3177 		if (na->na_arena != NULL) {
3178 			skmem_arena_release(na->na_arena);
3179 			na->na_arena = NULL;
3180 		}
3181 		NA_FREE(na);
3182 	}
3183 	return error;
3184 }
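
/*
 * Hypothetical caller sketch: a nexus needing a generic parent adapter
 * would create and eventually release one as follows:
 *
 *	struct nexus_adapter *pna = NULL;
 *	error = na_pseudo_create(nx, chr, &pna);   -- retains pna on success
 *	...
 *	(void) na_release_locked(pna);             -- drop when done
 */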
3185 
3186 void
3187 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3188     const flowadv_idx_t fe_idx)
3189 {
3190 	struct skmem_arena *ar = na->na_arena;
3191 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3192 	struct __flowadv_entry *fae;
3193 
3194 	ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3195 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3196 
3197 	AR_LOCK(ar);
3198 
3199 	/* we must not get here if arena is defunct; this must be valid */
3200 	ASSERT(arn->arn_flowadv_obj != NULL);
3201 
3202 	VERIFY(fe_idx < na->na_flowadv_max);
3203 	fae = &arn->arn_flowadv_obj[fe_idx];
3204 	uuid_copy(fae->fae_id, fae_id);
3205 	fae->fae_flags |= FLOWADVF_VALID;
3206 
3207 	AR_UNLOCK(ar);
3208 }
3209 
3210 void
3211 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3212     const flowadv_idx_t fe_idx)
3213 {
3214 #pragma unused(fae_id)
3215 	struct skmem_arena *ar = na->na_arena;
3216 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3217 
3218 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3219 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3220 
3221 	AR_LOCK(ar);
3222 
3223 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3224 	if (arn->arn_flowadv_obj != NULL) {
3225 		struct __flowadv_entry *fae;
3226 
3227 		VERIFY(fe_idx < na->na_flowadv_max);
3228 		fae = &arn->arn_flowadv_obj[fe_idx];
3229 		ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3230 		uuid_clear(fae->fae_id);
3231 		fae->fae_flags &= ~FLOWADVF_VALID;
3232 	}
3233 
3234 	AR_UNLOCK(ar);
3235 }
3236 
3237 bool
3238 na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3239     const flowadv_token_t flow_token)
3240 {
3241 	struct skmem_arena *ar = na->na_arena;
3242 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3243 	bool suspend;
3244 
3245 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3246 	ASSERT(fe_idx < na->na_flowadv_max);
3247 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3248 
3249 	AR_LOCK(ar);
3250 
3251 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3252 
3253 	if (arn->arn_flowadv_obj != NULL) {
3254 		struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3255 
3256 		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3257 		/*
3258 		 * We cannot guarantee that the flow is still around by now,
3259 		 * so check if that's the case and let the caller know.
3260 		 */
3261 		if ((suspend = (fae->fae_token == flow_token))) {
3262 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3263 			fae->fae_flags |= FLOWADVF_SUSPENDED;
3264 		}
3265 	} else {
3266 		suspend = false;
3267 	}
3268 	if (suspend) {
3269 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3270 		    "SUSPEND", sk_proc_name_address(current_proc()),
3271 		    sk_proc_pid(current_proc()), flow_token, fe_idx);
3272 	} else {
3273 		SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around",
3274 		    sk_proc_name_address(current_proc()),
3275 		    sk_proc_pid(current_proc()), flow_token, fe_idx);
3276 	}
3277 
3278 	AR_UNLOCK(ar);
3279 
3280 	return suspend;
3281 }
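
/*
 * Sketch of the token handshake: the caller presents the token it
 * observed when the flow was bound to fe_idx; a mismatch means the
 * slot has since been reused, and no state is changed:
 *
 *	if (na_flowadv_set(na, fe_idx, token)) {
 *		... flow still valid; it is now marked SUSPENDED ...
 *	}
 */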
3282 
3283 int
3284 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3285     const flowadv_token_t flow_token)
3286 {
3287 	struct nexus_adapter *na = ch->ch_na;
3288 	struct skmem_arena *ar = na->na_arena;
3289 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3290 	boolean_t resume;
3291 
3292 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3293 	ASSERT(fe_idx < na->na_flowadv_max);
3294 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3295 
3296 	AR_LOCK(ar);
3297 
3298 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3299 
3300 	if (arn->arn_flowadv_obj != NULL) {
3301 		struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3302 
3303 		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3304 		/*
3305 		 * We cannot guarantee that the flow is still around by now,
3306 		 * so check if that's the case and let the caller know.
3307 		 */
3308 		if ((resume = (fae->fae_token == flow_token))) {
3309 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3310 			fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3311 		}
3312 	} else {
3313 		resume = FALSE;
3314 	}
3315 	if (resume) {
3316 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3317 		    "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
3318 		    fe_idx);
3319 	} else {
3320 		SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3321 		    ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3322 	}
3323 
3324 	AR_UNLOCK(ar);
3325 
3326 	return resume;
3327 }
3328 
3329 void
3330 na_flowadv_event(struct __kern_channel_ring *kring)
3331 {
3332 	ASSERT(kring->ckr_tx == NR_TX);
3333 
3334 	SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
3335 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
3336 	    KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3337 
3338 	na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3339 }
3340 
3341 static int
3342 na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3343     uint32_t flags)
3344 {
3345 #pragma unused(flags, p)
3346 	int n, ret = 0;
3347 	slot_idx_t j;
3348 	struct __kern_slot_desc *ksd;
3349 	struct __user_slot_desc *usd;
3350 	struct __kern_quantum *kqum;
3351 	struct kern_pbufpool *pp = kring->ckr_pp;
3352 	uint32_t nfree = 0;
3353 
3354 	/* packet pool list is protected by channel lock */
3355 	ASSERT(!KR_KERNEL_ONLY(kring));
3356 
3357 	/* # of new slots */
3358 	n = kring->ckr_rhead - kring->ckr_khead;
3359 	if (n < 0) {
3360 		n += kring->ckr_num_slots;
3361 	}
3362 
3363 	/* nothing to free */
3364 	if (__improbable(n == 0)) {
3365 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3366 		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3367 		    "nothing to free");
3368 		goto done;
3369 	}
3370 
3371 	j = kring->ckr_khead;
3372 	PP_LOCK(pp);
3373 	while (n--) {
3374 		int err;
3375 
3376 		ksd = KR_KSD(kring, j);
3377 		usd = KR_USD(kring, j);
3378 
3379 		if (__improbable(!SD_VALID_METADATA(usd))) {
3380 			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3381 			ret = EINVAL;
3382 			break;
3383 		}
3384 
3385 		kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3386 		if (__improbable(err != 0)) {
3387 			SK_ERR("un-allocated packet or buflet %d %p",
3388 			    usd->sd_md_idx, SK_KVA(kqum));
3389 			ret = EINVAL;
3390 			break;
3391 		}
3392 
3393 		/* detach and free the packet */
3394 		kqum->qum_qflags &= ~QUM_F_FINALIZED;
3395 		kqum->qum_ksd = NULL;
3396 		ASSERT(!KSD_VALID_METADATA(ksd));
3397 		USD_DETACH_METADATA(usd);
3398 		ASSERT(pp == kqum->qum_pp);
3399 		ASSERT(nfree < kring->ckr_num_slots);
3400 		kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3401 		j = SLOT_NEXT(j, kring->ckr_lim);
3402 	}
3403 	PP_UNLOCK(pp);
3404 
3405 	if (__probable(nfree > 0)) {
3406 		pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3407 	}
3408 
3409 	kring->ckr_khead = j;
3410 	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3411 
3412 done:
3413 	return ret;
3414 }
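
/*
 * Illustrative sketch (not compiled): the modular ring arithmetic used by
 * the pool-sync routines above and below to count slots between two ring
 * indices.  The names here are generic, not the kernel's.
 */
#if 0
/*
 * Number of slots from tail up to (but not including) head on a ring of
 * num_slots entries; adding num_slots back in corrects the wraparound
 * case where head has cycled past the end of the array.
 */
static int
ring_distance(int head, int tail, int num_slots)
{
	int n = head - tail;

	if (n < 0) {
		n += num_slots;
	}
	return n;       /* 0 <= n < num_slots */
}
#endif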
3415 
3416 static int
3417 na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
3418     uint32_t flags)
3419 {
3420 	int b, err;
3421 	uint32_t n = 0;
3422 	slot_idx_t j;
3423 	uint64_t now;
3424 	uint32_t curr_ws, ph_needed, ph_cnt;
3425 	struct __kern_slot_desc *ksd;
3426 	struct __user_slot_desc *usd;
3427 	struct __kern_quantum *kqum;
3428 	kern_pbufpool_t pp = kring->ckr_pp;
3429 	pid_t pid = proc_pid(p);
3430 
3431 	/* packet pool list is protected by channel lock */
3432 	ASSERT(!KR_KERNEL_ONLY(kring));
3433 	ASSERT(!PP_KERNEL_ONLY(pp));
3434 
3435 	now = _net_uptime;
3436 	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3437 		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3438 			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3439 		}
3440 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3441 		    "%s: purged curr_ws(%d)", kring->ckr_name,
3442 		    kring->ckr_alloc_ws);
3443 		return 0;
3444 	}
3445 	/* reclaim the completed slots */
3446 	kring->ckr_khead = kring->ckr_rhead;
3447 
3448 	/* # of busy (unclaimed) slots */
3449 	b = kring->ckr_ktail - kring->ckr_khead;
3450 	if (b < 0) {
3451 		b += kring->ckr_num_slots;
3452 	}
3453 
3454 	curr_ws = kring->ckr_alloc_ws;
3455 	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3456 		/* increment the working set by 50% */
3457 		curr_ws += (curr_ws >> 1);
3458 		curr_ws = MIN(curr_ws, kring->ckr_lim);
3459 	} else {
3460 		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3461 		    (uint32_t)b >= (curr_ws >> 2)) {
3462 			/* decrease the working set by 25% */
3463 			curr_ws -= (curr_ws >> 2);
3464 		}
3465 	}
3466 	curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
3467 	if (curr_ws > (uint32_t)b) {
3468 		n = curr_ws - b;
3469 	}
3470 	kring->ckr_alloc_ws = curr_ws;
3471 	kring->ckr_sync_time = now;
3472 
3473 	/* min with # of avail free slots (subtract busy from max) */
3474 	n = ph_needed = MIN(n, kring->ckr_lim - b);
3475 	j = kring->ckr_ktail;
3476 	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3477 	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3478 
3479 	if ((ph_cnt = ph_needed) == 0) {
3480 		goto done;
3481 	}
3482 
3483 	err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
3484 	    &ph_cnt);
3485 
3486 	if (__improbable(ph_cnt == 0)) {
3487 		SK_ERR("kr 0x%llx failed to alloc %u packets (%d)",
3488 		    SK_KVA(kring), ph_needed, err);
3489 		kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
3490 	} else {
3491 		/*
3492 		 * Add packets to the allocated list of user packet pool.
3493 		 */
3494 		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
3495 	}
3496 
3497 
3498 	for (n = 0; n < ph_cnt; n++) {
3499 		ksd = KR_KSD(kring, j);
3500 		usd = KR_USD(kring, j);
3501 
3502 		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
3503 		kring->ckr_scratch[n] = 0;
3504 		ASSERT(kqum != NULL);
3505 
3506 		/* cleanup any stale slot mapping */
3507 		KSD_RESET(ksd);
3508 		ASSERT(usd != NULL);
3509 		USD_RESET(usd);
3510 
3511 		/*
3512 		 * Since this packet is freshly allocated and we need to
3513 		 * have the flag set for the attach to succeed, just set
3514 		 * it here rather than calling __packet_finalize().
3515 		 */
3516 		kqum->qum_qflags |= QUM_F_FINALIZED;
3517 
3518 		/* Attach packet to slot */
3519 		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
3520 		/*
3521 		 * externalize the packet as it is being transferred to
3522 		 * user space.
3523 		 */
3524 		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);
3525 
3526 		j = SLOT_NEXT(j, kring->ckr_lim);
3527 	}
3528 done:
3529 	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3530 	kring->ckr_ktail = j;
3531 	return 0;
3532 }
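
/*
 * Illustrative sketch (not compiled): the working-set adaptation performed
 * by both alloc-sync paths, in isolation.  Constants and parameter names
 * are placeholders; the kernel tunables (na_upp_ws_hold_time,
 * na_upp_alloc_lowat, ...) are not reproduced here.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

#define WS_LOWAT 8      /* placeholder low watermark */

static uint32_t
adapt_working_set(uint32_t ws, uint32_t busy, uint32_t ring_lim,
    bool force, bool held_long_enough)
{
	if (force) {
		/* grow by 50%, capped at the ring size */
		ws += ws >> 1;
		if (ws > ring_lim) {
			ws = ring_lim;
		}
	} else if (held_long_enough && busy >= (ws >> 2)) {
		/* shrink by 25% once the set has been stable for a while */
		ws -= ws >> 2;
	}
	/* never fall below the allocation low watermark */
	return (ws > WS_LOWAT) ? ws : WS_LOWAT;
}
#endif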
3533 
3534 static int
3535 na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3536     uint32_t flags)
3537 {
3538 #pragma unused(flags, p)
3539 	int n, ret = 0;
3540 	slot_idx_t j;
3541 	struct __kern_slot_desc *ksd;
3542 	struct __user_slot_desc *usd;
3543 	struct __kern_buflet *kbft;
3544 	struct kern_pbufpool *pp = kring->ckr_pp;
3545 
3546 	/* packet pool list is protected by channel lock */
3547 	ASSERT(!KR_KERNEL_ONLY(kring));
3548 
3549 	/* # of new slots */
3550 	n = kring->ckr_rhead - kring->ckr_khead;
3551 	if (n < 0) {
3552 		n += kring->ckr_num_slots;
3553 	}
3554 
3555 	/* nothing to free */
3556 	if (__improbable(n == 0)) {
3557 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3558 		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3559 		    "nothing to free");
3560 		goto done;
3561 	}
3562 
3563 	j = kring->ckr_khead;
3564 	while (n--) {
3565 		int err;
3566 
3567 		ksd = KR_KSD(kring, j);
3568 		usd = KR_USD(kring, j);
3569 
3570 		if (__improbable(!SD_VALID_METADATA(usd))) {
3571 			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3572 			ret = EINVAL;
3573 			break;
3574 		}
3575 
3576 		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
3577 		if (__improbable(err != 0)) {
3578 			SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
3579 			    SK_KVA(kbft));
3580 			ret = EINVAL;
3581 			break;
3582 		}
3583 
3584 		/* detach and free the packet */
3585 		ASSERT(!KSD_VALID_METADATA(ksd));
3586 		USD_DETACH_METADATA(usd);
3587 		pp_free_buflet(pp, kbft);
3588 		j = SLOT_NEXT(j, kring->ckr_lim);
3589 	}
3590 	kring->ckr_khead = j;
3591 	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3592 
3593 done:
3594 	return ret;
3595 }
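
/*
 * Illustrative sketch (not compiled): plausible wraparound helpers in the
 * spirit of the SLOT_NEXT()/SLOT_PREV() macros used above, where "lim" is
 * the last valid index (num_slots - 1).  These are stand-ins; consult the
 * actual macro definitions for the authoritative semantics.
 */
#if 0
#include <stdint.h>

static inline uint32_t
slot_next(uint32_t i, uint32_t lim)
{
	return (i == lim) ? 0 : i + 1;  /* step forward, wrap past lim */
}

static inline uint32_t
slot_prev(uint32_t i, uint32_t lim)
{
	return (i == 0) ? lim : i - 1;  /* step back, wrap below zero */
}
#endif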
3596 
3597 static int
3598 na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3599     uint32_t flags)
3600 {
3601 	int b, err;
3602 	uint32_t n = 0;
3603 	slot_idx_t j;
3604 	uint64_t now;
3605 	uint32_t curr_ws, bh_needed, bh_cnt;
3606 	struct __kern_slot_desc *ksd;
3607 	struct __user_slot_desc *usd;
3608 	struct __kern_buflet *kbft;
3609 	struct __kern_buflet_ext *kbe;
3610 	kern_pbufpool_t pp = kring->ckr_pp;
3611 	pid_t pid = proc_pid(p);
3612 
3613 	/* packet pool list is protected by channel lock */
3614 	ASSERT(!KR_KERNEL_ONLY(kring));
3615 	ASSERT(!PP_KERNEL_ONLY(pp));
3616 
3617 	now = _net_uptime;
3618 	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3619 		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3620 			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3621 		}
3622 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3623 		    "%s: purged curr_ws(%d)", kring->ckr_name,
3624 		    kring->ckr_alloc_ws);
3625 		return 0;
3626 	}
3627 	/* reclaim the completed slots */
3628 	kring->ckr_khead = kring->ckr_rhead;
3629 
3630 	/* # of busy (unclaimed) slots */
3631 	b = kring->ckr_ktail - kring->ckr_khead;
3632 	if (b < 0) {
3633 		b += kring->ckr_num_slots;
3634 	}
3635 
3636 	curr_ws = kring->ckr_alloc_ws;
3637 	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3638 		/* increment the working set by 50% */
3639 		curr_ws += (curr_ws >> 1);
3640 		curr_ws = MIN(curr_ws, kring->ckr_lim);
3641 	} else {
3642 		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3643 		    (uint32_t)b >= (curr_ws >> 2)) {
3644 			/* decrease the working set by 25% */
3645 			curr_ws -= (curr_ws >> 2);
3646 		}
3647 	}
3648 	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
3649 	if (curr_ws > (uint32_t)b) {
3650 		n = curr_ws - b;
3651 	}
3652 	kring->ckr_alloc_ws = curr_ws;
3653 	kring->ckr_sync_time = now;
3654 
3655 	/* min with # of avail free slots (subtract busy from max) */
3656 	n = bh_needed = MIN(n, kring->ckr_lim - b);
3657 	j = kring->ckr_ktail;
3658 	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3659 	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3660 
3661 	if ((bh_cnt = bh_needed) == 0) {
3662 		goto done;
3663 	}
3664 
3665 	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
3666 	    SKMEM_NOSLEEP);
3667 
3668 	if (bh_cnt == 0) {
3669 		SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
3670 		    SK_KVA(kring), bh_needed, err);
3671 		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
3672 	}
3673 
3674 	for (n = 0; n < bh_cnt; n++) {
3675 		struct __user_buflet *ubft;
3676 
3677 		ksd = KR_KSD(kring, j);
3678 		usd = KR_USD(kring, j);
3679 
3680 		kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
3681 		kbe = (struct __kern_buflet_ext *)kbft;
3682 		kring->ckr_scratch[n] = 0;
3683 		ASSERT(kbft != NULL);
3684 
3685 		/*
3686 		 * Add buflet to the allocated list of user packet pool.
3687 		 */
3688 		pp_insert_upp_bft(pp, kbft, pid);
3689 
3690 		/*
3691 		 * externalize the buflet as it is being transferred to
3692 		 * user space.
3693 		 */
3694 		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
3695 		KBUF_EXTERNALIZE(kbft, ubft, pp);
3696 
3697 		/* cleanup any stale slot mapping */
3698 		KSD_RESET(ksd);
3699 		ASSERT(usd != NULL);
3700 		USD_RESET(usd);
3701 
3702 		/* Attach buflet to slot */
3703 		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);
3704 
3705 		j = SLOT_NEXT(j, kring->ckr_lim);
3706 	}
3707 done:
3708 	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3709 	kring->ckr_ktail = j;
3710 	return 0;
3711 }
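
/*
 * Illustrative sketch (not compiled): the batch-allocate-then-attach shape
 * shared by both alloc-sync paths above.  alloc_batch() and attach_one()
 * are hypothetical stand-ins for the pbufpool batch API and the per-slot
 * attach/externalize steps; only the control flow is the point.
 */
#if 0
#include <stdint.h>

extern int alloc_batch(uint64_t *scratch, uint32_t *cnt);   /* hypothetical */
extern void attach_one(uint32_t slot, uint64_t handle);     /* hypothetical */

static void
fill_slots(uint64_t *scratch, uint32_t needed, uint32_t tail, uint32_t lim)
{
	uint32_t cnt = needed;

	/* the batch call may return fewer than requested; cnt is in/out */
	(void)alloc_batch(scratch, &cnt);

	/* attach only what was actually allocated, advancing the tail */
	for (uint32_t n = 0; n < cnt; n++) {
		attach_one(tail, scratch[n]);
		scratch[n] = 0;         /* drop the scratch reference */
		tail = (tail == lim) ? 0 : tail + 1;
	}
	/* the caller publishes the new tail to the ring */
}
#endif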
3712 
3713 /* The caller needs to ensure that the NA stays intact */
3714 void
3715 na_drain(struct nexus_adapter *na, boolean_t purge)
3716 {
3717 	/* will be cleared on next channel sync */
3718 	if (!(atomic_bitset_32_ov(&na->na_flags, NAF_DRAINING) &
3719 	    NAF_DRAINING) && NA_IS_ACTIVE(na)) {
3720 		SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
3721 		    na->na_name, (purge ? "purging" : "pruning"),
3722 		    SK_KVA(na), na->na_flags, NAF_BITS);
3723 
3724 		/* reap (purge/prune) caches in the arena */
3725 		skmem_arena_reap(na->na_arena, purge);
3726 	}
3727 }
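
/*
 * Illustrative sketch (not compiled): the fetch-or test-and-set idiom that
 * na_drain() relies on, shown with C11 atomics instead of the kernel's
 * atomic_bitset_32_ov().  Only the first caller to set the DRAINING bit
 * does the reap work; later callers see the bit already set and return.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

#define FLAG_DRAINING 0x1u

static void
drain_once(_Atomic uint32_t *flags)
{
	uint32_t old = atomic_fetch_or(flags, FLAG_DRAINING);

	if (old & FLAG_DRAINING) {
		return;         /* someone else is already draining */
	}
	/* ... reap caches here; the bit is cleared on the next sync ... */
}
#endif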
3728