xref: /xnu-8019.80.24/bsd/skywalk/nexus/nexus_adapter.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31  * All rights reserved.
32  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  *   1. Redistributions of source code must retain the above copyright
38  *      notice, this list of conditions and the following disclaimer.
39  *   2. Redistributions in binary form must reproduce the above copyright
40  *      notice, this list of conditions and the following disclaimer in the
41  *      documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  */
55 #include <sys/systm.h>
56 #include <skywalk/os_skywalk_private.h>
57 #include <skywalk/nexus/monitor/nx_monitor.h>
58 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
59 #include <skywalk/nexus/netif/nx_netif.h>
60 #include <skywalk/nexus/upipe/nx_user_pipe.h>
61 #include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
62 #include <kern/thread.h>
63 
64 static int na_krings_use(struct kern_channel *);
65 static void na_krings_unuse(struct kern_channel *);
66 static void na_krings_verify(struct nexus_adapter *);
67 static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
68 static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
69 static void na_set_all_rings(struct nexus_adapter *, uint32_t);
70 static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
71 static void na_unset_ringid(struct kern_channel *);
72 static void na_teardown(struct nexus_adapter *, struct kern_channel *,
73     boolean_t);
74 
75 static int na_kr_create(struct nexus_adapter *, uint32_t, boolean_t);
76 static void na_kr_delete(struct nexus_adapter *);
77 static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
78 static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
79     boolean_t);
80 static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
81     boolean_t, struct proc *);
82 static int na_kr_populate_slots(struct __kern_channel_ring *);
83 static void na_kr_depopulate_slots(struct __kern_channel_ring *,
84     struct kern_channel *, boolean_t defunct);
85 
86 static int na_schema_alloc(struct kern_channel *);
87 
88 static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
89 static void na_pseudo_free(struct nexus_adapter *);
90 static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
91     uint32_t);
92 static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
93     uint32_t);
94 static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
95 static void na_pseudo_dtor(struct nexus_adapter *);
96 static int na_pseudo_krings_create(struct nexus_adapter *,
97     struct kern_channel *);
98 static void na_pseudo_krings_delete(struct nexus_adapter *,
99     struct kern_channel *, boolean_t);
100 static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
101     struct proc *, uint32_t);
102 static int na_packet_pool_free_sync(struct __kern_channel_ring *,
103     struct proc *, uint32_t);
104 static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
105     struct proc *, uint32_t);
106 static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
107     struct proc *, uint32_t);
108 
109 static void na_destroyer_final(struct nexus_adapter *);
110 static void na_destroyer_enqueue(struct nexus_adapter *);
111 static struct nexus_adapter *na_destroyer_dequeue(void);
112 static int na_destroyer_thread_cont(int);
113 static void na_destroyer_thread_func(void *, wait_result_t);
114 
115 extern kern_return_t thread_terminate(thread_t);
116 
117 #define NA_KRING_IDLE_TIMEOUT   (NSEC_PER_SEC * 30) /* 30 seconds */
118 
119 static ZONE_DECLARE(na_pseudo_zone, SKMEM_ZONE_PREFIX ".na.pseudo",
120     sizeof(struct nexus_adapter), ZC_ZFREE_CLEARMEM);
121 
122 static int __na_inited = 0;
123 
124 #define NA_NUM_WMM_CLASSES      4
125 #define NAKR_WMM_SC2RINGID(_s)  PKT_SC2TC(_s)
126 #define NAKR_SET_SVC_LUT(_n, _s)                                        \
127 	(_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
128 #define NAKR_SET_KR_SVC(_n, _s)                                         \
129 	NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)
130 
131 #define NA_UPP_ALLOC_LOWAT      8
132 static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;
133 
134 #define NA_UPP_REAP_INTERVAL    10 /* seconds */
135 static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;
136 
137 #define NA_UPP_WS_HOLD_TIME     2 /* seconds */
138 static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;
139 
140 #define NA_UPP_REAP_MIN_PKTS    0
141 static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;
142 
143 #define NA_UPP_ALLOC_BUF_LOWAT     64
144 static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;
145 
146 #if (DEVELOPMENT || DEBUG)
147 static  uint64_t _na_inject_error = 0;
148 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
149 	_SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)
150 
151 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
152     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
153     NA_UPP_WS_HOLD_TIME, "");
154 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
155     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
156     NA_UPP_REAP_INTERVAL, "");
157 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
158     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
159     NA_UPP_REAP_MIN_PKTS, "");
160 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
161     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
162     NA_UPP_ALLOC_LOWAT, "");
163 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
164     CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
165     NA_UPP_ALLOC_BUF_LOWAT, "");
166 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
167     CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
168 #else
169 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
170 #endif /* !DEVELOPMENT && !DEBUG */
171 
172 #define SKMEM_TAG_NX_RINGS      "com.apple.skywalk.nexus.rings"
173 static kern_allocation_name_t skmem_tag_nx_rings;
174 
175 #define SKMEM_TAG_NX_CONTEXTS   "com.apple.skywalk.nexus.contexts"
176 static kern_allocation_name_t skmem_tag_nx_contexts;
177 
178 #define SKMEM_TAG_NX_SCRATCH    "com.apple.skywalk.nexus.scratch"
179 static kern_allocation_name_t skmem_tag_nx_scratch;
180 
181 #if !XNU_TARGET_OS_OSX
182 /* see KLDBootstrap::readPrelinkedExtensions() for details */
183 extern uuid_t kernelcache_uuid;
184 #else /* XNU_TARGET_OS_OSX */
185 /* see panic_init() for details */
186 extern unsigned char *kernel_uuid;
187 #endif /* XNU_TARGET_OS_OSX */
188 
189 /* The following are protected by sk_lock */
190 static TAILQ_HEAD(, nexus_adapter) na_destroyer_head;
191 static uint32_t na_destroyer_cnt;
192 static void *na_destroyer_run; /* wait channel for destroyer thread */
193 static thread_t na_destroyer_thread = THREAD_NULL;
194 
195 void
196 na_init(void)
197 {
198 	/*
199 	 * Changing the size of nexus_mdata structure won't break ABI,
200 	 * but we need to be mindful of memory consumption; Thus here
201 	 * we add a compile-time check to make sure the size is within
202 	 * the expected limit and that it's properly aligned.  This
203 	 * check may be adjusted in future as needed.
204 	 */
205 	_CASSERT(sizeof(struct nexus_mdata) <= 32 &&
206 	    IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
207 	_CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));
208 
209 	/* see comments on nexus_meta_type_t */
210 	_CASSERT(NEXUS_META_TYPE_MAX == 3);
211 	_CASSERT(NEXUS_META_SUBTYPE_MAX == 3);
212 
213 	ASSERT(!__na_inited);
214 
215 	ASSERT(skmem_tag_nx_rings == NULL);
216 	skmem_tag_nx_rings =
217 	    kern_allocation_name_allocate(SKMEM_TAG_NX_RINGS, 0);
218 	ASSERT(skmem_tag_nx_rings != NULL);
219 
220 	ASSERT(skmem_tag_nx_contexts == NULL);
221 	skmem_tag_nx_contexts =
222 	    kern_allocation_name_allocate(SKMEM_TAG_NX_CONTEXTS, 0);
223 	ASSERT(skmem_tag_nx_contexts != NULL);
224 
225 	ASSERT(skmem_tag_nx_scratch == NULL);
226 	skmem_tag_nx_scratch =
227 	    kern_allocation_name_allocate(SKMEM_TAG_NX_SCRATCH, 0);
228 	ASSERT(skmem_tag_nx_scratch != NULL);
229 
230 	TAILQ_INIT(&na_destroyer_head);
231 	ASSERT(na_destroyer_thread == THREAD_NULL);
232 	if (kernel_thread_start(na_destroyer_thread_func,
233 	    NULL, &na_destroyer_thread) != KERN_SUCCESS) {
234 		panic_plain("%s: couldn't create destroyer thread", __func__);
235 		/* NOTREACHED */
236 	}
237 
238 	__na_inited = 1;
239 }
240 
241 void
242 na_fini(void)
243 {
244 	if (__na_inited) {
245 		if (na_destroyer_thread != THREAD_NULL) {
246 			/* for the extra refcnt from kernel_thread_start() */
247 			thread_deallocate(na_destroyer_thread);
248 			/* terminate it */
249 			(void) thread_terminate(na_destroyer_thread);
250 			na_destroyer_thread = THREAD_NULL;
251 		}
252 		ASSERT(TAILQ_EMPTY(&na_destroyer_head));
253 		ASSERT(na_destroyer_cnt == 0);
254 
255 		if (skmem_tag_nx_rings != NULL) {
256 			kern_allocation_name_release(skmem_tag_nx_rings);
257 			skmem_tag_nx_rings = NULL;
258 		}
259 		if (skmem_tag_nx_contexts != NULL) {
260 			kern_allocation_name_release(skmem_tag_nx_contexts);
261 			skmem_tag_nx_contexts = NULL;
262 		}
263 		if (skmem_tag_nx_scratch != NULL) {
264 			kern_allocation_name_release(skmem_tag_nx_scratch);
265 			skmem_tag_nx_scratch = NULL;
266 		}
267 
268 		__na_inited = 0;
269 	}
270 }
271 
272 /*
273  * Interpret the ringid of a chreq by translating it into a pair
274  * of intervals of ring indices:
275  *
276  * [txfirst, txlast) and [rxfirst, rxlast)
277  */
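/*
 * Illustrative example (the ring counts here are hypothetical, not taken
 * from this file): on an adapter with 4 TX and 2 RX rings, RING_SET_ALL
 * with CHANNEL_RING_ID_ANY yields tx [0,4) and rx [0,2), while
 * RING_SET_ALL with ring_id 1 yields tx [1,2) and rx [1,2).
 */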
278 int
279 na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
280     ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
281 {
282 	enum txrx t;
283 
284 	switch (ring_set) {
285 	case RING_SET_ALL:
286 		/*
287 		 * Ring pair eligibility: all ring(s).
288 		 */
289 		if (ring_id != CHANNEL_RING_ID_ANY &&
290 		    ring_id >= na_get_nrings(na, NR_TX) &&
291 		    ring_id >= na_get_nrings(na, NR_RX)) {
292 			SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
293 			    na->na_name, (int)ring_id, ring_set);
294 			return EINVAL;
295 		}
296 		for_rx_tx(t) {
297 			if (ring_id == CHANNEL_RING_ID_ANY) {
298 				first[t] = 0;
299 				last[t] = na_get_nrings(na, t);
300 			} else {
301 				first[t] = ring_id;
302 				last[t] = ring_id + 1;
303 			}
304 		}
305 		break;
306 
307 	default:
308 		SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
309 		return EINVAL;
310 	}
311 
312 	SK_DF(SK_VERB_NA | SK_VERB_RING,
313 	    "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
314 	    na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
315 	    first[NR_RX], last[NR_RX]);
316 
317 	return 0;
318 }
319 
320 /*
321  * Set the ring ID. For devices with a single queue, a request
322  * for all rings is the same as a single ring.
323  */
324 static int
325 na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
326 {
327 	struct nexus_adapter *na = ch->ch_na;
328 	int error;
329 	enum txrx t;
330 	uint32_t n_alloc_rings;
331 
332 	if ((error = na_interp_ringid(na, ring_id, ring_set,
333 	    ch->ch_first, ch->ch_last)) != 0) {
334 		return error;
335 	}
336 
337 	n_alloc_rings = na_get_nrings(na, NR_A);
338 	if (n_alloc_rings != 0) {
339 		ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
340 		ch->ch_last[NR_A] = ch->ch_last[NR_F] =
341 		    ch->ch_first[NR_A] + n_alloc_rings;
342 	} else {
343 		ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
344 		ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
345 	}
346 	ch->ch_first[NR_EV] = 0;
347 	ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
348 	/* XXX: should we initialize na_si_users for event ring ? */
349 
350 	/*
351 	 * Optimization: count the users registered for more than
352 	 * one ring, which are the ones sleeping on the global queue.
353 	 * The default na_notify() callback will then avoid signaling
354 	 * the global queue if nobody is using it
355 	 */
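	/*
	 * For example, a channel bound to more than one TX ring of a
	 * multi-ring adapter counts as a "multiplex" user below, so the
	 * default na_notify() will also wake the adapter-wide selinfo
	 * on its behalf.
	 */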
356 	for_rx_tx(t) {
357 		if (ch_is_multiplex(ch, t)) {
358 			na->na_si_users[t]++;
359 			ASSERT(na->na_si_users[t] != 0);
360 		}
361 	}
362 	return 0;
363 }
364 
365 static void
366 na_unset_ringid(struct kern_channel *ch)
367 {
368 	struct nexus_adapter *na = ch->ch_na;
369 	enum txrx t;
370 
371 	for_rx_tx(t) {
372 		if (ch_is_multiplex(ch, t)) {
373 			ASSERT(na->na_si_users[t] != 0);
374 			na->na_si_users[t]--;
375 		}
376 		ch->ch_first[t] = ch->ch_last[t] = 0;
377 	}
378 }
379 
380 /*
381  * Check that the rings we want to bind are not exclusively owned by a previous
382  * bind.  If exclusive ownership has been requested, we also mark the rings.
383  */
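/*
 * The check is intentionally done in two passes: the first pass only
 * inspects the krings, so a busy ring makes us bail out with EBUSY before
 * anything has been marked; only when every requested ring is available
 * does the second pass bump ckr_users and, if requested, set
 * CKRF_EXCLUSIVE.
 */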
384 /* Hoisted out of line to reduce kernel stack footprint */
385 SK_NO_INLINE_ATTRIBUTE
386 static int
387 na_krings_use(struct kern_channel *ch)
388 {
389 	struct nexus_adapter *na = ch->ch_na;
390 	struct __kern_channel_ring *kring;
391 	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
392 	enum txrx t;
393 	uint32_t i;
394 
395 	SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
396 	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
397 	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
398 
399 	/*
400 	 * First round: check that none of the requested rings is
401 	 * already exclusively owned, and that exclusive ownership is
402 	 * not being requested for rings that are already in use
403 	 */
404 	for_all_rings(t) {
405 		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
406 			kring = &NAKR(na, t)[i];
407 			if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
408 			    (kring->ckr_users && excl)) {
409 				SK_DF(SK_VERB_NA | SK_VERB_RING,
410 				    "kr \"%s\" (0x%llx) krflags 0x%b is busy",
411 				    kring->ckr_name, SK_KVA(kring),
412 				    kring->ckr_flags, CKRF_BITS);
413 				return EBUSY;
414 			}
415 		}
416 	}
417 
418 	/*
419 	 * Second round: increment usage count and possibly
420 	 * mark as exclusive
421 	 */
422 
423 	for_all_rings(t) {
424 		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
425 			kring = &NAKR(na, t)[i];
426 			kring->ckr_users++;
427 			if (excl) {
428 				kring->ckr_flags |= CKRF_EXCLUSIVE;
429 			}
430 		}
431 	}
432 
433 	return 0;
434 }
435 
436 /* Hoisted out of line to reduce kernel stack footprint */
437 SK_NO_INLINE_ATTRIBUTE
438 static void
439 na_krings_unuse(struct kern_channel *ch)
440 {
441 	struct nexus_adapter *na = ch->ch_na;
442 	struct __kern_channel_ring *kring;
443 	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
444 	enum txrx t;
445 	uint32_t i;
446 
447 	SK_DF(SK_VERB_NA | SK_VERB_RING,
448 	    "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
449 	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
450 	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
451 
452 	for_all_rings(t) {
453 		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
454 			kring = &NAKR(na, t)[i];
455 			if (excl) {
456 				kring->ckr_flags &= ~CKRF_EXCLUSIVE;
457 			}
458 			kring->ckr_users--;
459 		}
460 	}
461 }
462 
463 /* Hoisted out of line to reduce kernel stack footprint */
464 SK_NO_INLINE_ATTRIBUTE
465 static void
466 na_krings_verify(struct nexus_adapter *na)
467 {
468 	struct __kern_channel_ring *kring;
469 	enum txrx t;
470 	uint32_t i;
471 
472 	for_all_rings(t) {
473 		for (i = 0; i < na_get_nrings(na, t); i++) {
474 			kring = &NAKR(na, t)[i];
475 			/* na_kr_create() validations */
476 			ASSERT(kring->ckr_num_slots > 0);
477 			ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
478 			ASSERT(kring->ckr_pp != NULL);
479 
480 			if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
481 				continue;
482 			}
483 			/* na_kr_setup() validations */
484 			if (KR_KERNEL_ONLY(kring)) {
485 				ASSERT(kring->ckr_ring == NULL);
486 			} else {
487 				ASSERT(kring->ckr_ring != NULL);
488 			}
489 			ASSERT(kring->ckr_ksds_last ==
490 			    &kring->ckr_ksds[kring->ckr_lim]);
491 		}
492 	}
493 }
494 
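/*
 * Bind a channel to this adapter.  A rough outline of the steps below:
 * refresh the ring configuration, translate the requested ring set into
 * per-direction ring intervals, create the krings/rings on first use,
 * mark the krings as in use (possibly exclusively), allocate the per-fd
 * channel schema for user-facing channels, set up the user packet pool
 * if requested, and finally activate the adapter if it isn't already.
 */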
495 int
496 na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
497     struct chreq *chr)
498 {
499 	struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
500 	struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
501 	uint32_t ch_mode = chr->cr_mode;
502 	int err = 0;
503 
504 	SK_LOCK_ASSERT_HELD();
505 	ASSERT(ch->ch_schema == NULL);
506 	ASSERT(ch->ch_na == NULL);
507 
508 	/* ring configuration may have changed, fetch from the card */
509 	na_update_config(na);
510 	ch->ch_na = na; /* store the reference */
511 	err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
512 	if (err != 0) {
513 		goto err;
514 	}
515 
516 	atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
517 	    CHANF_USER_PACKET_POOL | CHANF_EVENT_RING));
518 	if (ch_mode & CHMODE_EXCLUSIVE) {
519 		atomic_bitset_32(&ch->ch_flags, CHANF_EXCLUSIVE);
520 	}
521 	/*
522 	 * Disallow automatic sync for monitor mode, since TX
523 	 * direction is disabled.
524 	 */
525 	if (ch_mode & CHMODE_MONITOR) {
526 		atomic_bitset_32(&ch->ch_flags, CHANF_RXONLY);
527 	}
528 
529 	if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
530 	    !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
531 		SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
532 		    ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
533 		err = EINVAL;
534 		goto err;
535 	}
536 
537 	if (na->na_arena->ar_flags & ARF_DEFUNCT) {
538 		err = ENXIO;
539 		goto err;
540 	}
541 
542 	if (ch_mode & CHMODE_USER_PACKET_POOL) {
543 		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
544 		ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
545 		ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
546 		atomic_bitset_32(&ch->ch_flags, CHANF_USER_PACKET_POOL);
547 	}
548 
549 	if (ch_mode & CHMODE_EVENT_RING) {
550 		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
551 		ASSERT(na->na_flags & NAF_EVENT_RING);
552 		ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
553 		atomic_bitset_32(&ch->ch_flags, CHANF_EVENT_RING);
554 	}
555 
556 	/*
557 	 * If this is the first channel of the adapter, create
558 	 * the rings and their in-kernel view, the krings.
559 	 */
560 	if (na->na_channels == 0) {
561 		err = na->na_krings_create(na, ch);
562 		if (err != 0) {
563 			goto err;
564 		}
565 
566 		/*
567 		 * Sanity check; this is already done in na_kr_create(),
568 		 * but we do it here as well to validate na_kr_setup().
569 		 */
570 		na_krings_verify(na);
571 		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
572 		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
573 		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
574 		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
575 	}
576 
577 	/*
578 	 * Validate ownership and usability of the krings; take into account
579 	 * whether some previous bind has exclusive ownership on them.
580 	 */
581 	err = na_krings_use(ch);
582 	if (err != 0) {
583 		goto err_del_rings;
584 	}
585 
586 	/* for user-facing channel, create a new channel schema */
587 	if (!(ch->ch_flags & CHANF_KERNEL)) {
588 		err = na_schema_alloc(ch);
589 		if (err != 0) {
590 			goto err_rel_excl;
591 		}
592 
593 		ASSERT(ch->ch_schema != NULL);
594 		ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
595 	} else {
596 		ASSERT(ch->ch_schema == NULL);
597 		ch->ch_schema_offset = (mach_vm_offset_t)-1;
598 	}
599 
600 	/* update our work timestamp */
601 	na->na_work_ts = net_uptime();
605 
606 	na->na_channels++;
607 
608 	/*
609 	 * If user packet pool is desired, initialize the allocated
610 	 * object hash table in the pool, if not already.  This also
611 	 * retains a refcnt on the pool which the caller must release.
612 	 */
613 	ASSERT(ch->ch_pp == NULL);
614 	if (ch_mode & CHMODE_USER_PACKET_POOL) {
615 #pragma unused(tx_pp)
616 		ASSERT(rx_pp == tx_pp);
617 		err = pp_init_upp(rx_pp, TRUE);
618 		if (err != 0) {
619 			goto err_free_schema;
620 		}
621 		ch->ch_pp = rx_pp;
622 	}
623 
624 	if (!NA_IS_ACTIVE(na)) {
625 		err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
626 		if (err != 0) {
627 			goto err_release_pp;
628 		}
629 
630 		SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
631 		    SK_KVA(na));
632 		SK_D("  na_md_type:    %u", na->na_md_type);
633 		SK_D("  na_md_subtype: %u", na->na_md_subtype);
634 	}
635 
636 	SK_D("ch 0x%llx", SK_KVA(ch));
637 	SK_D("  ch_flags:     0x%b", ch->ch_flags, CHANF_BITS);
638 	if (ch->ch_schema != NULL) {
639 		SK_D("  ch_schema:    0x%llx", SK_KVA(ch->ch_schema));
640 	}
641 	SK_D("  ch_na:        0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
642 	    ch->ch_na->na_channels);
643 	SK_D("  ch_tx_rings:  [%u,%u)", ch->ch_first[NR_TX],
644 	    ch->ch_last[NR_TX]);
645 	SK_D("  ch_rx_rings:  [%u,%u)", ch->ch_first[NR_RX],
646 	    ch->ch_last[NR_RX]);
647 	SK_D("  ch_alloc_rings:  [%u,%u)", ch->ch_first[NR_A],
648 	    ch->ch_last[NR_A]);
649 	SK_D("  ch_free_rings:  [%u,%u)", ch->ch_first[NR_F],
650 	    ch->ch_last[NR_F]);
651 	SK_D("  ch_ev_rings:  [%u,%u)", ch->ch_first[NR_EV],
652 	    ch->ch_last[NR_EV]);
653 
654 	return 0;
655 
656 err_release_pp:
657 	if (ch_mode & CHMODE_USER_PACKET_POOL) {
658 		ASSERT(ch->ch_pp != NULL);
659 		pp_release(rx_pp);
660 		ch->ch_pp = NULL;
661 	}
662 err_free_schema:
663 	*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
664 	    NEXUS_META_TYPE_INVALID;
665 	*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
666 	    NEXUS_META_SUBTYPE_INVALID;
667 	ASSERT(na->na_channels != 0);
668 	na->na_channels--;
669 	if (ch->ch_schema != NULL) {
670 		skmem_cache_free(
671 			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
672 			ch->ch_schema);
673 		ch->ch_schema = NULL;
674 		ch->ch_schema_offset = (mach_vm_offset_t)-1;
675 	}
676 err_rel_excl:
677 	na_krings_unuse(ch);
678 err_del_rings:
679 	if (na->na_channels == 0) {
680 		na->na_krings_delete(na, ch, FALSE);
681 	}
682 err:
683 	ch->ch_na = NULL;
684 	ASSERT(err != 0);
685 
686 	return err;
687 }
688 
689 /*
690  * Undo everything that was done in na_bind_channel().
691  */
692 /* call with SK_LOCK held */
693 void
694 na_unbind_channel(struct kern_channel *ch)
695 {
696 	struct nexus_adapter *na = ch->ch_na;
697 
698 	SK_LOCK_ASSERT_HELD();
699 
700 	ASSERT(na->na_channels != 0);
701 	na->na_channels--;
702 
703 	/* release exclusive use if it was requested at bind time */
704 	na_krings_unuse(ch);
705 
706 	if (na->na_channels == 0) {     /* last instance */
707 		SK_D("%s(%d): deleting last channel instance for %s",
708 		    ch->ch_name, ch->ch_pid, na->na_name);
709 
710 		/*
711 		 * Free any remaining allocated packets attached to
712 		 * the slots, followed by a teardown of the arena.
713 		 */
714 		na_teardown(na, ch, FALSE);
715 
716 		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
717 		    NEXUS_META_TYPE_INVALID;
718 		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
719 		    NEXUS_META_SUBTYPE_INVALID;
720 	} else {
721 		SK_D("%s(%d): %s has %u remaining channel instance(s)",
722 		    ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
723 	}
724 
725 	/*
726 	 * Free any allocated packets (for the process) attached to the slots;
727 	 * note that na_teardown() could have done this there as well.
728 	 */
729 	if (ch->ch_pp != NULL) {
730 		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
731 		pp_purge_upp(ch->ch_pp, ch->ch_pid);
732 		pp_release(ch->ch_pp);
733 		ch->ch_pp = NULL;
734 	}
735 
736 	/* possibly decrement counter of tx_si/rx_si users */
737 	na_unset_ringid(ch);
738 
739 	/* reap the caches now (purge if adapter is idle) */
740 	skmem_arena_reap(na->na_arena, (na->na_channels == 0));
741 
742 	/* delete the csm */
743 	if (ch->ch_schema != NULL) {
744 		skmem_cache_free(
745 			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
746 			ch->ch_schema);
747 		ch->ch_schema = NULL;
748 		ch->ch_schema_offset = (mach_vm_offset_t)-1;
749 	}
750 
751 	/* destroy the memory map */
752 	skmem_arena_munmap_channel(na->na_arena, ch);
753 
754 	/* mark the channel as unbound */
755 	atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE));
756 	ch->ch_na = NULL;
757 
758 	/* and finally release the nexus adapter; this might free it */
759 	(void) na_release_locked(na);
760 }
761 
762 static void
763 na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
764     boolean_t defunct)
765 {
766 	SK_LOCK_ASSERT_HELD();
767 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
768 
769 #if CONFIG_NEXUS_MONITOR
770 	/*
771 	 * Walk through all the rings and tell any monitor
772 	 * that the port is going to exit Skywalk mode
773 	 */
774 	nx_mon_stop(na);
775 #endif /* CONFIG_NEXUS_MONITOR */
776 
777 	/*
778 	 * Deactivate the adapter.
779 	 */
780 	(void) na->na_activate(na,
781 	    (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));
782 
783 	/*
784 	 * Free any remaining allocated packets for this process.
785 	 */
786 	if (ch->ch_pp != NULL) {
787 		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
788 		pp_purge_upp(ch->ch_pp, ch->ch_pid);
789 		if (!defunct) {
790 			pp_release(ch->ch_pp);
791 			ch->ch_pp = NULL;
792 		}
793 	}
794 
795 	/*
796 	 * Delete rings and buffers.
797 	 */
798 	na->na_krings_delete(na, ch, defunct);
799 }
800 
801 /* call with SK_LOCK held */
802 /*
803  * Allocate the per-fd structure __user_channel_schema.
804  */
805 static int
806 na_schema_alloc(struct kern_channel *ch)
807 {
808 	struct nexus_adapter *na = ch->ch_na;
809 	struct skmem_arena *ar = na->na_arena;
810 	struct skmem_arena_nexus *arn;
811 	mach_vm_offset_t roff[SKMEM_REGIONS];
812 	struct __kern_channel_ring *kr;
813 	struct __user_channel_schema *csm;
814 	struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
815 	mach_vm_offset_t base;
816 	uint32_t i, j, k, n[NR_ALL];
817 	enum txrx t;
818 
819 	/* see comments for struct __user_channel_schema */
820 	_CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
821 	_CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
822 	    sizeof(csm->csm_ver));
823 	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
824 	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
825 	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
826 	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
827 	    sizeof(csm->csm_kern_name));
828 
829 	SK_LOCK_ASSERT_HELD();
830 
831 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
832 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
833 	arn = skmem_arena_nexus(ar);
834 	ASSERT(arn != NULL);
835 	for_all_rings(t) {
836 		n[t] = 0;
837 	}
838 
839 	csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
840 	if (csm == NULL) {
841 		return ENOMEM;
842 	}
843 
844 	skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
845 	bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));
846 
847 	*(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;
848 
849 	/* kernel version and executable UUID */
850 	_CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
851 	(void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
852 	    version, sizeof(csm->csm_kern_name) - 1);
853 #if !XNU_TARGET_OS_OSX
854 	(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
855 	    kernelcache_uuid, sizeof(csm->csm_kern_uuid));
856 #else /* XNU_TARGET_OS_OSX */
857 	if (kernel_uuid != NULL) {
858 		(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
859 		    kernel_uuid, sizeof(csm->csm_kern_uuid));
860 	}
861 #endif /* XNU_TARGET_OS_OSX */
862 
863 	for_rx_tx(t) {
864 		ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
865 		n[t] = ch->ch_last[t] - ch->ch_first[t];
866 		ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
867 	}
868 
869 	/* return total number of tx and rx rings for this channel */
870 	*(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
871 	*(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];
872 
873 	if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
874 		*(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
875 		    na->na_num_allocator_ring_pairs;
876 		n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
877 		ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
878 		ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
879 		ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
880 	}
881 
882 	if (ch->ch_flags & CHANF_EVENT_RING) {
883 		n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
884 		ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
885 		*(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
886 	}
887 
888 	bzero(&roff, sizeof(roff));
889 	for (i = 0; i < SKMEM_REGIONS; i++) {
890 		if (ar->ar_regions[i] == NULL) {
891 			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
892 			    i == SKMEM_REGION_SCHEMA ||
893 			    i == SKMEM_REGION_RXBUF ||
894 			    i == SKMEM_REGION_TXBUF ||
895 			    i == SKMEM_REGION_RXKMD ||
896 			    i == SKMEM_REGION_TXKMD ||
897 			    i == SKMEM_REGION_UMD ||
898 			    i == SKMEM_REGION_UBFT ||
899 			    i == SKMEM_REGION_KBFT ||
900 			    i == SKMEM_REGION_RXKBFT ||
901 			    i == SKMEM_REGION_TXKBFT ||
902 			    i == SKMEM_REGION_TXAUSD ||
903 			    i == SKMEM_REGION_RXFUSD ||
904 			    i == SKMEM_REGION_USTATS ||
905 			    i == SKMEM_REGION_KSTATS ||
906 			    i == SKMEM_REGION_INTRINSIC ||
907 			    i == SKMEM_REGION_FLOWADV ||
908 			    i == SKMEM_REGION_NEXUSADV ||
909 			    i == SKMEM_REGION_SYSCTLS ||
910 			    i == SKMEM_REGION_GUARD_TAIL);
911 			continue;
912 		}
913 
914 		/* not for nexus */
915 		ASSERT(i != SKMEM_REGION_SYSCTLS);
916 
917 		/*
918 		 * Get region offsets from base of mmap span; the arena
919 		 * doesn't need to be mmap'd at this point, since we
920 		 * simply compute the relative offset.
921 		 */
922 		roff[i] = skmem_arena_get_region_offset(ar, i);
923 	}
924 
925 	/*
926 	 * The schema is made up of the descriptor followed inline by an array
927  * of offsets to the tx, rx, allocator, free and event rings in the mmap span.
928 	 * They contain the offset between the ring and schema, so the
929 	 * information is usable in userspace to reach the ring from
930 	 * the schema.
931 	 */
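	/*
	 * Illustrative userspace usage (a sketch, not code from this file):
	 * once the arena is mmap'd, a ring is reached from the schema as
	 *
	 *	ring = (void *)((uintptr_t)csm + csm->csm_ring_ofs[i].ring_off);
	 *
	 * and likewise the slot descriptors are reached via sd_off.
	 */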
932 	base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
933 
934 	/* initialize schema with tx ring info */
935 	for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
936 		kr = &na->na_tx_rings[j];
937 		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
938 			continue;
939 		}
940 
941 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
942 		skmem_cache_get_obj_info(arn->arn_ring_cache,
943 		    kr->ckr_ring, &ring_oi, NULL);
944 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
945 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
946 
947 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
948 		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
949 		    kr->ckr_ksds, &ksd_oi, &usd_oi);
950 
951 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
952 		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
953 		    base;
954 	}
955 	/* initialize schema with rx ring info */
956 	for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
957 		kr = &na->na_rx_rings[j];
958 		if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
959 			continue;
960 		}
961 
962 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
963 		skmem_cache_get_obj_info(arn->arn_ring_cache,
964 		    kr->ckr_ring, &ring_oi, NULL);
965 		*(mach_vm_offset_t *)
966 		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
967 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
968 
969 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
970 		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
971 		    kr->ckr_ksds, &ksd_oi, &usd_oi);
972 
973 		*(mach_vm_offset_t *)
974 		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
975 		    (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
976 		    base;
977 	}
978 	/* initialize schema with allocator ring info */
979 	for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
980 	    i < n[NR_A]; i++, j++) {
981 		mach_vm_offset_t usd_roff;
982 
983 		usd_roff = roff[SKMEM_REGION_TXAUSD];
984 		kr = &na->na_alloc_rings[j];
985 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
986 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
987 
988 		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
989 		    &ring_oi, NULL);
990 		*(mach_vm_offset_t *)
991 		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
992 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
993 
994 		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
995 		    &ksd_oi, &usd_oi);
996 		*(mach_vm_offset_t *)
997 		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
998 		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
999 	}
1000 	/* initialize schema with free ring info */
1001 	for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
1002 	    i < n[NR_F]; i++, j++) {
1003 		mach_vm_offset_t usd_roff;
1004 
1005 		usd_roff = roff[SKMEM_REGION_RXFUSD];
1006 		kr = &na->na_free_rings[j];
1007 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
1008 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
1009 
1010 		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
1011 		    &ring_oi, NULL);
1012 		*(mach_vm_offset_t *)
1013 		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
1014 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
1015 
1016 		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
1017 		    &ksd_oi, &usd_oi);
1018 		*(mach_vm_offset_t *)
1019 		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
1020 		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
1021 	}
1022 	/* initialize schema with event ring info */
1023 	for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
1024 	    n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
1025 		ASSERT(csm->csm_num_event_rings != 0);
1026 		kr = &na->na_event_rings[j];
1027 		ASSERT(!KR_KERNEL_ONLY(kr));
1028 		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
1029 		skmem_cache_get_obj_info(arn->arn_ring_cache,
1030 		    kr->ckr_ring, &ring_oi, NULL);
1031 		*(mach_vm_offset_t *)
1032 		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
1033 		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
1034 
1035 		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
1036 		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
1037 		    kr->ckr_ksds, &ksd_oi, &usd_oi);
1038 
1039 		*(mach_vm_offset_t *)
1040 		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
1041 		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
1042 		    base;
1043 	}
1044 
1045 	*(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
1046 	    __ch_umd_redzone_cookie;
1047 	*(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
1048 	*(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
1049 	    na->na_md_subtype;
1050 
1051 	if (arn->arn_stats_obj != NULL) {
1052 		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
1053 		ASSERT(roff[SKMEM_REGION_USTATS] != 0);
1054 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
1055 		    roff[SKMEM_REGION_USTATS];
1056 		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1057 		    na->na_stats_type;
1058 	} else {
1059 		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1060 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
1061 		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1062 		    NEXUS_STATS_TYPE_INVALID;
1063 	}
1064 
1065 	if (arn->arn_flowadv_obj != NULL) {
1066 		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
1067 		ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
1068 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
1069 		    roff[SKMEM_REGION_FLOWADV];
1070 		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
1071 		    na->na_flowadv_max;
1072 	} else {
1073 		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1074 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
1075 		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
1076 	}
1077 
1078 	if (arn->arn_nexusadv_obj != NULL) {
1079 		struct __kern_nexus_adv_metadata *adv_md;
1080 
1081 		adv_md = arn->arn_nexusadv_obj;
1082 		ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
1083 		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
1084 		ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
1085 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
1086 		    roff[SKMEM_REGION_NEXUSADV];
1087 	} else {
1088 		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1089 		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
1090 	}
1091 
1092 	ch->ch_schema = csm;
1093 	ch->ch_schema_offset = base;
1094 
1095 	return 0;
1096 }
1097 
1098 /*
1099  * Called by all routines that create nexus_adapters.
1100  * Attach na to the ifp (if any) and provide defaults
1101  * for optional callbacks. Defaults assume that we
1102  * are creating a hardware nexus_adapter.
1103  */
1104 void
1105 na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
1106     struct kern_nexus_domain_provider *nxdom_prov)
1107 {
1108 	SK_LOCK_ASSERT_HELD();
1109 
1110 	ASSERT(nx != NULL);
1111 	ASSERT(nxdom_prov != NULL);
1112 	ASSERT(na->na_krings_create != NULL);
1113 	ASSERT(na->na_krings_delete != NULL);
1114 	if (na->na_type != NA_NETIF_COMPAT_DEV) {
1115 		ASSERT(na_get_nrings(na, NR_TX) != 0);
1116 	}
1117 	if (na->na_type != NA_NETIF_COMPAT_HOST) {
1118 		ASSERT(na_get_nrings(na, NR_RX) != 0);
1119 	}
1120 	ASSERT(na->na_channels == 0);
1121 
1122 	if (na->na_notify == NULL) {
1123 		na->na_notify = na_notify;
1124 	}
1125 
1126 	na->na_nx = nx;
1127 	na->na_nxdom_prov = nxdom_prov;
1128 
1129 	SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
1130 	    SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
1131 	    SK_KVA(na->na_arena));
1132 }
1133 
1134 void
1135 na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
1136     boolean_t within_kevent, boolean_t selwake, uint32_t hint)
1137 {
1138 	struct nexus_adapter *na = KRNA(kring);
1139 	enum txrx t = kring->ckr_tx;
1140 
1141 	SK_DF(SK_VERB_EVENTS,
1142 	    "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
1143 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1144 	    na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
1145 	    hint, CHAN_FILT_HINT_BITS);
1146 
1147 	csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
1148 	/*
1149 	 * optimization: avoid a wake up on the global
1150 	 * queue if nobody has registered for more
1151 	 * than one ring
1152 	 */
1153 	if (na->na_si_users[t] > 0) {
1154 		csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
1155 	}
1156 }
1157 
1158 /* default notify callback */
1159 static int
1160 na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
1161 {
1162 #pragma unused(p)
1163 	SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
1164 	    SK_VERB_TX : SK_VERB_RX),
1165 	    "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
1166 	    "flags 0x%x, kh %u kt %u | h %u t %u",
1167 	    sk_proc_name_address(p), sk_proc_pid(p),
1168 	    (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
1169 	    SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
1170 	    kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
1171 	    kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);
1172 
1173 	na_post_event(kring, (flags & NA_NOTEF_PUSH),
1174 	    (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);
1175 
1176 	return 0;
1177 }
1178 
1179 /*
1180  * Fetch configuration from the device, to cope with dynamic
1181  * reconfigurations after loading the module.
1182  */
1183 /* call with SK_LOCK held */
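/*
 * A minimal sketch of a provider-supplied na_config callback, assuming
 * only the contract visible in the call below (fill in the ring and slot
 * counts and return 0, or return nonzero to fall back to the init-time
 * values); the helper names are hypothetical:
 *
 *	static int
 *	foo_na_config(struct nexus_adapter *na, uint32_t *txr, uint32_t *txd,
 *	    uint32_t *rxr, uint32_t *rxd)
 *	{
 *		*txr = *rxr = foo_cur_num_queues(na);	// hypothetical helper
 *		*txd = *rxd = foo_cur_ring_depth(na);	// hypothetical helper
 *		return 0;
 *	}
 */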
1184 int
1185 na_update_config(struct nexus_adapter *na)
1186 {
1187 	uint32_t txr, txd, rxr, rxd;
1188 
1189 	SK_LOCK_ASSERT_HELD();
1190 
1191 	txr = txd = rxr = rxd = 0;
1192 	if (na->na_config == NULL ||
1193 	    na->na_config(na, &txr, &txd, &rxr, &rxd)) {
1194 		/* take whatever we had at init time */
1195 		txr = na_get_nrings(na, NR_TX);
1196 		txd = na_get_nslots(na, NR_TX);
1197 		rxr = na_get_nrings(na, NR_RX);
1198 		rxd = na_get_nslots(na, NR_RX);
1199 	}
1200 
1201 	if (na_get_nrings(na, NR_TX) == txr &&
1202 	    na_get_nslots(na, NR_TX) == txd &&
1203 	    na_get_nrings(na, NR_RX) == rxr &&
1204 	    na_get_nslots(na, NR_RX) == rxd) {
1205 		return 0; /* nothing changed */
1206 	}
1207 	SK_D("stored config %s: txring %u x %u, rxring %u x %u",
1208 	    na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
1209 	    na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
1210 	SK_D("new config %s: txring %u x %u, rxring %u x %u",
1211 	    na->na_name, txr, txd, rxr, rxd);
1212 
1213 	if (na->na_channels == 0) {
1214 		SK_D("configuration changed (but fine)");
1215 		na_set_nrings(na, NR_TX, txr);
1216 		na_set_nslots(na, NR_TX, txd);
1217 		na_set_nrings(na, NR_RX, rxr);
1218 		na_set_nslots(na, NR_RX, rxd);
1219 		return 0;
1220 	}
1221 	SK_ERR("configuration changed while active, this is bad...");
1222 	return 1;
1223 }
1224 
1225 static void
1226 na_kr_setup_netif_svc_map(struct nexus_adapter *na)
1227 {
1228 	uint32_t i;
1229 	uint32_t num_tx_rings;
1230 
1231 	ASSERT(na->na_type == NA_NETIF_DEV);
1232 	num_tx_rings = na_get_nrings(na, NR_TX);
1233 
1234 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
1235 	    NAKR_WMM_SC2RINGID(KPKT_SC_BK));
1236 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1237 	    NAKR_WMM_SC2RINGID(KPKT_SC_RD));
1238 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1239 	    NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
1240 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1241 	    NAKR_WMM_SC2RINGID(KPKT_SC_RV));
1242 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1243 	    NAKR_WMM_SC2RINGID(KPKT_SC_VI));
1244 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
1245 	    NAKR_WMM_SC2RINGID(KPKT_SC_CTL));
1246 
1247 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
1248 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
1249 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
1250 	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);
1251 
1252 	_CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
1253 	_CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
1254 	_CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
1255 	_CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
1256 	_CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
1257 	_CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
1258 	_CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
1259 	_CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
1260 	_CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
1261 	_CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
1262 	_CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);
1263 
1264 	/*
1265 	 * We support the following 2 configurations:
1266 	 * 1. packets from all 10 service classes map to one ring.
1267 	 * 2. a 10:4 mapping between service classes and the rings. These 4
1268 	 *    rings map to the 4 WMM access categories.
1269 	 */
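	/*
	 * For illustration, in the WMM (10:4) configuration the groupings
	 * asserted above collapse as follows: BK_SYS/BK share the BK ring,
	 * BE/RD/OAM share the BE ring, AV/RV/VI share the VI ring, and
	 * VO/CTL share the VO ring; any remaining classes follow whatever
	 * PKT_SC2TC() maps them to.
	 */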
1270 	if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
1271 		ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
1272 		/* setup the adapter's service class LUT */
1273 		NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
1274 		NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
1275 		NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
1276 		NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
1277 		NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
1278 		NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
1279 		NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
1280 		NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
1281 		NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
1282 		NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
1283 		NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);
1284 
1285 		/* Initialize the service class for each of the 4 rings */
1286 		NAKR_SET_KR_SVC(na, KPKT_SC_BK);
1287 		NAKR_SET_KR_SVC(na, KPKT_SC_BE);
1288 		NAKR_SET_KR_SVC(na, KPKT_SC_VI);
1289 		NAKR_SET_KR_SVC(na, KPKT_SC_VO);
1290 	} else {
1291 		ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
1292 		    NEXUS_QMAP_TYPE_DEFAULT);
1293 		/* 10:1 mapping */
1294 		for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
1295 			na->na_kring_svc_lut[i] = 0;
1296 		}
1297 		for (i = 0; i < num_tx_rings; i++) {
1298 			NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
1299 		}
1300 	}
1301 }
1302 
1303 static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
1304 static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
1305 static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
1306 static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
1307 static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
1308 static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
1309 static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1310 
1311 static lck_grp_t *
1312 na_kr_q_lck_grp(enum txrx t)
1313 {
1314 	switch (t) {
1315 	case NR_TX:
1316 		return &channel_txq_lock_group;
1317 	case NR_RX:
1318 		return &channel_rxq_lock_group;
1319 	case NR_A:
1320 	case NR_F:
1321 		return &channel_alloc_lock_group;
1322 	case NR_EV:
1323 		return &channel_evq_lock_group;
1324 	default:
1325 		VERIFY(0);
1326 		/* NOTREACHED */
1327 		__builtin_unreachable();
1328 	}
1329 }
1330 
1331 static lck_grp_t *
1332 na_kr_s_lck_grp(enum txrx t)
1333 {
1334 	switch (t) {
1335 	case NR_TX:
1336 		return &channel_txs_lock_group;
1337 	case NR_RX:
1338 		return &channel_rxs_lock_group;
1339 	case NR_A:
1340 	case NR_F:
1341 		return &channel_alloc_lock_group;
1342 	case NR_EV:
1343 		return &channel_evs_lock_group;
1344 	default:
1345 		VERIFY(0);
1346 		/* NOTREACHED */
1347 		__builtin_unreachable();
1348 	}
1349 }
1350 
1351 static void
1352 kr_init_tbr(struct __kern_channel_ring *r)
1353 {
1354 	r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
1355 	r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
1356 	r->ckr_tbr_last = 0;
1357 }
1358 
1359 struct kern_pbufpool *
1360 na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
1361 {
1362 	struct kern_pbufpool *pp = NULL;
1363 	switch (t) {
1364 	case NR_RX:
1365 	case NR_F:
1366 	case NR_EV:
1367 		pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1368 		break;
1369 	case NR_TX:
1370 	case NR_A:
1371 		pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1372 		break;
1373 	default:
1374 		VERIFY(0);
1375 		/* NOTREACHED */
1376 		__builtin_unreachable();
1377 	}
1378 
1379 	return pp;
1380 }
1381 
1382 /*
1383  * Create the krings array and initialize the fields common to all adapters.
1384  * The array layout is this:
1385  *
1386  *                       +----------+
1387  * na->na_tx_rings ----->|          | \
1388  *                       |          |  } na->na_num_tx_rings
1389  *                       |          | /
1390  * na->na_rx_rings ----> +----------+
1391  *                       |          | \
1392  *                       |          |  } na->na_num_rx_rings
1393  *                       |          | /
1394  * na->na_alloc_rings -> +----------+
1395  *                       |          | \
1396  * na->na_free_rings --> +----------+  } na->na_num_allocator_ring_pairs
1397  *                       |          | /
1398  * na->na_event_rings -> +----------+
1399  *                       |          | \
1400  *                       |          |  } na->na_num_event_rings
1401  *                       |          | /
1402  *                       +----------+
1403  * na->na_tailroom ----->|          | \
1404  *                       |          |  } tailroom bytes
1405  *                       |          | /
1406  *                       +----------+
1407  *
1408  * The tailroom space is currently used by flow switch ports for allocating
1409  * leases.
1410  */
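/*
 * Worked example (the ring counts here are hypothetical): with 2 TX rings,
 * 2 RX rings, 1 allocator/free ring pair and 1 event ring, the single
 * allocation holds 7 krings followed by the tailroom bytes:
 *
 *	na_tx_rings    -> &krings[0]
 *	na_rx_rings    -> &krings[2]
 *	na_alloc_rings -> &krings[4]
 *	na_free_rings  -> &krings[5]
 *	na_event_rings -> &krings[6]
 *	na_tailroom    -> &krings[7]  (start of the tailroom bytes)
 */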
1411 /* call with SK_LOCK held */
1412 static int
1413 na_kr_create(struct nexus_adapter *na, uint32_t tailroom, boolean_t alloc_ctx)
1414 {
1415 	lck_grp_t *q_lck_grp, *s_lck_grp;
1416 	uint32_t i, len, ndesc;
1417 	struct kern_pbufpool *pp = NULL;
1418 	struct __kern_channel_ring *kring;
1419 	uint32_t n[NR_ALL];
1420 	int c, tot_slots, err = 0;
1421 	enum txrx t;
1422 
1423 	SK_LOCK_ASSERT_HELD();
1424 
1425 	n[NR_TX] = na_get_nrings(na, NR_TX);
1426 	n[NR_RX] = na_get_nrings(na, NR_RX);
1427 	n[NR_A] = na_get_nrings(na, NR_A);
1428 	n[NR_F] = na_get_nrings(na, NR_F);
1429 	n[NR_EV] = na_get_nrings(na, NR_EV);
1430 
1431 	len = ((n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV]) *
1432 	    sizeof(struct __kern_channel_ring)) + tailroom;
1433 
1434 	na->na_rings_mem_sz = (size_t)len;
1435 	na->na_tx_rings = sk_alloc((size_t)len, Z_WAITOK, skmem_tag_nx_rings);
1436 	if (__improbable(na->na_tx_rings == NULL)) {
1437 		SK_ERR("Cannot allocate krings");
1438 		err = ENOMEM;
1439 		goto error;
1440 	}
1441 	na->na_rx_rings = na->na_tx_rings + n[NR_TX];
1442 	if (n[NR_A] != 0) {
1443 		na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
1444 		na->na_free_rings = na->na_alloc_rings + n[NR_A];
1445 	} else {
1446 		na->na_alloc_rings = na->na_free_rings = NULL;
1447 	}
1448 	if (n[NR_EV] != 0) {
1449 		if (na->na_free_rings != NULL) {
1450 			na->na_event_rings = na->na_free_rings + n[NR_F];
1451 		} else {
1452 			na->na_event_rings = na->na_rx_rings + n[NR_RX];
1453 		}
1454 	}
1455 
1456 	/* total number of slots for TX/RX adapter rings */
1457 	c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
1458 	    (n[NR_RX] * na_get_nslots(na, NR_RX));
1459 
1460 	/* for scratch space on alloc and free rings */
1461 	if (n[NR_A] != 0) {
1462 		tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
1463 		tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
1464 		c = tot_slots;
1465 	}
1466 	na->na_total_slots = tot_slots;
1467 
1468 	/* slot context (optional) for all TX/RX ring slots of this adapter */
1469 	if (alloc_ctx) {
1470 		na->na_slot_ctxs =
1471 		    skn_alloc_type_array(slot_ctxs, struct slot_ctx,
1472 		    na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
1473 		if (na->na_slot_ctxs == NULL) {
1474 			SK_ERR("Cannot allocate slot contexts");
1475 			err = ENOMEM;
1476 			goto error;
1477 		}
1478 		atomic_bitset_32(&na->na_flags, NAF_SLOT_CONTEXT);
1479 	}
1480 
1481 	/*
1482 	 * packet handle array storage for all TX/RX ring slots of this
1483 	 * adapter.
1484 	 */
1485 	na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
1486 	    na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
1487 	if (na->na_scratch == NULL) {
1488 		SK_ERR("Cannot allocate scratch space");
1489 		err = ENOMEM;
1490 		goto error;
1491 	}
1492 
1493 	/*
1494 	 * All fields in krings are 0 except the ones initialized below;
1495 	 * even so, be explicit about the important kring fields.
1496 	 */
1497 	for_all_rings(t) {
1498 		ndesc = na_get_nslots(na, t);
1499 		pp = na_kr_get_pp(na, t);
1500 		for (i = 0; i < n[t]; i++) {
1501 			kring = &NAKR(na, t)[i];
1502 			bzero(kring, sizeof(*kring));
1503 			kring->ckr_na = na;
1504 			kring->ckr_pp = pp;
1505 			kring->ckr_max_pkt_len = pp->pp_buflet_size *
1506 			    pp->pp_max_frags;
1507 			kring->ckr_ring_id = i;
1508 			kring->ckr_tx = t;
1509 			kr_init_to_mhints(kring, ndesc);
1510 			kr_init_tbr(kring);
1511 			if (NA_KERNEL_ONLY(na)) {
1512 				kring->ckr_flags |= CKRF_KERNEL_ONLY;
1513 			}
1514 			if (na->na_flags & NAF_HOST_ONLY) {
1515 				kring->ckr_flags |= CKRF_HOST;
1516 			}
1517 			ASSERT((t >= NR_TXRX) || (c > 0));
1518 			if ((t < NR_TXRX) &&
1519 			    (na->na_flags & NAF_SLOT_CONTEXT)) {
1520 				ASSERT(na->na_slot_ctxs != NULL);
1521 				kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1522 				kring->ckr_slot_ctxs =
1523 				    na->na_slot_ctxs + (tot_slots - c);
1524 			}
1525 			ASSERT(na->na_scratch != NULL);
1526 			if (t < NR_TXRXAF) {
1527 				kring->ckr_scratch =
1528 				    na->na_scratch + (tot_slots - c);
1529 			}
1530 			if (t < NR_TXRXAF) {
1531 				c -= ndesc;
1532 			}
1533 			switch (t) {
1534 			case NR_A:
1535 				if (i == 0) {
1536 					kring->ckr_na_sync =
1537 					    na_packet_pool_alloc_sync;
1538 					kring->ckr_alloc_ws =
1539 					    na_upp_alloc_lowat;
1540 				} else {
1541 					ASSERT(i == 1);
1542 					kring->ckr_na_sync =
1543 					    na_packet_pool_alloc_buf_sync;
1544 					kring->ckr_alloc_ws =
1545 					    na_upp_alloc_buf_lowat;
1546 				}
1547 				break;
1548 			case NR_F:
1549 				if (i == 0) {
1550 					kring->ckr_na_sync =
1551 					    na_packet_pool_free_sync;
1552 				} else {
1553 					ASSERT(i == 1);
1554 					kring->ckr_na_sync =
1555 					    na_packet_pool_free_buf_sync;
1556 				}
1557 				break;
1558 			case NR_TX:
1559 				kring->ckr_na_sync = na->na_txsync;
1560 				if (na->na_flags & NAF_TX_MITIGATION) {
1561 					kring->ckr_flags |= CKRF_MITIGATION;
1562 				}
1563 				switch (na->na_type) {
1564 #if CONFIG_NEXUS_USER_PIPE
1565 				case NA_USER_PIPE:
1566 					ASSERT(!(na->na_flags &
1567 					    NAF_USER_PKT_POOL));
1568 					kring->ckr_prologue = kr_txprologue;
1569 					kring->ckr_finalize = NULL;
1570 					break;
1571 #endif /* CONFIG_NEXUS_USER_PIPE */
1572 #if CONFIG_NEXUS_MONITOR
1573 				case NA_MONITOR:
1574 					ASSERT(!(na->na_flags &
1575 					    NAF_USER_PKT_POOL));
1576 					kring->ckr_prologue = kr_txprologue;
1577 					kring->ckr_finalize = NULL;
1578 					break;
1579 #endif /* CONFIG_NEXUS_MONITOR */
1580 				default:
1581 					if (na->na_flags & NAF_USER_PKT_POOL) {
1582 						kring->ckr_prologue =
1583 						    kr_txprologue_upp;
1584 						kring->ckr_finalize =
1585 						    kr_txfinalize_upp;
1586 					} else {
1587 						kring->ckr_prologue =
1588 						    kr_txprologue;
1589 						kring->ckr_finalize =
1590 						    kr_txfinalize;
1591 					}
1592 					break;
1593 				}
1594 				break;
1595 			case NR_RX:
1596 				kring->ckr_na_sync = na->na_rxsync;
1597 				if (na->na_flags & NAF_RX_MITIGATION) {
1598 					kring->ckr_flags |= CKRF_MITIGATION;
1599 				}
1600 				switch (na->na_type) {
1601 #if CONFIG_NEXUS_USER_PIPE
1602 				case NA_USER_PIPE:
1603 					ASSERT(!(na->na_flags &
1604 					    NAF_USER_PKT_POOL));
1605 					kring->ckr_prologue =
1606 					    kr_rxprologue_nodetach;
1607 					kring->ckr_finalize = kr_rxfinalize;
1608 					break;
1609 #endif /* CONFIG_NEXUS_USER_PIPE */
1610 #if CONFIG_NEXUS_MONITOR
1611 				case NA_MONITOR:
1612 					ASSERT(!(na->na_flags &
1613 					    NAF_USER_PKT_POOL));
1614 					kring->ckr_prologue =
1615 					    kr_rxprologue_nodetach;
1616 					kring->ckr_finalize = kr_rxfinalize;
1617 					break;
1618 #endif /* CONFIG_NEXUS_MONITOR */
1619 				default:
1620 					if (na->na_flags & NAF_USER_PKT_POOL) {
1621 						kring->ckr_prologue =
1622 						    kr_rxprologue_upp;
1623 						kring->ckr_finalize =
1624 						    kr_rxfinalize_upp;
1625 					} else {
1626 						kring->ckr_prologue =
1627 						    kr_rxprologue;
1628 						kring->ckr_finalize =
1629 						    kr_rxfinalize;
1630 					}
1631 					break;
1632 				}
1633 				break;
1634 			case NR_EV:
1635 				kring->ckr_na_sync = kern_channel_event_sync;
1636 				break;
1637 			default:
1638 				VERIFY(0);
1639 				/* NOTREACHED */
1640 				__builtin_unreachable();
1641 			}
1642 			if (t != NR_EV) {
1643 				kring->ckr_na_notify = na->na_notify;
1644 			} else {
1645 				kring->ckr_na_notify = NULL;
1646 			}
1647 			(void) snprintf(kring->ckr_name,
1648 			    sizeof(kring->ckr_name) - 1,
1649 			    "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1650 			    ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1651 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1652 			    "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1653 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1654 			    CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1655 			kring->ckr_state = KR_READY;
1656 			q_lck_grp = na_kr_q_lck_grp(t);
1657 			s_lck_grp = na_kr_s_lck_grp(t);
1658 			kring->ckr_qlock_group = q_lck_grp;
1659 			lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1660 			    &channel_lock_attr);
1661 			kring->ckr_slock_group = s_lck_grp;
1662 			lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1663 			    &channel_lock_attr);
1664 			csi_init(&kring->ckr_si,
1665 			    (kring->ckr_flags & CKRF_MITIGATION),
1666 			    na->na_ch_mit_ival);
1667 		}
1668 		csi_init(&na->na_si[t],
1669 		    (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1670 		    na->na_ch_mit_ival);
1671 	}
1672 	ASSERT(c == 0);
1673 	na->na_tailroom = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV];
1674 
1675 	if (na->na_type == NA_NETIF_DEV) {
1676 		na_kr_setup_netif_svc_map(na);
1677 	}
1678 
1679 	/* validate now for cases where we create only krings */
1680 	na_krings_verify(na);
1681 	return 0;
1682 
1683 error:
1684 	ASSERT(err != 0);
1685 	if (na->na_tx_rings != NULL) {
1686 		sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1687 		na->na_tx_rings = NULL;
1688 	}
1689 	if (na->na_slot_ctxs != NULL) {
1690 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1691 		skn_free_type_array(slot_ctxs,
1692 		    struct slot_ctx, na->na_total_slots,
1693 		    na->na_slot_ctxs);
1694 		na->na_slot_ctxs = NULL;
1695 	}
1696 	if (na->na_scratch != NULL) {
1697 		skn_free_type_array(scratch,
1698 		    kern_packet_t, na->na_total_slots,
1699 		    na->na_scratch);
1700 		na->na_scratch = NULL;
1701 	}
1702 	return err;
1703 }
1704 
1705 /* undo the actions performed by na_kr_create() */
1706 /* call with SK_LOCK held */
1707 static void
1708 na_kr_delete(struct nexus_adapter *na)
1709 {
1710 	struct __kern_channel_ring *kring = na->na_tx_rings;
1711 	enum txrx t;
1712 
1713 	ASSERT((kring != NULL) && (na->na_tailroom != NULL));
1714 	SK_LOCK_ASSERT_HELD();
1715 
1716 	for_all_rings(t) {
1717 		csi_destroy(&na->na_si[t]);
1718 	}
1719 	/* we rely on the krings layout described above */
1720 	for (; kring != na->na_tailroom; kring++) {
1721 		lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1722 		lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1723 		csi_destroy(&kring->ckr_si);
1724 		if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1725 			kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1726 			ASSERT(kring->ckr_slot_ctxs != NULL);
1727 			kring->ckr_slot_ctxs = NULL;
1728 		}
1729 	}
1730 	if (na->na_slot_ctxs != NULL) {
1731 		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1732 		atomic_bitclear_32(&na->na_flags, NAF_SLOT_CONTEXT);
1733 		skn_free_type_array(slot_ctxs,
1734 		    struct slot_ctx, na->na_total_slots,
1735 		    na->na_slot_ctxs);
1736 		na->na_slot_ctxs = NULL;
1737 	}
1738 	if (na->na_scratch != NULL) {
1739 		skn_free_type_array(scratch,
1740 		    kern_packet_t, na->na_total_slots,
1741 		    na->na_scratch);
1742 		na->na_scratch = NULL;
1743 	}
1744 	ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1745 	sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1746 	na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1747 	    na->na_free_rings = na->na_event_rings = na->na_tailroom = NULL;
1748 }
1749 
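/*
 * Zero and initialize the kernel slot descriptors of a kring; when the
 * ring is not kernel-only, initialize the matching user slot descriptors
 * as well.
 */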
1750 static void
1751 na_kr_slot_desc_init(struct __slot_desc *ksds,
1752     boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1753 {
1754 	size_t i;
1755 
1756 	bzero(ksds, ndesc * SLOT_DESC_SZ);
1757 	if (usds != NULL) {
1758 		ASSERT(!kernel_only);
1759 		bzero(usds, ndesc * SLOT_DESC_SZ);
1760 	} else {
1761 		ASSERT(kernel_only);
1762 	}
1763 
1764 	for (i = 0; i < ndesc; i++) {
1765 		KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1766 		if (!kernel_only) {
1767 			USD_INIT(SLOT_DESC_USD(&usds[i]));
1768 		}
1769 	}
1770 }
1771 
1772 /* call with SK_LOCK held */
1773 static int
1774 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1775 {
1776 	struct skmem_arena *ar = na->na_arena;
1777 	struct skmem_arena_nexus *arn;
1778 	mach_vm_offset_t roff[SKMEM_REGIONS];
1779 	enum txrx t;
1780 	uint32_t i;
1781 
1782 	SK_LOCK_ASSERT_HELD();
1783 	ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1784 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1785 	arn = skmem_arena_nexus(ar);
1786 	ASSERT(arn != NULL);
1787 
1788 	bzero(&roff, sizeof(roff));
1789 	for (i = 0; i < SKMEM_REGIONS; i++) {
1790 		if (ar->ar_regions[i] == NULL) {
1791 			continue;
1792 		}
1793 
1794 		/* not for nexus */
1795 		ASSERT(i != SKMEM_REGION_SYSCTLS);
1796 
1797 		/*
1798 		 * Get region offsets from base of mmap span; the arena
1799 		 * doesn't need to be mmap'd at this point, since we
1800 		 * simply compute the relative offset.
1801 		 */
1802 		roff[i] = skmem_arena_get_region_offset(ar, i);
1803 	}
1804 
1805 	for_all_rings(t) {
1806 		for (i = 0; i < na_get_nrings(na, t); i++) {
1807 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1808 			struct __user_channel_ring *ring = kring->ckr_ring;
1809 			mach_vm_offset_t ring_off, usd_roff;
1810 			struct skmem_obj_info oi, oim;
1811 			uint32_t ndesc;
1812 
1813 			if (ring != NULL) {
1814 				SK_DF(SK_VERB_NA | SK_VERB_RING,
1815 				    "kr 0x%llx (\"%s\") is already "
1816 				    "initialized", SK_KVA(kring),
1817 				    kring->ckr_name);
1818 				continue; /* already created by somebody else */
1819 			}
1820 
1821 			if (!KR_KERNEL_ONLY(kring) &&
1822 			    (ring = skmem_cache_alloc(arn->arn_ring_cache,
1823 			    SKMEM_NOSLEEP)) == NULL) {
1824 				SK_ERR("Cannot allocate %s_ring for kr "
1825 				    "0x%llx (\"%s\")", sk_ring2str(t),
1826 				    SK_KVA(kring), kring->ckr_name);
1827 				goto cleanup;
1828 			}
1829 			kring->ckr_flags |= CKRF_MEM_RING_INITED;
1830 			kring->ckr_ring = ring;
1831 			ndesc = kring->ckr_num_slots;
1832 
1833 			if (ring == NULL) {
1834 				goto skip_user_ring_setup;
1835 			}
1836 
1837 			*(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1838 
1839 			/* offset of current ring in mmap span */
1840 			skmem_cache_get_obj_info(arn->arn_ring_cache,
1841 			    ring, &oi, NULL);
1842 			ring_off = (roff[SKMEM_REGION_RING] +
1843 			    SKMEM_OBJ_ROFF(&oi));
1844 
1845 			/*
1846 			 * ring_{buf,md,sd}_base offsets are relative to the
1847 			 * current ring, and not to the base of mmap span.
1848 			 */
1849 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_buf_base =
1850 			    (roff[SKMEM_REGION_BUF] - ring_off);
1851 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1852 			    (roff[SKMEM_REGION_UMD] - ring_off);
1853 			_CASSERT(sizeof(uint16_t) ==
1854 			    sizeof(ring->ring_bft_size));
1855 			if (roff[SKMEM_REGION_UBFT] != 0) {
1856 				ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1857 				    NULL);
1858 				*(mach_vm_offset_t *)(uintptr_t)
1859 				&ring->ring_bft_base =
1860 				    (roff[SKMEM_REGION_UBFT] - ring_off);
1861 				*(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1862 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1863 				    skr_c_obj_size;
1864 				ASSERT(ring->ring_bft_size ==
1865 				    ar->ar_regions[SKMEM_REGION_KBFT]->
1866 				    skr_c_obj_size);
1867 			} else {
1868 				*(mach_vm_offset_t *)(uintptr_t)
1869 				&ring->ring_bft_base = 0;
1870 				*(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1871 			}
1872 
1873 			if (t == NR_TX || t == NR_A || t == NR_EV) {
1874 				usd_roff = roff[SKMEM_REGION_TXAUSD];
1875 			} else {
1876 				ASSERT(t == NR_RX || t == NR_F);
1877 				usd_roff = roff[SKMEM_REGION_RXFUSD];
1878 			}
1879 			*(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1880 			    (usd_roff - ring_off);
1881 
1882 			/* copy values from kring */
1883 			ring->ring_head = kring->ckr_rhead;
1884 			*(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1885 			    kring->ckr_khead;
1886 			*(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1887 			    kring->ckr_rtail;
1888 
1889 			_CASSERT(sizeof(uint32_t) ==
1890 			    sizeof(ring->ring_buf_size));
1891 			_CASSERT(sizeof(uint16_t) ==
1892 			    sizeof(ring->ring_md_size));
1893 			*(uint32_t *)(uintptr_t)&ring->ring_buf_size =
1894 			    ar->ar_regions[SKMEM_REGION_BUF]->skr_c_obj_size;
1895 			if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1896 				*(uint16_t *)(uintptr_t)&ring->ring_md_size =
1897 				    (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1898 				    skr_c_obj_size;
1899 				ASSERT(ring->ring_md_size ==
1900 				    ar->ar_regions[SKMEM_REGION_KMD]->
1901 				    skr_c_obj_size);
1902 			} else {
1903 				*(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1904 				ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1905 				ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1906 			}
1907 
1908 			/* ring info */
1909 			_CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1910 			_CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1911 			*(uint16_t *)(uintptr_t)&ring->ring_id =
1912 			    (uint16_t)kring->ckr_ring_id;
1913 			*(uint16_t *)(uintptr_t)&ring->ring_kind =
1914 			    (uint16_t)kring->ckr_tx;
1915 
1916 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1917 			    "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1918 			    sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1919 			    kring->ckr_name);
1920 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1921 			    "  num_slots:  %u", ring->ring_num_slots);
1922 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1923 			    "  buf_base:   0x%llx",
1924 			    (uint64_t)ring->ring_buf_base);
1925 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1926 			    "  md_base:    0x%llx",
1927 			    (uint64_t)ring->ring_md_base);
1928 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1929 			    "  sd_base:    0x%llx",
1930 			    (uint64_t)ring->ring_sd_base);
1931 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1932 			    "  h, t:      %u, %u", ring->ring_head,
1933 			    ring->ring_tail);
1934 			SK_DF(SK_VERB_NA | SK_VERB_RING,
1935 			    "  md_size:    %u",
1936 			    ring->ring_md_size);
1937 
1938 			/* make sure they're in synch */
1939 			_CASSERT(NR_RX == CR_KIND_RX);
1940 			_CASSERT(NR_TX == CR_KIND_TX);
1941 			_CASSERT(NR_A == CR_KIND_ALLOC);
1942 			_CASSERT(NR_F == CR_KIND_FREE);
1943 			_CASSERT(NR_EV == CR_KIND_EVENT);
1944 
1945 skip_user_ring_setup:
1946 			/*
1947 			 * This flag tells na_kr_teardown_all() that it should
1948 			 * go thru the checks to free up the slot maps.
1949 			 */
1950 			kring->ckr_flags |= CKRF_MEM_SD_INITED;
1951 			if (t == NR_TX || t == NR_A || t == NR_EV) {
1952 				kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1953 			} else {
1954 				ASSERT(t == NR_RX || t == NR_F);
1955 				kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1956 			}
1957 			kring->ckr_ksds =
1958 			    skmem_cache_alloc(kring->ckr_ksds_cache,
1959 			    SKMEM_NOSLEEP);
1960 			if (kring->ckr_ksds == NULL) {
1961 				SK_ERR("Cannot allocate %s_ksds for kr "
1962 				    "0x%llx (\"%s\")", sk_ring2str(t),
1963 				    SK_KVA(kring), kring->ckr_name);
1964 				goto cleanup;
1965 			}
1966 			if (!KR_KERNEL_ONLY(kring)) {
1967 				skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1968 				    kring->ckr_ksds, &oi, &oim);
1969 				kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1970 			}
1971 			na_kr_slot_desc_init(kring->ckr_ksds,
1972 			    KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1973 
1974 			/* cache last slot descriptor address */
1975 			ASSERT(kring->ckr_lim == (ndesc - 1));
1976 			kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1977 
1978 			if ((t < NR_TXRX) &&
1979 			    !(na->na_flags & NAF_USER_PKT_POOL) &&
1980 			    na_kr_populate_slots(kring) != 0) {
1981 				SK_ERR("Cannot allocate buffers for kr "
1982 				    "0x%llx (\"%s\")", SK_KVA(kring),
1983 				    kring->ckr_name);
1984 				goto cleanup;
1985 			}
1986 		}
1987 	}
1988 
1989 	return 0;
1990 
1991 cleanup:
1992 	na_kr_teardown_all(na, ch, FALSE);
1993 
1994 	return ENOMEM;
1995 }
1996 
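/*
 * Tear down a single kring: depopulate its slots, free the slot descriptor
 * object once the arena reports the region idle, free the user ring object
 * if one was allocated, and mark the kring defunct when requested.
 */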
1997 static void
1998 na_kr_teardown_common(struct nexus_adapter *na,
1999     struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
2000     boolean_t defunct)
2001 {
2002 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
2003 	struct __user_channel_ring *ckr_ring;
2004 	boolean_t sd_idle, sd_inited;
2005 
2006 	ASSERT(arn != NULL);
2007 	kr_enter(kring, TRUE);
2008 	/*
2009 	 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
2010 	 * to make sure that the freeing needs to happen (else just
2011 	 * nullify the values).
2012 	 * If this adapter owns the memory for the slot descriptors,
2013 	 * check if the region is marked as busy (sd_idle is false)
2014 	 * and leave the kring's slot descriptor fields alone if so,
2015 	 * at defunct time.  At final teardown time, sd_idle must be
2016 	 * true else we assert; this indicates a missing call to
2017 	 * skmem_arena_nexus_sd_set_noidle().
2018 	 */
2019 	sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
2020 	if (sd_inited) {
2021 		/* callee will do KR_KSD(), so check */
2022 		if (((t < NR_TXRX) || (t == NR_EV)) &&
2023 		    (kring->ckr_ksds != NULL)) {
2024 			na_kr_depopulate_slots(kring, ch, defunct);
2025 		}
2026 		/* leave CKRF_MEM_SD_INITED flag alone until idle */
2027 		sd_idle = skmem_arena_nexus_sd_idle(arn);
2028 		VERIFY(sd_idle || defunct);
2029 	} else {
2030 		sd_idle = TRUE;
2031 	}
2032 
2033 	if (sd_idle) {
2034 		kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2035 		if (kring->ckr_ksds != NULL) {
2036 			if (sd_inited) {
2037 				skmem_cache_free(kring->ckr_ksds_cache,
2038 				    kring->ckr_ksds);
2039 			}
2040 			kring->ckr_ksds = NULL;
2041 			kring->ckr_ksds_last = NULL;
2042 			kring->ckr_usds = NULL;
2043 		}
2044 		ASSERT(kring->ckr_ksds_last == NULL);
2045 		ASSERT(kring->ckr_usds == NULL);
2046 	}
2047 
2048 	if ((ckr_ring = kring->ckr_ring) != NULL) {
2049 		kring->ckr_ring = NULL;
2050 	}
2051 
2052 	if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2053 		ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2054 		if (ckr_ring != NULL) {
2055 			skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2056 		}
2057 		kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2058 	}
2059 
2060 	if (defunct) {
2061 		/* if defunct, drop everything; see KR_DROP() */
2062 		kring->ckr_flags |= CKRF_DEFUNCT;
2063 	}
2064 	kr_exit(kring);
2065 }
2066 
2067 /*
2068  * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2069  */
2070 static void
2071 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2072     boolean_t defunct)
2073 {
2074 	enum txrx t;
2075 
2076 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2077 
2078 	/* skip if this adapter has no allocated rings */
2079 	if (na->na_tx_rings == NULL) {
2080 		return;
2081 	}
2082 
2083 	for_all_rings(t) {
2084 		for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2085 			na_kr_teardown_common(na, &NAKR(na, t)[i],
2086 			    t, ch, defunct);
2087 		}
2088 	}
2089 }
2090 
2091 /*
2092  * Teardown only {tx,rx} rings assigned to the channel.
2093  */
2094 static void
2095 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2096     boolean_t defunct, struct proc *p)
2097 {
2098 	enum txrx t;
2099 
2100 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2101 
2102 	for_rx_tx(t) {
2103 		ring_id_t qfirst = ch->ch_first[t];
2104 		ring_id_t qlast = ch->ch_last[t];
2105 		uint32_t i;
2106 
2107 		for (i = qfirst; i < qlast; i++) {
2108 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2109 			na_kr_teardown_common(na, kring, t, ch, defunct);
2110 
2111 			/*
2112 			 * Issue a notify to wake up anyone sleeping in kqueue
2113 			 * so that they notice the newly defuncted channels and
2114 			 * return an error
2115 			 */
2116 			kring->ckr_na_notify(kring, p, 0);
2117 		}
2118 	}
2119 }
2120 
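/*
 * Attach a freshly allocated packet to every slot of a TX/RX kring.
 * Certain nexus/ring combinations are exempt and return early (see the
 * switch below).  On allocation failure, detach and free whatever was
 * attached so far and return ENOMEM.
 */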
2121 static int
2122 na_kr_populate_slots(struct __kern_channel_ring *kring)
2123 {
2124 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2125 	struct nexus_adapter *na = KRNA(kring);
2126 	kern_pbufpool_t pp = kring->ckr_pp;
2127 	uint32_t nslots = kring->ckr_num_slots;
2128 	uint32_t start_idx, i;
2129 	uint32_t sidx = 0;      /* slot counter */
2130 	struct __kern_slot_desc *ksd;
2131 	struct __user_slot_desc *usd;
2132 	struct __kern_quantum *kqum;
2133 	nexus_type_t nexus_type;
2134 	int err = 0;
2135 
2136 	ASSERT(kring->ckr_tx < NR_TXRX);
2137 	ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2138 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2139 	ASSERT(pp != NULL);
2140 
2141 	/*
2142 	 * xxx_ppool: remove this special case
2143 	 */
2144 	nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2145 
2146 	switch (nexus_type) {
2147 	case NEXUS_TYPE_FLOW_SWITCH:
2148 	case NEXUS_TYPE_KERNEL_PIPE:
2149 		/*
2150 		 * xxx_ppool: This is temporary code until we come up with a
2151 		 * scheme for user space to alloc & attach packets to tx ring.
2152 		 */
2153 		if (kernel_only || kring->ckr_tx == NR_RX) {
2154 			return 0;
2155 		}
2156 		break;
2157 
2158 	case NEXUS_TYPE_NET_IF:
2159 		if (((na->na_type == NA_NETIF_DEV) ||
2160 		    (na->na_type == NA_NETIF_HOST)) &&
2161 		    (kernel_only || (kring->ckr_tx == NR_RX))) {
2162 			return 0;
2163 		}
2164 
2165 		ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2166 		    (na->na_type == NA_NETIF_COMPAT_HOST) ||
2167 		    (na->na_type == NA_NETIF_DEV) ||
2168 		    (na->na_type == NA_NETIF_VP));
2169 
2170 		if (!kernel_only) {
2171 			if (kring->ckr_tx == NR_RX) {
2172 				return 0;
2173 			} else {
2174 				break;
2175 			}
2176 		}
2177 
2178 		ASSERT(kernel_only);
2179 
2180 		if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2181 		    (na->na_type == NA_NETIF_COMPAT_HOST)) {
2182 			return 0;
2183 		}
2184 		VERIFY(0);
2185 		/* NOTREACHED */
2186 		__builtin_unreachable();
2187 
2188 	case NEXUS_TYPE_USER_PIPE:
2189 	case NEXUS_TYPE_MONITOR:
2190 		break;
2191 
2192 	default:
2193 		VERIFY(0);
2194 		/* NOTREACHED */
2195 		__builtin_unreachable();
2196 	}
2197 
2198 	/* Fill the ring with packets */
2199 	sidx = start_idx = 0;
2200 	for (i = 0; i < nslots; i++) {
2201 		kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2202 		    SKMEM_NOSLEEP));
2203 		if (kqum == NULL) {
2204 			err = ENOMEM;
2205 			SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2206 			    "after %u of %u, err %d", SK_KVA(na->na_arena),
2207 			    na->na_arena->ar_name, i, nslots, err);
2208 			goto cleanup;
2209 		}
2210 		ksd = KR_KSD(kring, i);
2211 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2212 
2213 		/* attach packet to slot */
2214 		kqum->qum_ksd = ksd;
2215 		ASSERT(!KSD_VALID_METADATA(ksd));
2216 		KSD_ATTACH_METADATA(ksd, kqum);
2217 		if (usd != NULL) {
2218 			USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2219 			kr_externalize_metadata(kring, pp->pp_max_frags,
2220 			    kqum, current_proc());
2221 		}
2222 
2223 		SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2224 		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2225 		    SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2226 		    SK_KVA(&kqum->qum_buf[0]));
2227 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2228 			SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2229 			    "uqum [%-3u, 0x%llx]  ubuf[%-3u, 0x%llx]",
2230 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2231 			    SK_KVA(usd), METADATA_IDX(kqum),
2232 			    SK_KVA(kqum->qum_user),
2233 			    kqum->qum_user->qum_buf[0].buf_idx,
2234 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2235 		}
2236 
2237 		sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2238 	}
2239 
2240 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2241 	    SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2242 
2243 cleanup:
2244 	if (err != 0) {
2245 		sidx = start_idx;
2246 		while (i-- > 0) {
2247 			ksd = KR_KSD(kring, i);
2248 			usd = (kernel_only ? NULL : KR_USD(kring, i));
2249 			kqum = ksd->sd_qum;
2250 
2251 			ASSERT(ksd == kqum->qum_ksd);
2252 			KSD_RESET(ksd);
2253 			if (usd != NULL) {
2254 				USD_RESET(usd);
2255 			}
2256 			/* detach packet from slot */
2257 			kqum->qum_ksd = NULL;
2258 			pp_free_packet(pp, SK_PTR_ADDR(kqum));
2259 
2260 			sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2261 		}
2262 	}
2263 	return err;
2264 }
2265 
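/*
 * Detach and free the packets attached to a kring's slots.  For adapters
 * using a user packet pool, packets still loaned to user space are removed
 * from the loaned-packet hash table first.  When called due to defunct,
 * the user slot descriptors are left untouched.
 */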
2266 static void
2267 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2268     struct kern_channel *ch, boolean_t defunct)
2269 {
2270 #pragma unused(ch)
2271 	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2272 	uint32_t i, j, n = kring->ckr_num_slots;
2273 	struct nexus_adapter *na = KRNA(kring);
2274 	struct kern_pbufpool *pp = kring->ckr_pp;
2275 	boolean_t upp = FALSE;
2276 	obj_idx_t midx;
2277 
2278 	ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2279 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2280 
2281 	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2282 
2283 	if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2284 	    (kring->ckr_tx != NR_EV)) {
2285 		upp = TRUE;
2286 	}
2287 	for (i = 0, j = 0; i < n; i++) {
2288 		struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2289 		struct __user_slot_desc *usd;
2290 		struct __kern_quantum *qum, *kqum;
2291 		boolean_t free_packet = FALSE;
2292 		int err;
2293 
2294 		if (!KSD_VALID_METADATA(ksd)) {
2295 			continue;
2296 		}
2297 
2298 		kqum = ksd->sd_qum;
2299 		usd = (kernel_only ? NULL : KR_USD(kring, i));
2300 		midx = METADATA_IDX(kqum);
2301 
2302 		/*
2303 		 * if the packet is internalized it should not be in the
2304 		 * hash table of packets loaned to user space.
2305 		 */
2306 		if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2307 			if ((qum = pp_find_upp(pp, midx)) != NULL) {
2308 				panic("internalized packet 0x%llx in htbl",
2309 				    SK_KVA(qum));
2310 				/* NOTREACHED */
2311 				__builtin_unreachable();
2312 			}
2313 			free_packet = TRUE;
2314 		} else if (upp) {
2315 			/*
2316 			 * if the packet is not internalized check if it is
2317 			 * in the list of packets loaned to user-space.
2318 			 * Remove from the list before freeing.
2319 			 */
2320 			ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2321 			qum = pp_remove_upp(pp, midx, &err);
2322 			if (err != 0) {
2323 				SK_ERR("un-allocated packet or buflet %u 0x%llx",
2324 				    midx, SK_KVA(qum));
2325 				if (qum != NULL) {
2326 					free_packet = TRUE;
2327 				}
2328 			}
2329 		} else {
2330 			free_packet = TRUE;
2331 		}
2332 
2333 		/*
2334 		 * Clear the user and kernel slot descriptors.  Note that
2335 		 * if we are depopulating the slots due to defunct (and not
2336 		 * due to normal deallocation/teardown), we leave the user
2337 		 * slot descriptor alone.  At that point the process may
2338 		 * be suspended, and later when it resumes it would just
2339 		 * pick up the original contents and move forward with
2340 		 * whatever it was doing.
2341 		 */
2342 		KSD_RESET(ksd);
2343 		if (usd != NULL && !defunct) {
2344 			USD_RESET(usd);
2345 		}
2346 
2347 		/* detach packet from slot */
2348 		kqum->qum_ksd = NULL;
2349 
2350 		SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2351 		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2352 		    METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2353 		    SK_KVA(&kqum->qum_buf[0]));
2354 		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2355 			SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2356 			    "uqum [%-3u, 0x%llx]  ubuf[%-3u, 0x%llx]",
2357 			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2358 			    SK_KVA(usd), METADATA_IDX(kqum),
2359 			    SK_KVA(kqum->qum_user),
2360 			    kqum->qum_user->qum_buf[0].buf_idx,
2361 			    SK_KVA(&kqum->qum_user->qum_buf[0]));
2362 		}
2363 
2364 		if (free_packet) {
2365 			pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2366 		}
2367 	}
2368 
2369 	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2370 	    SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2371 	    j, n);
2372 }
2373 
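/*
 * Create the krings array (plus optional tailroom) for an adapter and,
 * unless NAF_MEM_NO_INIT is set, set up the backing ring and slot memory.
 * Call with SK_LOCK held.
 */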
2374 int
2375 na_rings_mem_setup(struct nexus_adapter *na, uint32_t tailroom,
2376     boolean_t alloc_ctx, struct kern_channel *ch)
2377 {
2378 	boolean_t kronly;
2379 	int err;
2380 
2381 	SK_LOCK_ASSERT_HELD();
2382 	ASSERT(na->na_channels == 0);
2383 	/*
2384 	 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2385 	 * the backing memory regions for the adapter.
2386 	 */
2387 	kronly = (na->na_flags & NAF_MEM_NO_INIT);
2388 	ASSERT(!kronly || NA_KERNEL_ONLY(na));
2389 
2390 	/*
2391 	 * Create and initialize the common fields of the krings array.
2392 	 * using the information that must be already available in the na.
2393 	 * tailroom can be used to request the allocation of additional
2394 	 * tailroom bytes after the krings array.  This is used by
2395 	 * nexus_vp_adapter's (i.e., flow switch ports) to make room
2396 	 * for leasing-related data structures.
2397 	 */
2398 	if ((err = na_kr_create(na, tailroom, alloc_ctx)) == 0 && !kronly) {
2399 		err = na_kr_setup(na, ch);
2400 		if (err != 0) {
2401 			na_kr_delete(na);
2402 		}
2403 	}
2404 
2405 	return err;
2406 }
2407 
2408 void
2409 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2410     boolean_t defunct)
2411 {
2412 	SK_LOCK_ASSERT_HELD();
2413 	ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2414 
2415 	/*
2416 	 * Deletes the kring and ring array of the adapter. They
2417 	 * must have been created using na_rings_mem_setup().
2418 	 *
2419 	 * XXX: [email protected] -- the parameter "ch" should not be
2420 	 * needed here; however na_kr_depopulate_slots() needs to
2421 	 * go thru the channel's user packet pool hash, and so for
2422 	 * now we leave it here.
2423 	 */
2424 	na_kr_teardown_all(na, ch, defunct);
2425 	if (!defunct) {
2426 		na_kr_delete(na);
2427 	}
2428 }
2429 
2430 void
2431 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2432 {
2433 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2434 
2435 	/*
2436 	 * Depopulate slots on the TX and RX rings of this channel,
2437 	 * but don't touch other rings owned by other channels if
2438 	 * this adapter is being shared.
2439 	 */
2440 	na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2441 }
2442 
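/*
 * Set or clear CKRF_DROP on every TX/RX kring of the adapter, serializing
 * with any sync in progress via kr_enter()/kr_exit().
 */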
2443 void
2444 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2445 {
2446 	enum txrx t;
2447 	uint32_t i;
2448 
2449 	for_rx_tx(t) {
2450 		for (i = 0; i < na_get_nrings(na, t); i++) {
2451 			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2452 			int error;
2453 			error = kr_enter(kring, TRUE);
2454 			if (drop) {
2455 				kring->ckr_flags |= CKRF_DROP;
2456 			} else {
2457 				kring->ckr_flags &= ~CKRF_DROP;
2458 			}
2459 
2460 			if (error != 0) {
2461 				SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2462 				    "kr_enter failed %d",
2463 				    na->na_name, SK_KVA(na),
2464 				    kring->ckr_name, SK_KVA(kring),
2465 				    error);
2466 			} else {
2467 				kr_exit(kring);
2468 			}
2469 			SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2470 			    "krflags 0x%b", na->na_name, SK_KVA(na),
2471 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2472 			    CKRF_BITS);
2473 		}
2474 	}
2475 }
2476 
2477 /*
2478  * Set the stopped/enabled status of a ring.  When stopping, this also waits
2479  * for all current activity on the ring to terminate.  The status change
2480  * is then notified using the na_notify callback.
2481  */
2482 static void
2483 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2484     uint32_t state)
2485 {
2486 	struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2487 
2488 	/*
2489 	 * Mark the ring as stopped/enabled, and run through the
2490 	 * locks to make sure other users get to see it.
2491 	 */
2492 	if (state == KR_READY) {
2493 		kr_start(kr);
2494 	} else {
2495 		kr_stop(kr, state);
2496 	}
2497 }
2498 
2499 
2500 /* stop or enable all the rings of na */
2501 static void
2502 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2503 {
2504 	uint32_t i;
2505 	enum txrx t;
2506 
2507 	SK_LOCK_ASSERT_HELD();
2508 
2509 	if (!NA_IS_ACTIVE(na)) {
2510 		return;
2511 	}
2512 
2513 	for_rx_tx(t) {
2514 		for (i = 0; i < na_get_nrings(na, t); i++) {
2515 			na_set_ring(na, i, t, state);
2516 		}
2517 	}
2518 }
2519 
2520 /*
2521  * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
2522  * to finish and prevents any new one from starting.  Call this before turning
2523  * Skywalk mode off, or before removing the hardware rings (e.g., on module
2524  * unload).  As a rule of thumb for Linux drivers, this should be placed near
2525  * each napi_disable().
2526  */
2527 void
2528 na_disable_all_rings(struct nexus_adapter *na)
2529 {
2530 	na_set_all_rings(na, KR_STOPPED);
2531 }
2532 
2533 /*
2534  * Convenience function used in drivers.  Re-enables rxsync and txsync on the
2535  * adapter's rings.  In Linux drivers, this should be placed near each
2536  * napi_enable().
2537  */
2538 void
2539 na_enable_all_rings(struct nexus_adapter *na)
2540 {
2541 	na_set_all_rings(na, KR_READY /* enabled */);
2542 }
2543 
2544 void
2545 na_lock_all_rings(struct nexus_adapter *na)
2546 {
2547 	na_set_all_rings(na, KR_LOCKED);
2548 }
2549 
2550 void
2551 na_unlock_all_rings(struct nexus_adapter *na)
2552 {
2553 	na_enable_all_rings(na);
2554 }
2555 
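/*
 * Connect a user channel to the nexus adapter named by chr: find (or create)
 * the adapter, bind the channel to its rings, map the arena into the
 * requesting process, and mark the channel schema as active.
 * Call with SK_LOCK held.
 */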
2556 int
2557 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2558     struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2559 {
2560 	struct nexus_adapter *na = NULL;
2561 	mach_vm_size_t memsize = 0;
2562 	int err = 0;
2563 	enum txrx t;
2564 
2565 	ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2566 	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2567 
2568 	SK_LOCK_ASSERT_HELD();
2569 
2570 	/* find the nexus adapter and return the reference */
2571 	err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2572 	if (err != 0) {
2573 		ASSERT(na == NULL);
2574 		goto done;
2575 	}
2576 
2577 	if (NA_KERNEL_ONLY(na)) {
2578 		err = EBUSY;
2579 		goto done;
2580 	}
2581 
2582 	/* reject if the adapter is defunct or non-permissive */
2583 	if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2584 		err = ENXIO;
2585 		goto done;
2586 	}
2587 
2588 	err = na_bind_channel(na, ch, chr);
2589 	if (err != 0) {
2590 		goto done;
2591 	}
2592 
2593 	ASSERT(ch->ch_schema != NULL);
2594 	ASSERT(na == ch->ch_na);
2595 
2596 	for_all_rings(t) {
2597 		if (na_get_nrings(na, t) == 0) {
2598 			ch->ch_si[t] = NULL;
2599 			continue;
2600 		}
2601 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2602 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2603 	}
2604 
2605 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2606 
2607 	if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2608 	    AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2609 		atomic_bitset_32(__DECONST(uint32_t *,
2610 		    &ch->ch_schema->csm_flags), CSM_PRIV_MEM);
2611 	}
2612 
2613 	err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2614 	if (err != 0) {
2615 		goto done;
2616 	}
2617 
2618 	atomic_bitset_32(__DECONST(uint32_t *, &ch->ch_schema->csm_flags),
2619 	    CSM_ACTIVE);
2620 	chr->cr_memsize = memsize;
2621 	chr->cr_memoffset = ch->ch_schema_offset;
2622 
2623 	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2624 	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2625 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2626 	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2627 	    na->na_flags, NAF_BITS);
2628 
2629 done:
2630 	if (err != 0) {
2631 		if (ch->ch_schema != NULL || na != NULL) {
2632 			if (ch->ch_schema != NULL) {
2633 				ASSERT(na == ch->ch_na);
2634 				/*
2635 				 * Callee will unmap memory region if needed,
2636 				 * as well as release reference held on 'na'.
2637 				 */
2638 				na_disconnect(nx, ch);
2639 				na = NULL;
2640 			}
2641 			if (na != NULL) {
2642 				(void) na_release_locked(na);
2643 				na = NULL;
2644 			}
2645 		}
2646 	}
2647 
2648 	return err;
2649 }
2650 
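/*
 * Disconnect a user channel from its adapter: unbind the channel, which
 * also tears down the memory mapping and releases the reference on the na.
 * Call with SK_LOCK held.
 */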
2651 void
2652 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2653 {
2654 #pragma unused(nx)
2655 	enum txrx t;
2656 
2657 	SK_LOCK_ASSERT_HELD();
2658 
2659 	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2660 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2661 	    ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2662 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2663 	    ch->ch_na->na_flags, NAF_BITS);
2664 
2665 	/* destroy mapping and release references */
2666 	na_unbind_channel(ch);
2667 	ASSERT(ch->ch_na == NULL);
2668 	ASSERT(ch->ch_schema == NULL);
2669 	for_all_rings(t) {
2670 		ch->ch_si[t] = NULL;
2671 	}
2672 }
2673 
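/*
 * Mark the adapter defunct for this channel: run the nexus-specific
 * teardown (depopulating slots) and, if this adapter owns its arena,
 * defunct the backing memory regions as well.
 */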
2674 void
2675 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2676     struct nexus_adapter *na, boolean_t locked)
2677 {
2678 #pragma unused(nx)
2679 	SK_LOCK_ASSERT_HELD();
2680 	if (!locked) {
2681 		lck_mtx_lock(&ch->ch_lock);
2682 	}
2683 
2684 	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2685 
2686 	if (!(na->na_flags & NAF_DEFUNCT)) {
2687 		/*
2688 		 * Mark this adapter as defunct to inform nexus-specific
2689 		 * teardown handler called by na_teardown() below.
2690 		 */
2691 		atomic_bitset_32(&na->na_flags, NAF_DEFUNCT);
2692 
2693 		/*
2694 		 * Depopulate slots.
2695 		 */
2696 		na_teardown(na, ch, TRUE);
2697 
2698 		/*
2699 		 * And finally destroy any already-defunct memory regions.
2700 		 * Do this only if the nexus adapter owns the arena, i.e.
2701 		 * NAF_MEM_LOANED is not set.  Otherwise, we'd expect
2702 		 * that this routine be called again for the real owner.
2703 		 */
2704 		if (!(na->na_flags & NAF_MEM_LOANED)) {
2705 			skmem_arena_defunct(na->na_arena);
2706 		}
2707 	}
2708 
2709 	SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2710 	    "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2711 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2712 	    na->na_name, ch->ch_info->cinfo_nx_port,
2713 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2714 	    na->na_flags, NAF_BITS);
2715 
2716 	if (!locked) {
2717 		lck_mtx_unlock(&ch->ch_lock);
2718 	}
2719 }
2720 
2721 /*
2722  * TODO: [email protected] -- merge this into na_connect()
2723  */
2724 int
2725 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2726     struct chreq *chr, struct proc *p)
2727 {
2728 #pragma unused(p)
2729 	struct nexus_adapter *na = NULL;
2730 	mach_vm_size_t memsize = 0;
2731 	int error = 0;
2732 	enum txrx t;
2733 
2734 	ASSERT(chr->cr_mode & CHMODE_KERNEL);
2735 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2736 	ASSERT(ch->ch_na == NULL);
2737 	ASSERT(ch->ch_schema == NULL);
2738 
2739 	SK_LOCK_ASSERT_HELD();
2740 
2741 	error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2742 	if (error != 0) {
2743 		goto done;
2744 	}
2745 
2746 	if (na == NULL) {
2747 		error = EINVAL;
2748 		goto done;
2749 	}
2750 
2751 	if (na->na_channels > 0) {
2752 		error = EBUSY;
2753 		goto done;
2754 	}
2755 
2756 	if (na->na_flags & NAF_DEFUNCT) {
2757 		error = ENXIO;
2758 		goto done;
2759 	}
2760 
2761 	/*
2762 	 * Special connect requires the nexus adapter to handle its
2763 	 * own channel binding and unbinding via na_special(); bail
2764 	 * if this adapter doesn't support it.
2765 	 */
2766 	if (na->na_special == NULL) {
2767 		error = ENOTSUP;
2768 		goto done;
2769 	}
2770 
2771 	/* upon success, "ch->ch_na" will point to "na" */
2772 	error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2773 	if (error != 0) {
2774 		ASSERT(ch->ch_na == NULL);
2775 		goto done;
2776 	}
2777 
2778 	ASSERT(na->na_flags & NAF_SPEC_INIT);
2779 	ASSERT(na == ch->ch_na);
2780 	/* make sure this is still the case */
2781 	ASSERT(ch->ch_schema == NULL);
2782 
2783 	for_rx_tx(t) {
2784 		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2785 		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2786 	}
2787 
2788 	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2789 	chr->cr_memsize = memsize;
2790 
2791 	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2792 	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2793 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2794 	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2795 	    na->na_flags, NAF_BITS);
2796 
2797 done:
2798 	if (error != 0) {
2799 		if (ch->ch_na != NULL || na != NULL) {
2800 			if (ch->ch_na != NULL) {
2801 				ASSERT(na == ch->ch_na);
2802 				/* callee will release reference on 'na' */
2803 				na_disconnect_spec(nx, ch);
2804 				na = NULL;
2805 			}
2806 			if (na != NULL) {
2807 				(void) na_release_locked(na);
2808 				na = NULL;
2809 			}
2810 		}
2811 	}
2812 
2813 	return error;
2814 }
2815 
2816 /*
2817  * TODO: [email protected] -- merge this into na_disconnect()
2818  */
2819 void
2820 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2821 {
2822 #pragma unused(nx)
2823 	struct nexus_adapter *na = ch->ch_na;
2824 	enum txrx t;
2825 	int error;
2826 
2827 	SK_LOCK_ASSERT_HELD();
2828 	ASSERT(na != NULL);
2829 	ASSERT(na->na_flags & NAF_SPEC_INIT);   /* has been bound */
2830 
2831 	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2832 	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2833 	    na->na_name, ch->ch_info->cinfo_nx_port,
2834 	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2835 	    na->na_flags, NAF_BITS);
2836 
2837 	/* take a reference for this routine */
2838 	na_retain_locked(na);
2839 
2840 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2841 	ASSERT(ch->ch_schema == NULL);
2842 	ASSERT(na->na_special != NULL);
2843 	/* unbind this channel */
2844 	error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2845 	ASSERT(error == 0);
2846 	ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2847 
2848 	/* now release our reference; this may be the last */
2849 	na_release_locked(na);
2850 	na = NULL;
2851 
2852 	ASSERT(ch->ch_na == NULL);
2853 	for_rx_tx(t) {
2854 		ch->ch_si[t] = NULL;
2855 	}
2856 }
2857 
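/* start data movement on a kernel-only (special) channel's adapter */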
2858 void
2859 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2860 {
2861 #pragma unused(nx)
2862 	struct nexus_adapter *na = ch->ch_na;
2863 
2864 	SK_LOCK_ASSERT_HELD();
2865 
2866 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2867 	ASSERT(NA_KERNEL_ONLY(na));
2868 	ASSERT(na->na_special != NULL);
2869 
2870 	na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2871 }
2872 
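/* stop data movement on a kernel-only (special) channel's adapter */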
2873 void
2874 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2875 {
2876 #pragma unused(nx)
2877 	struct nexus_adapter *na = ch->ch_na;
2878 
2879 	SK_LOCK_ASSERT_HELD();
2880 
2881 	ASSERT(ch->ch_flags & CHANF_KERNEL);
2882 	ASSERT(NA_KERNEL_ONLY(na));
2883 	ASSERT(na->na_special != NULL);
2884 
2885 	na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2886 }
2887 
2888 /*
2889  * MUST BE CALLED UNDER SK_LOCK()
2890  *
2891  * Get a refcounted reference to a nexus adapter attached
2892  * to the interface specified by chr.
2893  * This is always called in the execution of an ioctl().
2894  *
2895  * Return ENXIO if the interface specified by the request does
2896  * not exist, ENOTSUP if Skywalk is not supported by the interface,
2897  * EINVAL if parameters are invalid, ENOMEM if needed resources
2898  * could not be allocated.
2899  * If successful, hold a reference to the nexus adapter.
2900  *
2901  * No reference is kept on the real interface, which may then
2902  * disappear at any time.
2903  */
2904 int
2905 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2906     struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
2907     struct nexus_adapter **na, boolean_t create)
2908 {
2909 	int error = 0;
2910 
2911 	_CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2912 
2913 	*na = NULL;     /* default return value */
2914 
2915 	SK_LOCK_ASSERT_HELD();
2916 
2917 	/*
2918 	 * We cascade through all possible types of nexus adapter.
2919 	 * All nx_*_na_find() functions return an error and an na,
2920 	 * with the following combinations:
2921 	 *
2922 	 * error    na
2923 	 *   0	   NULL		type doesn't match
2924 	 *  !0	   NULL		type matches, but na creation/lookup failed
2925 	 *   0	  !NULL		type matches and na created/found
2926 	 *  !0    !NULL		impossible
2927 	 */
2928 
2929 #if CONFIG_NEXUS_MONITOR
2930 	/* try to see if this is a monitor port */
2931 	error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
2932 	if (error != 0 || *na != NULL) {
2933 		return error;
2934 	}
2935 #endif /* CONFIG_NEXUS_MONITOR */
2936 #if CONFIG_NEXUS_USER_PIPE
2937 	/* try to see if this is a pipe port */
2938 	error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2939 	if (error != 0 || *na != NULL) {
2940 		return error;
2941 	}
2942 #endif /* CONFIG_NEXUS_USER_PIPE */
2943 #if CONFIG_NEXUS_KERNEL_PIPE
2944 	/* try to see if this is a kernel pipe port */
2945 	error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2946 	if (error != 0 || *na != NULL) {
2947 		return error;
2948 	}
2949 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
2950 #if CONFIG_NEXUS_FLOWSWITCH
2951 	/* try to see if this is a flowswitch port */
2952 	error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2953 	if (error != 0 || *na != NULL) {
2954 		return error;
2955 	}
2956 #endif /* CONFIG_NEXUS_FLOWSWITCH */
2957 #if CONFIG_NEXUS_NETIF
2958 	error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2959 	if (error != 0 || *na != NULL) {
2960 		return error;
2961 	}
2962 #endif /* CONFIG_NEXUS_NETIF */
2963 
2964 	ASSERT(*na == NULL);
2965 	return ENXIO;
2966 }
2967 
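/* take an additional reference on the adapter; a NULL na is tolerated */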
2968 void
2969 na_retain_locked(struct nexus_adapter *na)
2970 {
2971 	SK_LOCK_ASSERT_HELD();
2972 
2973 	if (na != NULL) {
2974 #if SK_LOG
2975 		uint32_t oref = atomic_add_32_ov(&na->na_refcount, 1);
2976 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2977 		    na->na_name, SK_KVA(na), oref + 1, na->na_channels);
2978 #else /* !SK_LOG */
2979 		atomic_add_32(&na->na_refcount, 1);
2980 #endif /* !SK_LOG */
2981 	}
2982 }
2983 
2984 /* returns 1 iff the nexus_adapter is destroyed */
2985 int
2986 na_release_locked(struct nexus_adapter *na)
2987 {
2988 	uint32_t oref;
2989 
2990 	SK_LOCK_ASSERT_HELD();
2991 
2992 	ASSERT(na->na_refcount > 0);
2993 	oref = atomic_add_32_ov(&na->na_refcount, -1);
2994 	if (oref > 1) {
2995 		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2996 		    na->na_name, SK_KVA(na), oref - 1, na->na_channels);
2997 		return 0;
2998 	}
2999 	ASSERT(na->na_channels == 0);
3000 
3001 #if CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF
3002 	struct ifnet *ifp = na->na_ifp;
3003 	if (ifp != NULL) {
3004 		/*
3005 		 * Prevent threads from doing further data movement
3006 		 * on this interface; callee holds an I/O refcnt
3007 		 * which we'll release later during resume.
3008 		 */
3009 		ifnet_datamov_suspend(ifp);
3010 	}
3011 #endif /* !CONFIG_NEXUS_FLOWSWITCH & !CONFIG_NEXUS_NETIF */
3012 
3013 	if (na->na_flags & NAF_ASYNC_DTOR) {
3014 		na_destroyer_enqueue(na);
3015 	} else {
3016 		na_destroyer_final(na);
3017 	}
3018 
3019 	return 1;
3020 }
3021 
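/*
 * Final teardown of an adapter whose last reference has been dropped:
 * quiesce and detach any attached ifnet, run the adapter destructor,
 * release the arena, and free the na.
 */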
3022 static void
3023 na_destroyer_final(struct nexus_adapter *na)
3024 {
3025 	SK_LOCK_ASSERT_HELD();
3026 
3027 #if CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF
3028 	struct ifnet *ifp = na->na_ifp;
3029 
3030 	if (ifp != NULL) {
3031 		SK_UNLOCK();
3032 		/*
3033 		 * Wait until all threads in the data paths are done.
3034 		 */
3035 		ifnet_datamov_drain(ifp);
3036 
3037 		if (na->na_type == NA_NETIF_DEV ||
3038 		    na->na_type == NA_NETIF_COMPAT_DEV) {
3039 			/* undo what nx_netif_attach() did */
3040 			ASSERT(na == (struct nexus_adapter *)ifp->if_na);
3041 			ifp->if_na_ops = NULL;
3042 			ifp->if_na = NULL;
3043 			membar_sync();
3044 
3045 			SKYWALK_CLEAR_CAPABLE(ifp, na);
3046 		}
3047 		SK_LOCK();
3048 	}
3049 #endif /* !CONFIG_NEXUS_FLOWSWITCH & !CONFIG_NEXUS_NETIF */
3050 
3051 	ASSERT(na->na_refcount == 0);
3052 	if (na->na_dtor != NULL) {
3053 		na->na_dtor(na);
3054 	}
3055 
3056 #if CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF
3057 	if (na->na_ifp != NULL) {
3058 		ASSERT(ifp == na->na_ifp);
3059 		SK_DF(SK_VERB_REFCNT,
3060 		    "na \"%s\" (0x%llx) releasing %s [ioref %u]",
3061 		    na->na_name, SK_KVA(na), na->na_ifp->if_xname,
3062 		    (na->na_ifp->if_refio - 1));
3063 		ifnet_decr_iorefcnt(na->na_ifp);
3064 		na->na_ifp = NULL;
3065 	}
3066 
3067 	/*
3068 	 * Release reference during suspend and mark the interface
3069 	 * as data-ready; at this point it's safe to resume data
3070 	 * movement thru the interface.
3071 	 */
3072 	if (ifp != NULL) {
3073 		ifnet_datamov_resume(ifp);
3074 		ifp = NULL;
3075 	}
3076 #endif /* CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF */
3077 
3078 	ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
3079 	ASSERT(na->na_slot_ctxs == NULL);
3080 	ASSERT(na->na_scratch == NULL);
3081 
3082 #if CONFIG_NEXUS_USER_PIPE
3083 	nx_upipe_na_dealloc(na);
3084 #endif /* CONFIG_NEXUS_USER_PIPE */
3085 	if (na->na_arena != NULL) {
3086 		skmem_arena_release(na->na_arena);
3087 		na->na_arena = NULL;
3088 	}
3089 
3090 	SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
3091 	    na->na_name, SK_KVA(na));
3092 
3093 	NA_FREE(na);
3094 }
3095 
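/* defer final teardown to the destroyer thread (NAF_ASYNC_DTOR case) */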
3096 static void
3097 na_destroyer_enqueue(struct nexus_adapter *na)
3098 {
3099 	SK_LOCK_ASSERT_HELD();
3100 
3101 	ASSERT(na->na_refcount == 0);
3102 	++na_destroyer_cnt;
3103 	VERIFY(na_destroyer_cnt != 0);
3104 	TAILQ_INSERT_TAIL(&na_destroyer_head, na, na_destroyer_link);
3105 	wakeup((caddr_t)&na_destroyer_run);
3106 }
3107 
3108 static struct nexus_adapter *
3109 na_destroyer_dequeue(void)
3110 {
3111 	struct nexus_adapter *na;
3112 
3113 	SK_LOCK_ASSERT_HELD();
3114 
3115 	na = TAILQ_FIRST(&na_destroyer_head);
3116 	VERIFY(na_destroyer_cnt != 0 || na == NULL);
3117 	if (na != NULL) {
3118 		VERIFY(na_destroyer_cnt != 0);
3119 		--na_destroyer_cnt;
3120 		TAILQ_REMOVE(&na_destroyer_head, na, na_destroyer_link);
3121 		na->na_destroyer_link.tqe_next = NULL;
3122 		na->na_destroyer_link.tqe_prev = NULL;
3123 	}
3124 	return na;
3125 }
3126 
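/*
 * Continuation for the destroyer thread: sleep until adapters are queued,
 * then dequeue and finalize them one at a time under SK_LOCK.
 */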
3127 static int
3128 na_destroyer_thread_cont(int err)
3129 {
3130 #pragma unused(err)
3131 	struct nexus_adapter *na;
3132 
3133 	for (;;) {
3134 		SK_LOCK_ASSERT_HELD();
3135 		while (na_destroyer_cnt == 0) {
3136 			(void) msleep0(&na_destroyer_run, &sk_lock,
3137 			    (PZERO - 1), "na_destroyer_thread_cont", 0,
3138 			    na_destroyer_thread_cont);
3139 			/* NOTREACHED */
3140 		}
3141 
3142 		net_update_uptime();
3143 
3144 		VERIFY(TAILQ_FIRST(&na_destroyer_head) != NULL);
3145 
3146 		na = na_destroyer_dequeue();
3147 		if (na != NULL) {
3148 			na_destroyer_final(na);
3149 			SK_LOCK_ASSERT_HELD();
3150 		}
3151 	}
3152 }
3153 
3154 __dead2
3155 static void
3156 na_destroyer_thread_func(void *v, wait_result_t w)
3157 {
3158 #pragma unused(v, w)
3159 	SK_LOCK();
3160 	(void) msleep0(&na_destroyer_run, &sk_lock,
3161 	    (PZERO - 1), "na_destroyer", 0, na_destroyer_thread_cont);
3162 	/*
3163 	 * msleep0() shouldn't have returned as PCATCH was not set;
3164 	 * therefore assert in this case.
3165 	 */
3166 	SK_UNLOCK();
3167 	VERIFY(0);
3168 	/* NOTREACHED */
3169 	__builtin_unreachable();
3170 }
3171 
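/* allocate a zeroed adapter from na_pseudo_zone and set its type and free callback */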
3172 static struct nexus_adapter *
3173 na_pseudo_alloc(zalloc_flags_t how)
3174 {
3175 	struct nexus_adapter *na;
3176 
3177 	na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
3178 	if (na) {
3179 		na->na_type = NA_PSEUDO;
3180 		na->na_free = na_pseudo_free;
3181 	}
3182 	return na;
3183 }
3184 
3185 static void
3186 na_pseudo_free(struct nexus_adapter *na)
3187 {
3188 	ASSERT(na->na_refcount == 0);
3189 	SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3190 	bzero(na, sizeof(*na));
3191 	zfree(na_pseudo_zone, na);
3192 }
3193 
3194 static int
3195 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3196     uint32_t flags)
3197 {
3198 #pragma unused(kring, p, flags)
3199 	SK_DF(SK_VERB_SYNC | SK_VERB_TX,
3200 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x",
3201 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3202 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3203 	    flags);
3204 
3205 	return 0;
3206 }
3207 
3208 static int
3209 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3210     uint32_t flags)
3211 {
3212 #pragma unused(kring, p, flags)
3213 	SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3214 	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x",
3215 	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3216 	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3217 	    flags);
3218 
3219 	ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3220 
3221 	return 0;
3222 }
3223 
3224 static int
3225 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3226 {
3227 	SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3228 	    SK_KVA(na), na_activate_mode2str(mode));
3229 
3230 	switch (mode) {
3231 	case NA_ACTIVATE_MODE_ON:
3232 		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
3233 		break;
3234 
3235 	case NA_ACTIVATE_MODE_DEFUNCT:
3236 		break;
3237 
3238 	case NA_ACTIVATE_MODE_OFF:
3239 		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
3240 		break;
3241 
3242 	default:
3243 		VERIFY(0);
3244 		/* NOTREACHED */
3245 		__builtin_unreachable();
3246 	}
3247 
3248 	return 0;
3249 }
3250 
3251 static void
3252 na_pseudo_dtor(struct nexus_adapter *na)
3253 {
3254 #pragma unused(na)
3255 }
3256 
3257 static int
3258 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3259 {
3260 	return na_rings_mem_setup(na, 0, FALSE, ch);
3261 }
3262 
3263 static void
3264 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3265     boolean_t defunct)
3266 {
3267 	na_rings_mem_teardown(na, ch, defunct);
3268 }
3269 
3270 /*
3271  * Pseudo nexus adapter; typically used as a generic parent adapter.
3272  */
3273 int
3274 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3275     struct nexus_adapter **ret)
3276 {
3277 	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3278 	struct nexus_adapter *na;
3279 	int error;
3280 
3281 	SK_LOCK_ASSERT_HELD();
3282 	*ret = NULL;
3283 
3284 	na = na_pseudo_alloc(Z_WAITOK);
3285 
3286 	ASSERT(na->na_type == NA_PSEUDO);
3287 	ASSERT(na->na_free == na_pseudo_free);
3288 
3289 	(void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3290 	na->na_name[sizeof(na->na_name) - 1] = '\0';
3291 	uuid_generate_random(na->na_uuid);
3292 
3293 	/*
3294 	 * Verify upper bounds; for all cases including user pipe nexus,
3295 	 * the parameters must have already been validated by corresponding
3296 	 * nxdom_prov_params() function defined by each domain.
3297 	 */
3298 	na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3299 	na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3300 	na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3301 	na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3302 	ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3303 	ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3304 	ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3305 	ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3306 
3307 	na->na_txsync = na_pseudo_txsync;
3308 	na->na_rxsync = na_pseudo_rxsync;
3309 	na->na_activate = na_pseudo_activate;
3310 	na->na_dtor = na_pseudo_dtor;
3311 	na->na_krings_create = na_pseudo_krings_create;
3312 	na->na_krings_delete = na_pseudo_krings_delete;
3313 
3314 	*(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3315 	    NEXUS_STATS_TYPE_INVALID;
3316 
3317 	/* other fields are set in the common routine */
3318 	na_attach_common(na, nx, NX_DOM_PROV(nx));
3319 
3320 	if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3321 	    nx, na)) != 0) {
3322 		ASSERT(na->na_arena == NULL);
3323 		goto err;
3324 	}
3325 	ASSERT(na->na_arena != NULL);
3326 
3327 	*(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3328 	ASSERT(na->na_flowadv_max == 0 ||
3329 	    skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3330 
3331 #if SK_LOG
3332 	uuid_string_t uuidstr;
3333 	SK_D("na_name: \"%s\"", na->na_name);
3334 	SK_D("  UUID:        %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3335 	SK_D("  nx:          0x%llx (\"%s\":\"%s\")",
3336 	    SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3337 	    NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3338 	SK_D("  flags:       %b", na->na_flags, NAF_BITS);
3339 	SK_D("  flowadv_max: %u", na->na_flowadv_max);
3340 	SK_D("  rings:       tx %u rx %u",
3341 	    na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3342 	SK_D("  slots:       tx %u rx %u",
3343 	    na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3344 #if CONFIG_NEXUS_USER_PIPE
3345 	SK_D("  next_pipe:   %u", na->na_next_pipe);
3346 	SK_D("  max_pipes:   %u", na->na_max_pipes);
3347 #endif /* CONFIG_NEXUS_USER_PIPE */
3348 #endif /* SK_LOG */
3349 
3350 	*ret = na;
3351 	na_retain_locked(na);
3352 
3353 	return 0;
3354 
3355 err:
3356 	if (na != NULL) {
3357 		if (na->na_arena != NULL) {
3358 			skmem_arena_release(na->na_arena);
3359 			na->na_arena = NULL;
3360 		}
3361 		NA_FREE(na);
3362 	}
3363 	return error;
3364 }
3365 
3366 void
3367 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3368     const flowadv_idx_t fe_idx)
3369 {
3370 	struct skmem_arena *ar = na->na_arena;
3371 	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3372 	struct __flowadv_entry *fae;
3373 
3374 	ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3375 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3376 
3377 	AR_LOCK(ar);
3378 
3379 	/* we must not get here if arena is defunct; this must be valid */
3380 	ASSERT(arn->arn_flowadv_obj != NULL);
3381 
3382 	VERIFY(fe_idx < na->na_flowadv_max);
3383 	fae = &arn->arn_flowadv_obj[fe_idx];
3384 	uuid_copy(fae->fae_id, fae_id);
3385 	fae->fae_flags |= FLOWADVF_VALID;
3386 
3387 	AR_UNLOCK(ar);
3388 }
3389 
3390 void
3391 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3392     const flowadv_idx_t fe_idx)
3393 {
3394 #pragma unused(fae_id)
3395 	struct skmem_arena *ar = na->na_arena;
3396 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3397 
3398 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3399 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3400 
3401 	AR_LOCK(ar);
3402 
3403 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3404 	if (arn->arn_flowadv_obj != NULL) {
3405 		struct __flowadv_entry *fae;
3406 
3407 		VERIFY(fe_idx < na->na_flowadv_max);
3408 		fae = &arn->arn_flowadv_obj[fe_idx];
3409 		ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3410 		uuid_clear(fae->fae_id);
3411 		fae->fae_flags &= ~FLOWADVF_VALID;
3412 	}
3413 
3414 	AR_UNLOCK(ar);
3415 }
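
/*
 * Note: na_flowadv_entry_alloc() and na_flowadv_entry_free() only
 * manipulate the entry's UUID and FLOWADVF_VALID flag while holding the
 * arena lock; the free path also tolerates a defunct arena, in which
 * case the flow advisory table has already been torn down and there is
 * nothing left to clear.
 */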
3416 
3417 bool
3418 na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3419     const flowadv_token_t flow_token)
3420 {
3421 	struct skmem_arena *ar = na->na_arena;
3422 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3423 	bool suspend;
3424 
3425 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3426 	ASSERT(fe_idx < na->na_flowadv_max);
3427 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3428 
3429 	AR_LOCK(ar);
3430 
3431 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3432 
3433 	if (arn->arn_flowadv_obj != NULL) {
3434 		struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3435 
3436 		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3437 		/*
3438 		 * We cannot guarantee that the flow is still around by now,
3439 		 * so check if that's the case and let the caller know.
3440 		 */
3441 		if ((suspend = (fae->fae_token == flow_token))) {
3442 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3443 			fae->fae_flags |= FLOWADVF_SUSPENDED;
3444 		}
3445 	} else {
3446 		suspend = false;
3447 	}
3448 	if (suspend) {
3449 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3450 		    "SUSPEND", sk_proc_name_address(current_proc()),
3451 		    sk_proc_pid(current_proc()), flow_token, fe_idx);
3452 	} else {
3453 		SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around",
3454 		    sk_proc_name_address(current_proc()),
3455 		    sk_proc_pid(current_proc()), flow_token, fe_idx);
3456 	}
3457 
3458 	AR_UNLOCK(ar);
3459 
3460 	return suspend;
3461 }
3462 
3463 int
3464 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3465     const flowadv_token_t flow_token)
3466 {
3467 	struct nexus_adapter *na = ch->ch_na;
3468 	struct skmem_arena *ar = na->na_arena;
3469 	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3470 	boolean_t resume;
3471 
3472 	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3473 	ASSERT(fe_idx < na->na_flowadv_max);
3474 	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3475 
3476 	AR_LOCK(ar);
3477 
3478 	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3479 
3480 	if (arn->arn_flowadv_obj != NULL) {
3481 		struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3482 
3483 		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3484 		/*
3485 		 * We cannot guarantee that the flow is still around by now,
3486 		 * so check if that's the case and let the caller know.
3487 		 */
3488 		if ((resume = (fae->fae_token == flow_token))) {
3489 			ASSERT(fae->fae_flags & FLOWADVF_VALID);
3490 			fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3491 		}
3492 	} else {
3493 		resume = FALSE;
3494 	}
3495 	if (resume) {
3496 		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3497 		    "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
3498 		    fe_idx);
3499 	} else {
3500 		SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3501 		    ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3502 	}
3503 
3504 	AR_UNLOCK(ar);
3505 
3506 	return resume;
3507 }
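
/*
 * Taken together, na_flowadv_set() and na_flowadv_clear() form the two
 * halves of the flow advisory handshake: when the caller's token still
 * matches fae_token, the entry is marked FLOWADVF_SUSPENDED (set) or has
 * that bit cleared again (clear); a stale token means the flow has since
 * been torn down, and the return value lets the caller know the advisory
 * no longer applies.
 */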
3508 
3509 void
3510 na_flowadv_event(struct __kern_channel_ring *kring)
3511 {
3512 	ASSERT(kring->ckr_tx == NR_TX);
3513 
3514 	SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
3515 	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
3516 	    KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3517 
3518 	na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3519 }
3520 
3521 static int
3522 na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3523     uint32_t flags)
3524 {
3525 #pragma unused(flags, p)
3526 	int n, ret = 0;
3527 	slot_idx_t j;
3528 	struct __kern_slot_desc *ksd;
3529 	struct __user_slot_desc *usd;
3530 	struct __kern_quantum *kqum;
3531 	struct kern_pbufpool *pp = kring->ckr_pp;
3532 	uint32_t nfree = 0;
3533 
3534 	/* packet pool list is protected by channel lock */
3535 	ASSERT(!KR_KERNEL_ONLY(kring));
3536 
3537 	/* # of new slots */
3538 	n = kring->ckr_rhead - kring->ckr_khead;
3539 	if (n < 0) {
3540 		n += kring->ckr_num_slots;
3541 	}
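	/*
	 * Illustrative example: with ckr_num_slots of 128, a khead of 120
	 * and an rhead of 8 give n = 8 - 120 = -112, which wraps around to
	 * -112 + 128 = 16 slots carrying packets returned by user space.
	 */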
3542 
3543 	/* nothing to free */
3544 	if (__improbable(n == 0)) {
3545 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3546 		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3547 		    "nothing to free");
3548 		goto done;
3549 	}
3550 
3551 	j = kring->ckr_khead;
3552 	PP_LOCK(pp);
3553 	while (n--) {
3554 		int err;
3555 
3556 		ksd = KR_KSD(kring, j);
3557 		usd = KR_USD(kring, j);
3558 
3559 		if (__improbable(!SD_VALID_METADATA(usd))) {
3560 			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3561 			ret = EINVAL;
3562 			break;
3563 		}
3564 
3565 		kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3566 		if (__improbable(err != 0)) {
3567 			SK_ERR("un-allocated packet or buflet %d %p",
3568 			    usd->sd_md_idx, SK_KVA(kqum));
3569 			ret = EINVAL;
3570 			break;
3571 		}
3572 
3573 		/* detach and free the packet */
3574 		kqum->qum_qflags &= ~QUM_F_FINALIZED;
3575 		kqum->qum_ksd = NULL;
3576 		ASSERT(!KSD_VALID_METADATA(ksd));
3577 		USD_DETACH_METADATA(usd);
3578 		ASSERT(pp == kqum->qum_pp);
3579 		ASSERT(nfree < kring->ckr_num_slots);
3580 		kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3581 		j = SLOT_NEXT(j, kring->ckr_lim);
3582 	}
3583 	PP_UNLOCK(pp);
3584 
3585 	if (__probable(nfree > 0)) {
3586 		pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3587 	}
3588 
3589 	kring->ckr_khead = j;
3590 	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3591 
3592 done:
3593 	return ret;
3594 }
3595 
3596 static int
3597 na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
3598     uint32_t flags)
3599 {
3600 	int b, err;
3601 	uint32_t n = 0;
3602 	slot_idx_t j;
3603 	uint64_t now;
3604 	uint32_t curr_ws, ph_needed, ph_cnt;
3605 	struct __kern_slot_desc *ksd;
3606 	struct __user_slot_desc *usd;
3607 	struct __kern_quantum *kqum;
3608 	kern_pbufpool_t pp = kring->ckr_pp;
3609 	pid_t pid = proc_pid(p);
3610 
3611 	/* packet pool list is protected by channel lock */
3612 	ASSERT(!KR_KERNEL_ONLY(kring));
3613 	ASSERT(!PP_KERNEL_ONLY(pp));
3614 
3615 	now = _net_uptime;
3616 	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3617 		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3618 			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3619 		}
3620 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3621 		    "%s: purged curr_ws(%d)", kring->ckr_name,
3622 		    kring->ckr_alloc_ws);
3623 		return 0;
3624 	}
3625 	/* reclaim the completed slots */
3626 	kring->ckr_khead = kring->ckr_rhead;
3627 
3628 	/* # of busy (unclaimed) slots */
3629 	b = kring->ckr_ktail - kring->ckr_khead;
3630 	if (b < 0) {
3631 		b += kring->ckr_num_slots;
3632 	}
3633 
3634 	curr_ws = kring->ckr_alloc_ws;
3635 	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3636 		/* increment the working set by 50% */
3637 		curr_ws += (curr_ws >> 1);
3638 		curr_ws = MIN(curr_ws, kring->ckr_lim);
3639 	} else {
3640 		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3641 		    (uint32_t)b >= (curr_ws >> 2)) {
3642 			/* decrease the working set by 25% */
3643 			curr_ws -= (curr_ws >> 2);
3644 		}
3645 	}
3646 	curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
3647 	if (curr_ws > (uint32_t)b) {
3648 		n = curr_ws - b;
3649 	}
3650 	kring->ckr_alloc_ws = curr_ws;
3651 	kring->ckr_sync_time = now;
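	/*
	 * Working-set example (illustrative numbers): starting from a
	 * curr_ws of 64, a forced sync grows it to 64 + 32 = 96 (capped at
	 * ckr_lim), while an idle ring past na_upp_ws_hold_time with at
	 * least 16 busy slots shrinks it to 64 - 16 = 48; the result is
	 * never allowed to drop below na_upp_alloc_lowat.
	 */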
3652 
3653 	/* min with # of avail free slots (subtract busy from max) */
3654 	n = ph_needed = MIN(n, kring->ckr_lim - b);
3655 	j = kring->ckr_ktail;
3656 	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3657 	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3658 
3659 	if ((ph_cnt = ph_needed) == 0) {
3660 		goto done;
3661 	}
3662 
3663 	err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
3664 	    &ph_cnt);
3665 
3666 	if (__improbable(ph_cnt == 0)) {
3667 		SK_ERR("kr 0x%llx failed to alloc %u packet s(%d)",
3668 		    SK_KVA(kring), ph_needed, err);
3669 		kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
3670 	} else {
3671 		/*
3672 		 * Add packets to the allocated list of user packet pool.
3673 		 */
3674 		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
3675 	}
3676 
3677 
3678 	for (n = 0; n < ph_cnt; n++) {
3679 		ksd = KR_KSD(kring, j);
3680 		usd = KR_USD(kring, j);
3681 
3682 		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
3683 		kring->ckr_scratch[n] = 0;
3684 		ASSERT(kqum != NULL);
3685 
3686 		/* cleanup any stale slot mapping */
3687 		KSD_RESET(ksd);
3688 		ASSERT(usd != NULL);
3689 		USD_RESET(usd);
3690 
3691 		/*
3692 		 * Since this packet is freshly allocated and we need to
3693 		 * have the flag set for the attach to succeed, just set
3694 		 * it here rather than calling __packet_finalize().
3695 		 */
3696 		kqum->qum_qflags |= QUM_F_FINALIZED;
3697 
3698 		/* Attach packet to slot */
3699 		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
3700 		/*
3701 		 * externalize the packet as it is being transferred to
3702 		 * user space.
3703 		 */
3704 		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);
3705 
3706 		j = SLOT_NEXT(j, kring->ckr_lim);
3707 	}
3708 done:
3709 	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
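	/* advancing ktail publishes the freshly attached slots to user space */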
3710 	kring->ckr_ktail = j;
3711 	return 0;
3712 }
3713 
3714 static int
3715 na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3716     uint32_t flags)
3717 {
3718 #pragma unused(flags, p)
3719 	int n, ret = 0;
3720 	slot_idx_t j;
3721 	struct __kern_slot_desc *ksd;
3722 	struct __user_slot_desc *usd;
3723 	struct __kern_buflet *kbft;
3724 	struct kern_pbufpool *pp = kring->ckr_pp;
3725 
3726 	/* packet pool list is protected by channel lock */
3727 	ASSERT(!KR_KERNEL_ONLY(kring));
3728 
3729 	/* # of new slots */
3730 	n = kring->ckr_rhead - kring->ckr_khead;
3731 	if (n < 0) {
3732 		n += kring->ckr_num_slots;
3733 	}
3734 
3735 	/* nothing to free */
3736 	if (__improbable(n == 0)) {
3737 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3738 		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3739 		    "nothing to free");
3740 		goto done;
3741 	}
3742 
3743 	j = kring->ckr_khead;
3744 	while (n--) {
3745 		int err;
3746 
3747 		ksd = KR_KSD(kring, j);
3748 		usd = KR_USD(kring, j);
3749 
3750 		if (__improbable(!SD_VALID_METADATA(usd))) {
3751 			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3752 			ret = EINVAL;
3753 			break;
3754 		}
3755 
3756 		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
3757 		if (__improbable(err != 0)) {
3758 			SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
3759 			    SK_KVA(kbft));
3760 			ret = EINVAL;
3761 			break;
3762 		}
3763 
3764 		/* detach and free the packet */
3765 		ASSERT(!KSD_VALID_METADATA(ksd));
3766 		USD_DETACH_METADATA(usd);
3767 		pp_free_buflet(pp, kbft);
3768 		j = SLOT_NEXT(j, kring->ckr_lim);
3769 	}
3770 	kring->ckr_khead = j;
3771 	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3772 
3773 done:
3774 	return ret;
3775 }
3776 
3777 static int
3778 na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3779     uint32_t flags)
3780 {
3781 	int b, err;
3782 	uint32_t n = 0;
3783 	slot_idx_t j;
3784 	uint64_t now;
3785 	uint32_t curr_ws, bh_needed, bh_cnt;
3786 	struct __kern_slot_desc *ksd;
3787 	struct __user_slot_desc *usd;
3788 	struct __kern_buflet *kbft;
3789 	struct __kern_buflet_ext *kbe;
3790 	kern_pbufpool_t pp = kring->ckr_pp;
3791 	pid_t pid = proc_pid(p);
3792 
3793 	/* packet pool list is protected by channel lock */
3794 	ASSERT(!KR_KERNEL_ONLY(kring));
3795 	ASSERT(!PP_KERNEL_ONLY(pp));
3796 
3797 	now = _net_uptime;
3798 	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3799 		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3800 			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3801 		}
3802 		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3803 		    "%s: purged curr_ws(%d)", kring->ckr_name,
3804 		    kring->ckr_alloc_ws);
3805 		return 0;
3806 	}
3807 	/* reclaim the completed slots */
3808 	kring->ckr_khead = kring->ckr_rhead;
3809 
3810 	/* # of busy (unclaimed) slots */
3811 	b = kring->ckr_ktail - kring->ckr_khead;
3812 	if (b < 0) {
3813 		b += kring->ckr_num_slots;
3814 	}
3815 
3816 	curr_ws = kring->ckr_alloc_ws;
3817 	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3818 		/* increment the working set by 50% */
3819 		curr_ws += (curr_ws >> 1);
3820 		curr_ws = MIN(curr_ws, kring->ckr_lim);
3821 	} else {
3822 		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3823 		    (uint32_t)b >= (curr_ws >> 2)) {
3824 			/* decrease the working set by 25% */
3825 			curr_ws -= (curr_ws >> 2);
3826 		}
3827 	}
3828 	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
3829 	if (curr_ws > (uint32_t)b) {
3830 		n = curr_ws - b;
3831 	}
3832 	kring->ckr_alloc_ws = curr_ws;
3833 	kring->ckr_sync_time = now;
3834 
3835 	/* min with # of avail free slots (subtract busy from max) */
3836 	n = bh_needed = MIN(n, kring->ckr_lim - b);
3837 	j = kring->ckr_ktail;
3838 	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3839 	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3840 
3841 	if ((bh_cnt = bh_needed) == 0) {
3842 		goto done;
3843 	}
3844 
3845 	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
3846 	    SKMEM_NOSLEEP);
3847 
3848 	if (bh_cnt == 0) {
3849 		SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
3850 		    SK_KVA(kring), bh_needed, err);
3851 		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
3852 	}
3853 
3854 	for (n = 0; n < bh_cnt; n++) {
3855 		struct __user_buflet *ubft;
3856 
3857 		ksd = KR_KSD(kring, j);
3858 		usd = KR_USD(kring, j);
3859 
3860 		kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
3861 		kbe = (struct __kern_buflet_ext *)kbft;
3862 		kring->ckr_scratch[n] = 0;
3863 		ASSERT(kbft != NULL);
3864 
3865 		/*
3866 		 * Add buflet to the allocated list of user packet pool.
3867 		 */
3868 		pp_insert_upp_bft(pp, kbft, pid);
3869 
3870 		/*
3871 		 * externalize the buflet as it is being transferred to
3872 		 * user space.
3873 		 */
3874 		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
3875 		KBUF_EXTERNALIZE(kbft, ubft, pp);
3876 
3877 		/* cleanup any stale slot mapping */
3878 		KSD_RESET(ksd);
3879 		ASSERT(usd != NULL);
3880 		USD_RESET(usd);
3881 
3882 		/* Attach buflet to slot */
3883 		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);
3884 
3885 		j = SLOT_NEXT(j, kring->ckr_lim);
3886 	}
3887 done:
3888 	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3889 	kring->ckr_ktail = j;
3890 	return 0;
3891 }
3892 
3893 /* The caller needs to ensure that the NA stays intact */
3894 void
3895 na_drain(struct nexus_adapter *na, boolean_t purge)
3896 {
3897 	/* will be cleared on next channel sync */
3898 	if (!(atomic_bitset_32_ov(&na->na_flags, NAF_DRAINING) &
3899 	    NAF_DRAINING) && NA_IS_ACTIVE(na)) {
3900 		SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
3901 		    na->na_name, (purge ? "purging" : "pruning"),
3902 		    SK_KVA(na), na->na_flags, NAF_BITS);
3903 
3904 		/* reap (purge/prune) caches in the arena */
3905 		skmem_arena_reap(na->na_arena, purge);
3906 	}
3907 }
3908