1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31 * All rights reserved.
32 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55 #include <sys/systm.h>
56 #include <skywalk/os_skywalk_private.h>
57 #include <skywalk/nexus/monitor/nx_monitor.h>
58 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
59 #include <skywalk/nexus/netif/nx_netif.h>
60 #include <skywalk/nexus/upipe/nx_user_pipe.h>
61 #include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
62 #include <kern/thread.h>
63
64 static int na_krings_use(struct kern_channel *);
65 static void na_krings_unuse(struct kern_channel *);
66 static void na_krings_verify(struct nexus_adapter *);
67 static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
68 static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
69 static void na_set_all_rings(struct nexus_adapter *, uint32_t);
70 static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
71 static void na_unset_ringid(struct kern_channel *);
72 static void na_teardown(struct nexus_adapter *, struct kern_channel *,
73 boolean_t);
74
75 static int na_kr_create(struct nexus_adapter *, uint32_t, boolean_t);
76 static void na_kr_delete(struct nexus_adapter *);
77 static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
78 static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
79 boolean_t);
80 static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
81 boolean_t, struct proc *);
82 static int na_kr_populate_slots(struct __kern_channel_ring *);
83 static void na_kr_depopulate_slots(struct __kern_channel_ring *,
84 struct kern_channel *, boolean_t defunct);
85
86 static int na_schema_alloc(struct kern_channel *);
87
88 static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
89 static void na_pseudo_free(struct nexus_adapter *);
90 static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
91 uint32_t);
92 static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
93 uint32_t);
94 static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
95 static void na_pseudo_dtor(struct nexus_adapter *);
96 static int na_pseudo_krings_create(struct nexus_adapter *,
97 struct kern_channel *);
98 static void na_pseudo_krings_delete(struct nexus_adapter *,
99 struct kern_channel *, boolean_t);
100 static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
101 struct proc *, uint32_t);
102 static int na_packet_pool_free_sync(struct __kern_channel_ring *,
103 struct proc *, uint32_t);
104 static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
105 struct proc *, uint32_t);
106 static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
107 struct proc *, uint32_t);
108
109 #define NA_KRING_IDLE_TIMEOUT (NSEC_PER_SEC * 30) /* 30 seconds */
110
111 static ZONE_DEFINE(na_pseudo_zone, SKMEM_ZONE_PREFIX ".na.pseudo",
112 sizeof(struct nexus_adapter), ZC_ZFREE_CLEARMEM);
113
114 static int __na_inited = 0;
115
116 #define NA_NUM_WMM_CLASSES 4
117 #define NAKR_WMM_SC2RINGID(_s) PKT_SC2TC(_s)
118 #define NAKR_SET_SVC_LUT(_n, _s) \
119 (_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
120 #define NAKR_SET_KR_SVC(_n, _s) \
121 NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)
122
123 #define NA_UPP_ALLOC_LOWAT 8
124 static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;
125
126 #define NA_UPP_REAP_INTERVAL 10 /* seconds */
127 static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;
128
129 #define NA_UPP_WS_HOLD_TIME 2 /* seconds */
130 static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;
131
132 #define NA_UPP_REAP_MIN_PKTS 0
133 static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;
134
135 #define NA_UPP_ALLOC_BUF_LOWAT 64
136 static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;
137
138 #if (DEVELOPMENT || DEBUG)
139 static uint64_t _na_inject_error = 0;
140 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
141 _SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)
142
143 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
144 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
145 NA_UPP_WS_HOLD_TIME, "");
146 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
147 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
148 NA_UPP_REAP_INTERVAL, "");
149 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
150 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
151 NA_UPP_REAP_MIN_PKTS, "");
152 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
153 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
154 NA_UPP_ALLOC_LOWAT, "");
155 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
156 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
157 NA_UPP_ALLOC_BUF_LOWAT, "");
158 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
159 CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
160 #else
161 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
162 #endif /* !DEVELOPMENT && !DEBUG */
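/*
 * On DEVELOPMENT/DEBUG kernels the tunables above are exported via sysctl
 * under kern.skywalk.*; a hypothetical tuning session might look like the
 * following (values are illustrative only, not recommendations):
 *
 *	sysctl kern.skywalk.na_upp_alloc_lowat		# read current value
 *	sysctl kern.skywalk.na_upp_reap_interval=30	# override the default
 */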
163
164 #define SKMEM_TAG_NX_RINGS "com.apple.skywalk.nexus.rings"
165 static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);
166
167 #define SKMEM_TAG_NX_CONTEXTS "com.apple.skywalk.nexus.contexts"
168 static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);
169
170 #define SKMEM_TAG_NX_SCRATCH "com.apple.skywalk.nexus.scratch"
171 static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);
172
173 #if !XNU_TARGET_OS_OSX
174 /* see KLDBootstrap::readPrelinkedExtensions() for details */
175 extern uuid_t kernelcache_uuid;
176 #else /* XNU_TARGET_OS_OSX */
177 /* see panic_init() for details */
178 extern unsigned char *kernel_uuid;
179 #endif /* XNU_TARGET_OS_OSX */
180
181 void
na_init(void)
183 {
184 /*
	 * Changing the size of the nexus_mdata structure won't break the ABI,
	 * but we need to be mindful of memory consumption; thus we add a
	 * compile-time check here to make sure the size is within the
	 * expected limit and that it's properly aligned.  This check may be
	 * adjusted in the future as needed.
190 */
191 _CASSERT(sizeof(struct nexus_mdata) <= 32 &&
192 IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
193 _CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));
194
195 /* see comments on nexus_meta_type_t */
196 _CASSERT(NEXUS_META_TYPE_MAX == 3);
197 _CASSERT(NEXUS_META_SUBTYPE_MAX == 3);
198
199 ASSERT(!__na_inited);
200
201 __na_inited = 1;
202 }
203
204 void
na_fini(void)
206 {
207 if (__na_inited) {
208 __na_inited = 0;
209 }
210 }
211
212 /*
 * Interpret the ring ID of a chreq, by translating it into a pair
214 * of intervals of ring indices:
215 *
216 * [txfirst, txlast) and [rxfirst, rxlast)
217 */
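/*
 * For example (values are illustrative only): on an adapter with 4 TX and
 * 4 RX rings, RING_SET_ALL with CHANNEL_RING_ID_ANY yields tx [0,4) and
 * rx [0,4), while RING_SET_ALL with ring_id 2 yields tx [2,3) and rx [2,3),
 * i.e. a single TX/RX ring pair.
 */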
218 int
na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
220 ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
221 {
222 enum txrx t;
223
224 switch (ring_set) {
225 case RING_SET_ALL:
226 /*
227 * Ring pair eligibility: all ring(s).
228 */
229 if (ring_id != CHANNEL_RING_ID_ANY &&
230 ring_id >= na_get_nrings(na, NR_TX) &&
231 ring_id >= na_get_nrings(na, NR_RX)) {
232 SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
233 na->na_name, (int)ring_id, ring_set);
234 return EINVAL;
235 }
236 for_rx_tx(t) {
237 if (ring_id == CHANNEL_RING_ID_ANY) {
238 first[t] = 0;
239 last[t] = na_get_nrings(na, t);
240 } else {
241 first[t] = ring_id;
242 last[t] = ring_id + 1;
243 }
244 }
245 break;
246
247 default:
248 SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
249 return EINVAL;
250 }
251
252 SK_DF(SK_VERB_NA | SK_VERB_RING,
253 "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
254 na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
255 first[NR_RX], last[NR_RX]);
256
257 return 0;
258 }
259
260 /*
261 * Set the ring ID. For devices with a single queue, a request
262 * for all rings is the same as a single ring.
263 */
264 static int
na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
266 {
267 struct nexus_adapter *na = ch->ch_na;
268 int error;
269 enum txrx t;
270 uint32_t n_alloc_rings;
271
272 if ((error = na_interp_ringid(na, ring_id, ring_set,
273 ch->ch_first, ch->ch_last)) != 0) {
274 return error;
275 }
276
277 n_alloc_rings = na_get_nrings(na, NR_A);
278 if (n_alloc_rings != 0) {
279 ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
280 ch->ch_last[NR_A] = ch->ch_last[NR_F] =
281 ch->ch_first[NR_A] + n_alloc_rings;
282 } else {
283 ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
284 ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
285 }
286 ch->ch_first[NR_EV] = 0;
287 ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
288 /* XXX: should we initialize na_si_users for event ring ? */
289
290 /*
291 * Optimization: count the users registered for more than
292 * one ring, which are the ones sleeping on the global queue.
293 * The default na_notify() callback will then avoid signaling
294 * the global queue if nobody is using it
295 */
296 for_rx_tx(t) {
297 if (ch_is_multiplex(ch, t)) {
298 na->na_si_users[t]++;
299 ASSERT(na->na_si_users[t] != 0);
300 }
301 }
302 return 0;
303 }
304
305 static void
na_unset_ringid(struct kern_channel *ch)
307 {
308 struct nexus_adapter *na = ch->ch_na;
309 enum txrx t;
310
311 for_rx_tx(t) {
312 if (ch_is_multiplex(ch, t)) {
313 ASSERT(na->na_si_users[t] != 0);
314 na->na_si_users[t]--;
315 }
316 ch->ch_first[t] = ch->ch_last[t] = 0;
317 }
318 }
319
320 /*
321 * Check that the rings we want to bind are not exclusively owned by a previous
322 * bind. If exclusive ownership has been requested, we also mark the rings.
323 */
324 /* Hoisted out of line to reduce kernel stack footprint */
325 SK_NO_INLINE_ATTRIBUTE
326 static int
na_krings_use(struct kern_channel *ch)
328 {
329 struct nexus_adapter *na = ch->ch_na;
330 struct __kern_channel_ring *kring;
331 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
332 enum txrx t;
333 uint32_t i;
334
335 SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
336 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
337 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
338
339 /*
	 * First round: check that none of the requested rings
	 * is already exclusively owned, and that we are not
	 * requesting exclusive ownership of rings already in use.
343 */
344 for_all_rings(t) {
345 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
346 kring = &NAKR(na, t)[i];
347 if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
348 (kring->ckr_users && excl)) {
349 SK_DF(SK_VERB_NA | SK_VERB_RING,
350 "kr \"%s\" (0x%llx) krflags 0x%b is busy",
351 kring->ckr_name, SK_KVA(kring),
352 kring->ckr_flags, CKRF_BITS);
353 return EBUSY;
354 }
355 }
356 }
357
358 /*
359 * Second round: increment usage count and possibly
360 * mark as exclusive
361 */
362
363 for_all_rings(t) {
364 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
365 kring = &NAKR(na, t)[i];
366 kring->ckr_users++;
367 if (excl) {
368 kring->ckr_flags |= CKRF_EXCLUSIVE;
369 }
370 }
371 }
372
373 return 0;
374 }
375
376 /* Hoisted out of line to reduce kernel stack footprint */
377 SK_NO_INLINE_ATTRIBUTE
378 static void
na_krings_unuse(struct kern_channel *ch)
380 {
381 struct nexus_adapter *na = ch->ch_na;
382 struct __kern_channel_ring *kring;
383 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
384 enum txrx t;
385 uint32_t i;
386
387 SK_DF(SK_VERB_NA | SK_VERB_RING,
388 "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
389 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
390 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
391
392 for_all_rings(t) {
393 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
394 kring = &NAKR(na, t)[i];
395 if (excl) {
396 kring->ckr_flags &= ~CKRF_EXCLUSIVE;
397 }
398 kring->ckr_users--;
399 }
400 }
401 }
402
403 /* Hoisted out of line to reduce kernel stack footprint */
404 SK_NO_INLINE_ATTRIBUTE
405 static void
na_krings_verify(struct nexus_adapter *na)
407 {
408 struct __kern_channel_ring *kring;
409 enum txrx t;
410 uint32_t i;
411
412 for_all_rings(t) {
413 for (i = 0; i < na_get_nrings(na, t); i++) {
414 kring = &NAKR(na, t)[i];
415 /* na_kr_create() validations */
416 ASSERT(kring->ckr_num_slots > 0);
417 ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
418 ASSERT(kring->ckr_pp != NULL);
419
420 if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
421 continue;
422 }
423 /* na_kr_setup() validations */
424 if (KR_KERNEL_ONLY(kring)) {
425 ASSERT(kring->ckr_ring == NULL);
426 } else {
427 ASSERT(kring->ckr_ring != NULL);
428 }
429 ASSERT(kring->ckr_ksds_last ==
430 &kring->ckr_ksds[kring->ckr_lim]);
431 }
432 }
433 }
434
435 int
na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
437 struct chreq *chr)
438 {
439 struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
440 struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
441 uint32_t ch_mode = chr->cr_mode;
442 int err = 0;
443
444 SK_LOCK_ASSERT_HELD();
445 ASSERT(ch->ch_schema == NULL);
446 ASSERT(ch->ch_na == NULL);
447
448 /* ring configuration may have changed, fetch from the card */
449 na_update_config(na);
450 ch->ch_na = na; /* store the reference */
451 err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
452 if (err != 0) {
453 goto err;
454 }
455
456 atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
457 CHANF_USER_PACKET_POOL | CHANF_EVENT_RING));
458 if (ch_mode & CHMODE_EXCLUSIVE) {
459 atomic_bitset_32(&ch->ch_flags, CHANF_EXCLUSIVE);
460 }
461 /*
462 * Disallow automatic sync for monitor mode, since TX
463 * direction is disabled.
464 */
465 if (ch_mode & CHMODE_MONITOR) {
466 atomic_bitset_32(&ch->ch_flags, CHANF_RXONLY);
467 }
468
469 if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
470 !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
471 SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
472 ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
473 err = EINVAL;
474 goto err;
475 }
476
477 if (na->na_arena->ar_flags & ARF_DEFUNCT) {
478 err = ENXIO;
479 goto err;
480 }
481
482 if (ch_mode & CHMODE_USER_PACKET_POOL) {
483 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
484 ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
485 ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
486 atomic_bitset_32(&ch->ch_flags, CHANF_USER_PACKET_POOL);
487 }
488
489 if (ch_mode & CHMODE_EVENT_RING) {
490 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
491 ASSERT(na->na_flags & NAF_EVENT_RING);
492 ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
493 atomic_bitset_32(&ch->ch_flags, CHANF_EVENT_RING);
494 }
495
496 /*
497 * If this is the first channel of the adapter, create
498 * the rings and their in-kernel view, the krings.
499 */
500 if (na->na_channels == 0) {
501 err = na->na_krings_create(na, ch);
502 if (err != 0) {
503 goto err;
504 }
505
506 /*
507 * Sanity check; this is already done in na_kr_create(),
508 * but we do it here as well to validate na_kr_setup().
509 */
510 na_krings_verify(na);
511 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
512 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
513 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
514 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
515 }
516
517 /*
518 * Validate ownership and usability of the krings; take into account
519 * whether some previous bind has exclusive ownership on them.
520 */
521 err = na_krings_use(ch);
522 if (err != 0) {
523 goto err_del_rings;
524 }
525
526 /* for user-facing channel, create a new channel schema */
527 if (!(ch->ch_flags & CHANF_KERNEL)) {
528 err = na_schema_alloc(ch);
529 if (err != 0) {
530 goto err_rel_excl;
531 }
532
533 ASSERT(ch->ch_schema != NULL);
534 ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
535 } else {
536 ASSERT(ch->ch_schema == NULL);
537 ch->ch_schema_offset = (mach_vm_offset_t)-1;
538 }
539
540 /* update our work timestamp */
541 na->na_work_ts = net_uptime();
542
546 na->na_channels++;
547
548 /*
549 * If user packet pool is desired, initialize the allocated
550 * object hash table in the pool, if not already. This also
551 * retains a refcnt on the pool which the caller must release.
552 */
553 ASSERT(ch->ch_pp == NULL);
554 if (ch_mode & CHMODE_USER_PACKET_POOL) {
555 #pragma unused(tx_pp)
556 ASSERT(rx_pp == tx_pp);
557 err = pp_init_upp(rx_pp, TRUE);
558 if (err != 0) {
559 goto err_free_schema;
560 }
561 ch->ch_pp = rx_pp;
562 }
563
564 if (!NA_IS_ACTIVE(na)) {
565 err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
566 if (err != 0) {
567 goto err_release_pp;
568 }
569
570 SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
571 SK_KVA(na));
572 SK_D(" na_md_type: %u", na->na_md_type);
573 SK_D(" na_md_subtype: %u", na->na_md_subtype);
574 }
575
576 SK_D("ch 0x%llx", SK_KVA(ch));
577 SK_D(" ch_flags: 0x%b", ch->ch_flags, CHANF_BITS);
578 if (ch->ch_schema != NULL) {
579 SK_D(" ch_schema: 0x%llx", SK_KVA(ch->ch_schema));
580 }
581 SK_D(" ch_na: 0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
582 ch->ch_na->na_channels);
583 SK_D(" ch_tx_rings: [%u,%u)", ch->ch_first[NR_TX],
584 ch->ch_last[NR_TX]);
585 SK_D(" ch_rx_rings: [%u,%u)", ch->ch_first[NR_RX],
586 ch->ch_last[NR_RX]);
587 SK_D(" ch_alloc_rings: [%u,%u)", ch->ch_first[NR_A],
588 ch->ch_last[NR_A]);
589 SK_D(" ch_free_rings: [%u,%u)", ch->ch_first[NR_F],
590 ch->ch_last[NR_F]);
591 SK_D(" ch_ev_rings: [%u,%u)", ch->ch_first[NR_EV],
592 ch->ch_last[NR_EV]);
593
594 return 0;
595
596 err_release_pp:
597 if (ch_mode & CHMODE_USER_PACKET_POOL) {
598 ASSERT(ch->ch_pp != NULL);
599 pp_release(rx_pp);
600 ch->ch_pp = NULL;
601 }
602 err_free_schema:
603 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
604 NEXUS_META_TYPE_INVALID;
605 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
606 NEXUS_META_SUBTYPE_INVALID;
607 ASSERT(na->na_channels != 0);
608 na->na_channels--;
609 if (ch->ch_schema != NULL) {
610 skmem_cache_free(
611 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
612 ch->ch_schema);
613 ch->ch_schema = NULL;
614 ch->ch_schema_offset = (mach_vm_offset_t)-1;
615 }
616 err_rel_excl:
617 na_krings_unuse(ch);
618 err_del_rings:
619 if (na->na_channels == 0) {
620 na->na_krings_delete(na, ch, FALSE);
621 }
622 err:
623 ch->ch_na = NULL;
624 ASSERT(err != 0);
625
626 return err;
627 }
628
629 /*
630 * Undo everything that was done in na_bind_channel().
631 */
632 /* call with SK_LOCK held */
633 void
na_unbind_channel(struct kern_channel *ch)
635 {
636 struct nexus_adapter *na = ch->ch_na;
637
638 SK_LOCK_ASSERT_HELD();
639
640 ASSERT(na->na_channels != 0);
641 na->na_channels--;
642
643 /* release exclusive use if it was requested at bind time */
644 na_krings_unuse(ch);
645
646 if (na->na_channels == 0) { /* last instance */
647 SK_D("%s(%d): deleting last channel instance for %s",
648 ch->ch_name, ch->ch_pid, na->na_name);
649
650 /*
651 * Free any remaining allocated packets attached to
652 * the slots, followed by a teardown of the arena.
653 */
654 na_teardown(na, ch, FALSE);
655
656 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
657 NEXUS_META_TYPE_INVALID;
658 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
659 NEXUS_META_SUBTYPE_INVALID;
660 } else {
661 SK_D("%s(%d): %s has %u remaining channel instance(s)",
662 ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
663 }
664
665 /*
666 * Free any allocated packets (for the process) attached to the slots;
667 * note that na_teardown() could have done this there as well.
668 */
669 if (ch->ch_pp != NULL) {
670 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
671 pp_purge_upp(ch->ch_pp, ch->ch_pid);
672 pp_release(ch->ch_pp);
673 ch->ch_pp = NULL;
674 }
675
	/* possibly decrement counter of tx_si/rx_si users */
677 na_unset_ringid(ch);
678
679 /* reap the caches now (purge if adapter is idle) */
680 skmem_arena_reap(na->na_arena, (na->na_channels == 0));
681
682 /* delete the csm */
683 if (ch->ch_schema != NULL) {
684 skmem_cache_free(
685 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
686 ch->ch_schema);
687 ch->ch_schema = NULL;
688 ch->ch_schema_offset = (mach_vm_offset_t)-1;
689 }
690
691 /* destroy the memory map */
692 skmem_arena_munmap_channel(na->na_arena, ch);
693
694 /* mark the channel as unbound */
695 atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE));
696 ch->ch_na = NULL;
697
698 /* and finally release the nexus adapter; this might free it */
699 (void) na_release_locked(na);
700 }
701
702 static void
na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
704 boolean_t defunct)
705 {
706 SK_LOCK_ASSERT_HELD();
707 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
708
709 #if CONFIG_NEXUS_MONITOR
710 /*
711 * Walk through all the rings and tell any monitor
712 * that the port is going to exit Skywalk mode
713 */
714 nx_mon_stop(na);
715 #endif /* CONFIG_NEXUS_MONITOR */
716
717 /*
	 * Deactivate the adapter.
719 */
720 (void) na->na_activate(na,
721 (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));
722
723 /*
724 * Free any remaining allocated packets for this process.
725 */
726 if (ch->ch_pp != NULL) {
727 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
728 pp_purge_upp(ch->ch_pp, ch->ch_pid);
729 if (!defunct) {
730 pp_release(ch->ch_pp);
731 ch->ch_pp = NULL;
732 }
733 }
734
735 /*
736 * Delete rings and buffers.
737 */
738 na->na_krings_delete(na, ch, defunct);
739 }
740
741 /* call with SK_LOCK held */
742 /*
743 * Allocate the per-fd structure __user_channel_schema.
744 */
745 static int
na_schema_alloc(struct kern_channel *ch)
747 {
748 struct nexus_adapter *na = ch->ch_na;
749 struct skmem_arena *ar = na->na_arena;
750 struct skmem_arena_nexus *arn;
751 mach_vm_offset_t roff[SKMEM_REGIONS];
752 struct __kern_channel_ring *kr;
753 struct __user_channel_schema *csm;
754 struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
755 mach_vm_offset_t base;
756 uint32_t i, j, k, n[NR_ALL];
757 enum txrx t;
758
759 /* see comments for struct __user_channel_schema */
760 _CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
761 _CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
762 sizeof(csm->csm_ver));
763 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
764 sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
765 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
766 sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
767 sizeof(csm->csm_kern_name));
768
769 SK_LOCK_ASSERT_HELD();
770
771 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
772 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
773 arn = skmem_arena_nexus(ar);
774 ASSERT(arn != NULL);
775 for_all_rings(t) {
776 n[t] = 0;
777 }
778
779 csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
780 if (csm == NULL) {
781 return ENOMEM;
782 }
783
784 skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
785 bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));
786
787 *(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;
788
789 /* kernel version and executable UUID */
790 _CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
791 (void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
792 version, sizeof(csm->csm_kern_name) - 1);
793 #if !XNU_TARGET_OS_OSX
794 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
795 kernelcache_uuid, sizeof(csm->csm_kern_uuid));
796 #else /* XNU_TARGET_OS_OSX */
797 if (kernel_uuid != NULL) {
798 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
799 kernel_uuid, sizeof(csm->csm_kern_uuid));
800 }
801 #endif /* XNU_TARGET_OS_OSX */
802
803 for_rx_tx(t) {
804 ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
805 n[t] = ch->ch_last[t] - ch->ch_first[t];
806 ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
807 }
808
809 /* return total number of tx and rx rings for this channel */
810 *(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
811 *(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];
812
813 if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
814 *(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
815 na->na_num_allocator_ring_pairs;
816 n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
817 ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
818 ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
819 ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
820 }
821
822 if (ch->ch_flags & CHANF_EVENT_RING) {
823 n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
824 ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
825 *(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
826 }
827
828 bzero(&roff, sizeof(roff));
829 for (i = 0; i < SKMEM_REGIONS; i++) {
830 if (ar->ar_regions[i] == NULL) {
831 ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
832 i == SKMEM_REGION_SCHEMA ||
833 i == SKMEM_REGION_RXBUF ||
834 i == SKMEM_REGION_TXBUF ||
835 i == SKMEM_REGION_RXKMD ||
836 i == SKMEM_REGION_TXKMD ||
837 i == SKMEM_REGION_UMD ||
838 i == SKMEM_REGION_UBFT ||
839 i == SKMEM_REGION_KBFT ||
840 i == SKMEM_REGION_RXKBFT ||
841 i == SKMEM_REGION_TXKBFT ||
842 i == SKMEM_REGION_TXAUSD ||
843 i == SKMEM_REGION_RXFUSD ||
844 i == SKMEM_REGION_USTATS ||
845 i == SKMEM_REGION_KSTATS ||
846 i == SKMEM_REGION_INTRINSIC ||
847 i == SKMEM_REGION_FLOWADV ||
848 i == SKMEM_REGION_NEXUSADV ||
849 i == SKMEM_REGION_SYSCTLS ||
850 i == SKMEM_REGION_GUARD_TAIL);
851 continue;
852 }
853
854 /* not for nexus */
855 ASSERT(i != SKMEM_REGION_SYSCTLS);
856
857 /*
858 * Get region offsets from base of mmap span; the arena
859 * doesn't need to be mmap'd at this point, since we
860 * simply compute the relative offset.
861 */
862 roff[i] = skmem_arena_get_region_offset(ar, i);
863 }
864
865 /*
	 * The schema is made up of the descriptor, followed inline by an
	 * array of offsets to the tx, rx, allocator, free and event rings
	 * in the mmap span.  Each entry holds the offset of the ring
	 * relative to the schema, so userspace can reach a ring starting
	 * from the schema.
871 */
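	/*
	 * Sketch of the intended userspace arithmetic (hypothetical code,
	 * not part of this file): given the mapped schema pointer "csm",
	 * the ring and user slot-descriptor views are reached through the
	 * offsets stored below, e.g.:
	 *
	 *	ring = (struct __user_channel_ring *)
	 *	    ((uintptr_t)csm + csm->csm_ring_ofs[i].ring_off);
	 *	usd  = (struct __slot_desc *)
	 *	    ((uintptr_t)csm + csm->csm_ring_ofs[i].sd_off);
	 */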
872 base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
873
874 /* initialize schema with tx ring info */
875 for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
876 kr = &na->na_tx_rings[j];
877 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
878 continue;
879 }
880
881 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
882 skmem_cache_get_obj_info(arn->arn_ring_cache,
883 kr->ckr_ring, &ring_oi, NULL);
884 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
885 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
886
887 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
888 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
889 kr->ckr_ksds, &ksd_oi, &usd_oi);
890
891 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
892 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
893 base;
894 }
895 /* initialize schema with rx ring info */
896 for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
897 kr = &na->na_rx_rings[j];
898 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
899 continue;
900 }
901
902 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
903 skmem_cache_get_obj_info(arn->arn_ring_cache,
904 kr->ckr_ring, &ring_oi, NULL);
905 *(mach_vm_offset_t *)
906 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
907 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
908
909 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
910 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
911 kr->ckr_ksds, &ksd_oi, &usd_oi);
912
913 *(mach_vm_offset_t *)
914 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
915 (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
916 base;
917 }
918 /* initialize schema with allocator ring info */
919 for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
920 i < n[NR_A]; i++, j++) {
921 mach_vm_offset_t usd_roff;
922
923 usd_roff = roff[SKMEM_REGION_TXAUSD];
924 kr = &na->na_alloc_rings[j];
925 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
926 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
927
928 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
929 &ring_oi, NULL);
930 *(mach_vm_offset_t *)
931 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
932 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
933
934 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
935 &ksd_oi, &usd_oi);
936 *(mach_vm_offset_t *)
937 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
938 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
939 }
940 /* initialize schema with free ring info */
941 for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
942 i < n[NR_F]; i++, j++) {
943 mach_vm_offset_t usd_roff;
944
945 usd_roff = roff[SKMEM_REGION_RXFUSD];
946 kr = &na->na_free_rings[j];
947 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
948 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
949
950 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
951 &ring_oi, NULL);
952 *(mach_vm_offset_t *)
953 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
954 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
955
956 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
957 &ksd_oi, &usd_oi);
958 *(mach_vm_offset_t *)
959 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
960 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
961 }
962 /* initialize schema with event ring info */
963 for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
964 n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
965 ASSERT(csm->csm_num_event_rings != 0);
966 kr = &na->na_event_rings[j];
967 ASSERT(!KR_KERNEL_ONLY(kr));
968 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
969 skmem_cache_get_obj_info(arn->arn_ring_cache,
970 kr->ckr_ring, &ring_oi, NULL);
971 *(mach_vm_offset_t *)
972 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
973 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
974
975 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
976 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
977 kr->ckr_ksds, &ksd_oi, &usd_oi);
978
979 *(mach_vm_offset_t *)
980 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
981 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
982 base;
983 }
984
985 *(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
986 __ch_umd_redzone_cookie;
987 *(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
988 *(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
989 na->na_md_subtype;
990
991 if (arn->arn_stats_obj != NULL) {
992 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
993 ASSERT(roff[SKMEM_REGION_USTATS] != 0);
994 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
995 roff[SKMEM_REGION_USTATS];
996 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
997 na->na_stats_type;
998 } else {
999 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1000 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
1001 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1002 NEXUS_STATS_TYPE_INVALID;
1003 }
1004
1005 if (arn->arn_flowadv_obj != NULL) {
1006 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
1007 ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
1008 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
1009 roff[SKMEM_REGION_FLOWADV];
1010 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
1011 na->na_flowadv_max;
1012 } else {
1013 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1014 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
1015 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
1016 }
1017
1018 if (arn->arn_nexusadv_obj != NULL) {
1019 struct __kern_nexus_adv_metadata *adv_md;
1020
1021 adv_md = arn->arn_nexusadv_obj;
1022 ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
1023 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
1024 ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
1025 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
1026 roff[SKMEM_REGION_NEXUSADV];
1027 } else {
1028 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1029 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
1030 }
1031
1032 ch->ch_schema = csm;
1033 ch->ch_schema_offset = base;
1034
1035 return 0;
1036 }
1037
1038 /*
1039 * Called by all routines that create nexus_adapters.
1040 * Attach na to the ifp (if any) and provide defaults
1041 * for optional callbacks. Defaults assume that we
 * are creating a hardware nexus_adapter.
1043 */
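/*
 * A minimal sketch of the expected calling convention (hypothetical adapter
 * code, shown only to illustrate the preconditions asserted below; the
 * my_* callbacks are placeholders): the caller fills in the mandatory
 * callbacks and ring counts before handing the adapter to this routine.
 *
 *	na->na_krings_create = my_krings_create;	// required
 *	na->na_krings_delete = my_krings_delete;	// required
 *	na->na_txsync = my_txsync;
 *	na->na_rxsync = my_rxsync;
 *	na_set_nrings(na, NR_TX, ntx);
 *	na_set_nrings(na, NR_RX, nrx);
 *	na_attach_common(na, nx, nxdom_prov);
 */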
1044 void
na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
1046 struct kern_nexus_domain_provider *nxdom_prov)
1047 {
1048 SK_LOCK_ASSERT_HELD();
1049
1050 ASSERT(nx != NULL);
1051 ASSERT(nxdom_prov != NULL);
1052 ASSERT(na->na_krings_create != NULL);
1053 ASSERT(na->na_krings_delete != NULL);
1054 if (na->na_type != NA_NETIF_COMPAT_DEV) {
1055 ASSERT(na_get_nrings(na, NR_TX) != 0);
1056 }
1057 if (na->na_type != NA_NETIF_COMPAT_HOST) {
1058 ASSERT(na_get_nrings(na, NR_RX) != 0);
1059 }
1060 ASSERT(na->na_channels == 0);
1061
1062 if (na->na_notify == NULL) {
1063 na->na_notify = na_notify;
1064 }
1065
1066 na->na_nx = nx;
1067 na->na_nxdom_prov = nxdom_prov;
1068
1069 SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
1070 SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
1071 SK_KVA(na->na_arena));
1072 }
1073
1074 void
na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
1076 boolean_t within_kevent, boolean_t selwake, uint32_t hint)
1077 {
1078 struct nexus_adapter *na = KRNA(kring);
1079 enum txrx t = kring->ckr_tx;
1080
1081 SK_DF(SK_VERB_EVENTS,
1082 "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
1083 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1084 na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
1085 hint, CHAN_FILT_HINT_BITS);
1086
1087 csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
1088 /*
1089 * optimization: avoid a wake up on the global
1090 * queue if nobody has registered for more
1091 * than one ring
1092 */
1093 if (na->na_si_users[t] > 0) {
1094 csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
1095 }
1096 }
1097
1098 /* default notify callback */
1099 static int
na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
1101 {
1102 #pragma unused(p)
1103 SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
1104 SK_VERB_TX : SK_VERB_RX),
1105 "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
1106 "flags 0x%x, kh %u kt %u | h %u t %u",
1107 sk_proc_name_address(p), sk_proc_pid(p),
1108 (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
1109 SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
1110 kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
1111 kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);
1112
1113 na_post_event(kring, (flags & NA_NOTEF_PUSH),
1114 (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);
1115
1116 return 0;
1117 }
1118
1119 /*
1120 * Fetch configuration from the device, to cope with dynamic
1121 * reconfigurations after loading the module.
1122 */
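/*
 * Sketch of the na_config() contract as used below (an assumption drawn
 * from this call site, not a formal definition): the callback reports the
 * device's current ring/slot counts and returns 0 on success; a NULL
 * callback or a non-zero return means "keep the values from init time".
 *
 *	static int
 *	my_na_config(struct nexus_adapter *na, uint32_t *txr, uint32_t *txd,
 *	    uint32_t *rxr, uint32_t *rxd);	// hypothetical provider callback
 */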
1123 /* call with SK_LOCK held */
1124 int
na_update_config(struct nexus_adapter *na)
1126 {
1127 uint32_t txr, txd, rxr, rxd;
1128
1129 SK_LOCK_ASSERT_HELD();
1130
1131 txr = txd = rxr = rxd = 0;
1132 if (na->na_config == NULL ||
1133 na->na_config(na, &txr, &txd, &rxr, &rxd)) {
1134 /* take whatever we had at init time */
1135 txr = na_get_nrings(na, NR_TX);
1136 txd = na_get_nslots(na, NR_TX);
1137 rxr = na_get_nrings(na, NR_RX);
1138 rxd = na_get_nslots(na, NR_RX);
1139 }
1140
1141 if (na_get_nrings(na, NR_TX) == txr &&
1142 na_get_nslots(na, NR_TX) == txd &&
1143 na_get_nrings(na, NR_RX) == rxr &&
1144 na_get_nslots(na, NR_RX) == rxd) {
1145 return 0; /* nothing changed */
1146 }
1147 SK_D("stored config %s: txring %u x %u, rxring %u x %u",
1148 na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
1149 na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
1150 SK_D("new config %s: txring %u x %u, rxring %u x %u",
1151 na->na_name, txr, txd, rxr, rxd);
1152
1153 if (na->na_channels == 0) {
1154 SK_D("configuration changed (but fine)");
1155 na_set_nrings(na, NR_TX, txr);
1156 na_set_nslots(na, NR_TX, txd);
1157 na_set_nrings(na, NR_RX, rxr);
1158 na_set_nslots(na, NR_RX, rxd);
1159 return 0;
1160 }
1161 SK_ERR("configuration changed while active, this is bad...");
1162 return 1;
1163 }
1164
1165 static void
na_kr_setup_netif_svc_map(struct nexus_adapter *na)
1167 {
1168 uint32_t i;
1169 uint32_t num_tx_rings;
1170
1171 ASSERT(na->na_type == NA_NETIF_DEV);
1172 num_tx_rings = na_get_nrings(na, NR_TX);
1173
1174 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
1175 NAKR_WMM_SC2RINGID(KPKT_SC_BK));
1176 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1177 NAKR_WMM_SC2RINGID(KPKT_SC_RD));
1178 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1179 NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
1180 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1181 NAKR_WMM_SC2RINGID(KPKT_SC_RV));
1182 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1183 NAKR_WMM_SC2RINGID(KPKT_SC_VI));
1184 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
1185 NAKR_WMM_SC2RINGID(KPKT_SC_CTL));
1186
1187 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
1188 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
1189 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
1190 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);
1191
1192 _CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
1193 _CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
1194 _CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
1195 _CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
1196 _CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
1197 _CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
1198 _CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
1199 _CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
1200 _CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
1201 _CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
1202 _CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);
1203
1204 /*
	 * We support the following two configurations:
	 * 1. packets from all service classes map to a single ring.
	 * 2. a many-to-4 mapping between service classes and rings, where
	 *    these 4 rings map to the 4 WMM access categories.
1209 */
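	/*
	 * Illustrative lookup (an assumption about how the LUT is consumed
	 * elsewhere, e.g. by the netif TX path): a packet's service class
	 * is mapped to a TX ring index via
	 *
	 *	ring_id = na->na_kring_svc_lut[MBUF_SCIDX(svc)];
	 *
	 * which, in the WMM configuration below, collapses the service
	 * classes into NA_NUM_WMM_CLASSES rings.
	 */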
1210 if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
1211 ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
1212 /* setup the adapter's service class LUT */
1213 NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
1214 NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
1215 NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
1216 NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
1217 NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
1218 NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
1219 NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
1220 NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
1221 NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
1222 NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
1223 NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);
1224
		/* Initialize the service class for each of the 4 rings */
1226 NAKR_SET_KR_SVC(na, KPKT_SC_BK);
1227 NAKR_SET_KR_SVC(na, KPKT_SC_BE);
1228 NAKR_SET_KR_SVC(na, KPKT_SC_VI);
1229 NAKR_SET_KR_SVC(na, KPKT_SC_VO);
1230 } else {
1231 ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
1232 NEXUS_QMAP_TYPE_DEFAULT);
		/* many-to-1 mapping */
1234 for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
1235 na->na_kring_svc_lut[i] = 0;
1236 }
1237 for (i = 0; i < num_tx_rings; i++) {
1238 NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
1239 }
1240 }
1241 }
1242
1243 static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
1244 static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
1245 static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
1246 static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
1247 static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
1248 static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
1249 static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1250
1251 static lck_grp_t *
na_kr_q_lck_grp(enum txrx t)
1253 {
1254 switch (t) {
1255 case NR_TX:
1256 return &channel_txq_lock_group;
1257 case NR_RX:
1258 return &channel_rxq_lock_group;
1259 case NR_A:
1260 case NR_F:
1261 return &channel_alloc_lock_group;
1262 case NR_EV:
1263 return &channel_evq_lock_group;
1264 default:
1265 VERIFY(0);
1266 /* NOTREACHED */
1267 __builtin_unreachable();
1268 }
1269 }
1270
1271 static lck_grp_t *
na_kr_s_lck_grp(enum txrx t)
1273 {
1274 switch (t) {
1275 case NR_TX:
1276 return &channel_txs_lock_group;
1277 case NR_RX:
1278 return &channel_rxs_lock_group;
1279 case NR_A:
1280 case NR_F:
1281 return &channel_alloc_lock_group;
1282 case NR_EV:
1283 return &channel_evs_lock_group;
1284 default:
1285 VERIFY(0);
1286 /* NOTREACHED */
1287 __builtin_unreachable();
1288 }
1289 }
1290
1291 static void
kr_init_tbr(struct __kern_channel_ring *r)
1293 {
1294 r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
1295 r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
1296 r->ckr_tbr_last = 0;
1297 }
1298
1299 struct kern_pbufpool *
na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
1301 {
1302 struct kern_pbufpool *pp = NULL;
1303 switch (t) {
1304 case NR_RX:
1305 case NR_F:
1306 case NR_EV:
1307 pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1308 break;
1309 case NR_TX:
1310 case NR_A:
1311 pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1312 break;
1313 default:
1314 VERIFY(0);
1315 /* NOTREACHED */
1316 __builtin_unreachable();
1317 }
1318
1319 return pp;
1320 }
1321
1322 /*
1323 * Create the krings array and initialize the fields common to all adapters.
1324 * The array layout is this:
1325 *
 *                           +----------+
 * na->na_tx_rings --------->|          | \
 *                           |          |  } na->na_num_tx_rings
 *                           |          | /
 * na->na_rx_rings --------->+----------+
 *                           |          | \
 *                           |          |  } na->na_num_rx_rings
 *                           |          | /
 * na->na_alloc_rings ------>+----------+
 *                           |          | \
 * na->na_free_rings ------->+----------+  } na->na_num_allocator_ring_pairs
 *                           |          | /
 * na->na_event_rings ------>+----------+
 *                           |          | \
 *                           |          |  } na->na_num_event_rings
 *                           |          | /
 *                           +----------+
 * na->na_tailroom --------->|          | \
 *                           |          |  } tailroom bytes
 *                           |          | /
 *                           +----------+
1347 *
1348 * The tailroom space is currently used by flow switch ports for allocating
1349 * leases.
1350 */
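/*
 * Hypothetical illustration of the layout above (example values only):
 * with 2 TX, 2 RX, 1 alloc, 1 free and 0 event rings, the single
 * allocation below yields a contiguous array of 6 krings plus tailroom,
 * carved up as
 *
 *	na_tx_rings    = &krings[0]
 *	na_rx_rings    = &krings[2]
 *	na_alloc_rings = &krings[4]
 *	na_free_rings  = &krings[5]
 *	na_tailroom    = &krings[6]	(start of the tailroom bytes)
 */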
1351 /* call with SK_LOCK held */
1352 static int
na_kr_create(struct nexus_adapter *na, uint32_t tailroom, boolean_t alloc_ctx)
1354 {
1355 lck_grp_t *q_lck_grp, *s_lck_grp;
1356 uint32_t i, len, ndesc;
1357 struct kern_pbufpool *pp = NULL;
1358 struct __kern_channel_ring *kring;
1359 uint32_t n[NR_ALL];
1360 int c, tot_slots, err = 0;
1361 enum txrx t;
1362
1363 SK_LOCK_ASSERT_HELD();
1364
1365 n[NR_TX] = na_get_nrings(na, NR_TX);
1366 n[NR_RX] = na_get_nrings(na, NR_RX);
1367 n[NR_A] = na_get_nrings(na, NR_A);
1368 n[NR_F] = na_get_nrings(na, NR_F);
1369 n[NR_EV] = na_get_nrings(na, NR_EV);
1370
1371 len = ((n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV]) *
1372 sizeof(struct __kern_channel_ring)) + tailroom;
1373
1374 na->na_rings_mem_sz = (size_t)len;
1375 na->na_tx_rings = sk_alloc((size_t)len, Z_WAITOK, skmem_tag_nx_rings);
1376 if (__improbable(na->na_tx_rings == NULL)) {
1377 SK_ERR("Cannot allocate krings");
1378 err = ENOMEM;
1379 goto error;
1380 }
1381 na->na_rx_rings = na->na_tx_rings + n[NR_TX];
1382 if (n[NR_A] != 0) {
1383 na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
1384 na->na_free_rings = na->na_alloc_rings + n[NR_A];
1385 } else {
1386 na->na_alloc_rings = na->na_free_rings = NULL;
1387 }
1388 if (n[NR_EV] != 0) {
1389 if (na->na_free_rings != NULL) {
1390 na->na_event_rings = na->na_free_rings + n[NR_F];
1391 } else {
1392 na->na_event_rings = na->na_rx_rings + n[NR_RX];
1393 }
1394 }
1395
1396 /* total number of slots for TX/RX adapter rings */
1397 c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
1398 (n[NR_RX] * na_get_nslots(na, NR_RX));
1399
1400 /* for scratch space on alloc and free rings */
1401 if (n[NR_A] != 0) {
1402 tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
1403 tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
1404 c = tot_slots;
1405 }
1406 na->na_total_slots = tot_slots;
1407
1408 /* slot context (optional) for all TX/RX ring slots of this adapter */
1409 if (alloc_ctx) {
1410 na->na_slot_ctxs =
1411 skn_alloc_type_array(slot_ctxs, struct slot_ctx,
1412 na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
1413 if (na->na_slot_ctxs == NULL) {
1414 SK_ERR("Cannot allocate slot contexts");
1415 err = ENOMEM;
1416 goto error;
1417 }
1418 atomic_bitset_32(&na->na_flags, NAF_SLOT_CONTEXT);
1419 }
1420
1421 /*
1422 * packet handle array storage for all TX/RX ring slots of this
1423 * adapter.
1424 */
1425 na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
1426 na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
1427 if (na->na_scratch == NULL) {
		SK_ERR("Cannot allocate scratch space");
1429 err = ENOMEM;
1430 goto error;
1431 }
1432
1433 /*
	 * All fields in krings are 0 except the ones initialized below,
	 * but it's better to be explicit on the important kring fields.
1436 */
1437 for_all_rings(t) {
1438 ndesc = na_get_nslots(na, t);
1439 pp = na_kr_get_pp(na, t);
1440 for (i = 0; i < n[t]; i++) {
1441 kring = &NAKR(na, t)[i];
1442 bzero(kring, sizeof(*kring));
1443 kring->ckr_na = na;
1444 kring->ckr_pp = pp;
1445 kring->ckr_max_pkt_len = pp->pp_buflet_size *
1446 pp->pp_max_frags;
1447 kring->ckr_ring_id = i;
1448 kring->ckr_tx = t;
1449 kr_init_to_mhints(kring, ndesc);
1450 kr_init_tbr(kring);
1451 if (NA_KERNEL_ONLY(na)) {
1452 kring->ckr_flags |= CKRF_KERNEL_ONLY;
1453 }
1454 if (na->na_flags & NAF_HOST_ONLY) {
1455 kring->ckr_flags |= CKRF_HOST;
1456 }
1457 ASSERT((t >= NR_TXRX) || (c > 0));
1458 if ((t < NR_TXRX) &&
1459 (na->na_flags & NAF_SLOT_CONTEXT)) {
1460 ASSERT(na->na_slot_ctxs != NULL);
1461 kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1462 kring->ckr_slot_ctxs =
1463 na->na_slot_ctxs + (tot_slots - c);
1464 }
1465 ASSERT(na->na_scratch != NULL);
1466 if (t < NR_TXRXAF) {
1467 kring->ckr_scratch =
1468 na->na_scratch + (tot_slots - c);
1469 }
1470 if (t < NR_TXRXAF) {
1471 c -= ndesc;
1472 }
1473 switch (t) {
1474 case NR_A:
1475 if (i == 0) {
1476 kring->ckr_na_sync =
1477 na_packet_pool_alloc_sync;
1478 kring->ckr_alloc_ws =
1479 na_upp_alloc_lowat;
1480 } else {
1481 ASSERT(i == 1);
1482 kring->ckr_na_sync =
1483 na_packet_pool_alloc_buf_sync;
1484 kring->ckr_alloc_ws =
1485 na_upp_alloc_buf_lowat;
1486 }
1487 break;
1488 case NR_F:
1489 if (i == 0) {
1490 kring->ckr_na_sync =
1491 na_packet_pool_free_sync;
1492 } else {
1493 ASSERT(i == 1);
1494 kring->ckr_na_sync =
1495 na_packet_pool_free_buf_sync;
1496 }
1497 break;
1498 case NR_TX:
1499 kring->ckr_na_sync = na->na_txsync;
1500 if (na->na_flags & NAF_TX_MITIGATION) {
1501 kring->ckr_flags |= CKRF_MITIGATION;
1502 }
1503 switch (na->na_type) {
1504 #if CONFIG_NEXUS_USER_PIPE
1505 case NA_USER_PIPE:
1506 ASSERT(!(na->na_flags &
1507 NAF_USER_PKT_POOL));
1508 kring->ckr_prologue = kr_txprologue;
1509 kring->ckr_finalize = NULL;
1510 break;
1511 #endif /* CONFIG_NEXUS_USER_PIPE */
1512 #if CONFIG_NEXUS_MONITOR
1513 case NA_MONITOR:
1514 ASSERT(!(na->na_flags &
1515 NAF_USER_PKT_POOL));
1516 kring->ckr_prologue = kr_txprologue;
1517 kring->ckr_finalize = NULL;
1518 break;
1519 #endif /* CONFIG_NEXUS_MONITOR */
1520 default:
1521 if (na->na_flags & NAF_USER_PKT_POOL) {
1522 kring->ckr_prologue =
1523 kr_txprologue_upp;
1524 kring->ckr_finalize =
1525 kr_txfinalize_upp;
1526 } else {
1527 kring->ckr_prologue =
1528 kr_txprologue;
1529 kring->ckr_finalize =
1530 kr_txfinalize;
1531 }
1532 break;
1533 }
1534 break;
1535 case NR_RX:
1536 kring->ckr_na_sync = na->na_rxsync;
1537 if (na->na_flags & NAF_RX_MITIGATION) {
1538 kring->ckr_flags |= CKRF_MITIGATION;
1539 }
1540 switch (na->na_type) {
1541 #if CONFIG_NEXUS_USER_PIPE
1542 case NA_USER_PIPE:
1543 ASSERT(!(na->na_flags &
1544 NAF_USER_PKT_POOL));
1545 kring->ckr_prologue =
1546 kr_rxprologue_nodetach;
1547 kring->ckr_finalize = kr_rxfinalize;
1548 break;
1549 #endif /* CONFIG_NEXUS_USER_PIPE */
1550 #if CONFIG_NEXUS_MONITOR
1551 case NA_MONITOR:
1552 ASSERT(!(na->na_flags &
1553 NAF_USER_PKT_POOL));
1554 kring->ckr_prologue =
1555 kr_rxprologue_nodetach;
1556 kring->ckr_finalize = kr_rxfinalize;
1557 break;
1558 #endif /* CONFIG_NEXUS_MONITOR */
1559 default:
1560 if (na->na_flags & NAF_USER_PKT_POOL) {
1561 kring->ckr_prologue =
1562 kr_rxprologue_upp;
1563 kring->ckr_finalize =
1564 kr_rxfinalize_upp;
1565 } else {
1566 kring->ckr_prologue =
1567 kr_rxprologue;
1568 kring->ckr_finalize =
1569 kr_rxfinalize;
1570 }
1571 break;
1572 }
1573 break;
1574 case NR_EV:
1575 kring->ckr_na_sync = kern_channel_event_sync;
1576 break;
1577 default:
1578 VERIFY(0);
1579 /* NOTREACHED */
1580 __builtin_unreachable();
1581 }
1582 if (t != NR_EV) {
1583 kring->ckr_na_notify = na->na_notify;
1584 } else {
1585 kring->ckr_na_notify = NULL;
1586 }
1587 (void) snprintf(kring->ckr_name,
1588 sizeof(kring->ckr_name) - 1,
1589 "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1590 ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1591 SK_DF(SK_VERB_NA | SK_VERB_RING,
1592 "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1593 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1594 CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1595 kring->ckr_state = KR_READY;
1596 q_lck_grp = na_kr_q_lck_grp(t);
1597 s_lck_grp = na_kr_s_lck_grp(t);
1598 kring->ckr_qlock_group = q_lck_grp;
1599 lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1600 &channel_lock_attr);
1601 kring->ckr_slock_group = s_lck_grp;
1602 lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1603 &channel_lock_attr);
1604 csi_init(&kring->ckr_si,
1605 (kring->ckr_flags & CKRF_MITIGATION),
1606 na->na_ch_mit_ival);
1607 }
1608 csi_init(&na->na_si[t],
1609 (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1610 na->na_ch_mit_ival);
1611 }
1612 ASSERT(c == 0);
1613 na->na_tailroom = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F];
1614
1615 if (na->na_type == NA_NETIF_DEV) {
1616 na_kr_setup_netif_svc_map(na);
1617 }
1618
1619 /* validate now for cases where we create only krings */
1620 na_krings_verify(na);
1621 return 0;
1622
1623 error:
1624 ASSERT(err != 0);
1625 if (na->na_tx_rings != NULL) {
1626 sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1627 na->na_tx_rings = NULL;
1628 }
1629 if (na->na_slot_ctxs != NULL) {
1630 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1631 skn_free_type_array(slot_ctxs,
1632 struct slot_ctx, na->na_total_slots,
1633 na->na_slot_ctxs);
1634 na->na_slot_ctxs = NULL;
1635 }
1636 if (na->na_scratch != NULL) {
1637 skn_free_type_array(scratch,
1638 kern_packet_t, na->na_total_slots,
1639 na->na_scratch);
1640 na->na_scratch = NULL;
1641 }
1642 return err;
1643 }
1644
1645 /* undo the actions performed by na_kr_create() */
1646 /* call with SK_LOCK held */
1647 static void
na_kr_delete(struct nexus_adapter *na)
1649 {
1650 struct __kern_channel_ring *kring = na->na_tx_rings;
1651 enum txrx t;
1652
1653 ASSERT((kring != NULL) && (na->na_tailroom != NULL));
1654 SK_LOCK_ASSERT_HELD();
1655
1656 for_all_rings(t) {
1657 csi_destroy(&na->na_si[t]);
1658 }
1659 /* we rely on the krings layout described above */
1660 for (; kring != na->na_tailroom; kring++) {
1661 lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1662 lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1663 csi_destroy(&kring->ckr_si);
1664 if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1665 kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1666 ASSERT(kring->ckr_slot_ctxs != NULL);
1667 kring->ckr_slot_ctxs = NULL;
1668 }
1669 }
1670 if (na->na_slot_ctxs != NULL) {
1671 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1672 atomic_bitclear_32(&na->na_flags, NAF_SLOT_CONTEXT);
1673 skn_free_type_array(slot_ctxs,
1674 struct slot_ctx, na->na_total_slots,
1675 na->na_slot_ctxs);
1676 na->na_slot_ctxs = NULL;
1677 }
1678 if (na->na_scratch != NULL) {
1679 skn_free_type_array(scratch,
1680 kern_packet_t, na->na_total_slots,
1681 na->na_scratch);
1682 na->na_scratch = NULL;
1683 }
1684 ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1685 sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1686 na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1687 na->na_free_rings = na->na_event_rings = na->na_tailroom = NULL;
1688 }
1689
1690 static void
na_kr_slot_desc_init(struct __slot_desc *ksds,
1692 boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1693 {
1694 size_t i;
1695
1696 bzero(ksds, ndesc * SLOT_DESC_SZ);
1697 if (usds != NULL) {
1698 ASSERT(!kernel_only);
1699 bzero(usds, ndesc * SLOT_DESC_SZ);
1700 } else {
1701 ASSERT(kernel_only);
1702 }
1703
1704 for (i = 0; i < ndesc; i++) {
1705 KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1706 if (!kernel_only) {
1707 USD_INIT(SLOT_DESC_USD(&usds[i]));
1708 }
1709 }
1710 }
1711
1712 /* call with SK_LOCK held */
1713 static int
1714 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1715 {
1716 struct skmem_arena *ar = na->na_arena;
1717 struct skmem_arena_nexus *arn;
1718 mach_vm_offset_t roff[SKMEM_REGIONS];
1719 enum txrx t;
1720 uint32_t i;
1721
1722 SK_LOCK_ASSERT_HELD();
1723 ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1724 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1725 arn = skmem_arena_nexus(ar);
1726 ASSERT(arn != NULL);
1727
1728 bzero(&roff, sizeof(roff));
1729 for (i = 0; i < SKMEM_REGIONS; i++) {
1730 if (ar->ar_regions[i] == NULL) {
1731 continue;
1732 }
1733
1734 /* not for nexus */
1735 ASSERT(i != SKMEM_REGION_SYSCTLS);
1736
1737 /*
1738 * Get region offsets from base of mmap span; the arena
1739 * doesn't need to be mmap'd at this point, since we
1740 * simply compute the relative offset.
1741 */
1742 roff[i] = skmem_arena_get_region_offset(ar, i);
1743 }
1744
1745 for_all_rings(t) {
1746 for (i = 0; i < na_get_nrings(na, t); i++) {
1747 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1748 struct __user_channel_ring *ring = kring->ckr_ring;
1749 mach_vm_offset_t ring_off, usd_roff;
1750 struct skmem_obj_info oi, oim;
1751 uint32_t ndesc;
1752
1753 if (ring != NULL) {
1754 SK_DF(SK_VERB_NA | SK_VERB_RING,
1755 "kr 0x%llx (\"%s\") is already "
1756 "initialized", SK_KVA(kring),
1757 kring->ckr_name);
1758 continue; /* already created by somebody else */
1759 }
1760
1761 if (!KR_KERNEL_ONLY(kring) &&
1762 (ring = skmem_cache_alloc(arn->arn_ring_cache,
1763 SKMEM_NOSLEEP)) == NULL) {
1764 SK_ERR("Cannot allocate %s_ring for kr "
1765 "0x%llx (\"%s\")", sk_ring2str(t),
1766 SK_KVA(kring), kring->ckr_name);
1767 goto cleanup;
1768 }
1769 kring->ckr_flags |= CKRF_MEM_RING_INITED;
1770 kring->ckr_ring = ring;
1771 ndesc = kring->ckr_num_slots;
1772
1773 if (ring == NULL) {
1774 goto skip_user_ring_setup;
1775 }
1776
1777 *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1778
1779 /* offset of current ring in mmap span */
1780 skmem_cache_get_obj_info(arn->arn_ring_cache,
1781 ring, &oi, NULL);
1782 ring_off = (roff[SKMEM_REGION_RING] +
1783 SKMEM_OBJ_ROFF(&oi));
1784
1785 /*
1786 * ring_{buf,md,sd}_ofs offsets are relative to the
1787 * current ring, and not to the base of mmap span.
1788 */
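/*
 * Illustrative only (numbers are made up): if the buffer region sits at
 * offset 0x200000 within the mmap span and this ring object at ring_off
 * 0x1000, ring_buf_base becomes 0x1ff000, so a consumer that sees the
 * ring mapped at address R finds the buffer region at R + ring_buf_base.
 */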
1789 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_buf_base =
1790 (roff[SKMEM_REGION_BUF] - ring_off);
1791 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1792 (roff[SKMEM_REGION_UMD] - ring_off);
1793 _CASSERT(sizeof(uint16_t) ==
1794 sizeof(ring->ring_bft_size));
1795 if (roff[SKMEM_REGION_UBFT] != 0) {
1796 ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1797 NULL);
1798 *(mach_vm_offset_t *)(uintptr_t)
1799 &ring->ring_bft_base =
1800 (roff[SKMEM_REGION_UBFT] - ring_off);
1801 *(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1802 (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1803 skr_c_obj_size;
1804 ASSERT(ring->ring_bft_size ==
1805 ar->ar_regions[SKMEM_REGION_KBFT]->
1806 skr_c_obj_size);
1807 } else {
1808 *(mach_vm_offset_t *)(uintptr_t)
1809 &ring->ring_bft_base = 0;
1810 *(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1811 }
1812
1813 if (t == NR_TX || t == NR_A || t == NR_EV) {
1814 usd_roff = roff[SKMEM_REGION_TXAUSD];
1815 } else {
1816 ASSERT(t == NR_RX || t == NR_F);
1817 usd_roff = roff[SKMEM_REGION_RXFUSD];
1818 }
1819 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1820 (usd_roff - ring_off);
1821
1822 /* copy values from kring */
1823 ring->ring_head = kring->ckr_rhead;
1824 *(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1825 kring->ckr_khead;
1826 *(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1827 kring->ckr_rtail;
1828
1829 _CASSERT(sizeof(uint32_t) ==
1830 sizeof(ring->ring_buf_size));
1831 _CASSERT(sizeof(uint16_t) ==
1832 sizeof(ring->ring_md_size));
1833 *(uint32_t *)(uintptr_t)&ring->ring_buf_size =
1834 ar->ar_regions[SKMEM_REGION_BUF]->skr_c_obj_size;
1835 if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1836 *(uint16_t *)(uintptr_t)&ring->ring_md_size =
1837 (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1838 skr_c_obj_size;
1839 ASSERT(ring->ring_md_size ==
1840 ar->ar_regions[SKMEM_REGION_KMD]->
1841 skr_c_obj_size);
1842 } else {
1843 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1844 ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1845 ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1846 }
1847
1848 /* ring info */
1849 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1850 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1851 *(uint16_t *)(uintptr_t)&ring->ring_id =
1852 (uint16_t)kring->ckr_ring_id;
1853 *(uint16_t *)(uintptr_t)&ring->ring_kind =
1854 (uint16_t)kring->ckr_tx;
1855
1856 SK_DF(SK_VERB_NA | SK_VERB_RING,
1857 "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1858 sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1859 kring->ckr_name);
1860 SK_DF(SK_VERB_NA | SK_VERB_RING,
1861 " num_slots: %u", ring->ring_num_slots);
1862 SK_DF(SK_VERB_NA | SK_VERB_RING,
1863 " buf_base: 0x%llx",
1864 (uint64_t)ring->ring_buf_base);
1865 SK_DF(SK_VERB_NA | SK_VERB_RING,
1866 " md_base: 0x%llx",
1867 (uint64_t)ring->ring_md_base);
1868 SK_DF(SK_VERB_NA | SK_VERB_RING,
1869 " sd_base: 0x%llx",
1870 (uint64_t)ring->ring_sd_base);
1871 SK_DF(SK_VERB_NA | SK_VERB_RING,
1872 " h, t: %u, %u, %u", ring->ring_head,
1873 ring->ring_tail);
1874 SK_DF(SK_VERB_NA | SK_VERB_RING,
1875 " md_size: %d",
1876 (uint64_t)ring->ring_md_size);
1877
1878 /* make sure they're in synch */
1879 _CASSERT(NR_RX == CR_KIND_RX);
1880 _CASSERT(NR_TX == CR_KIND_TX);
1881 _CASSERT(NR_A == CR_KIND_ALLOC);
1882 _CASSERT(NR_F == CR_KIND_FREE);
1883 _CASSERT(NR_EV == CR_KIND_EVENT);
1884
1885 skip_user_ring_setup:
1886 /*
1887 * This flag tells na_kr_teardown_all() that it should
1888 * go thru the checks to free up the slot maps.
1889 */
1890 kring->ckr_flags |= CKRF_MEM_SD_INITED;
1891 if (t == NR_TX || t == NR_A || t == NR_EV) {
1892 kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1893 } else {
1894 ASSERT(t == NR_RX || t == NR_F);
1895 kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1896 }
1897 kring->ckr_ksds =
1898 skmem_cache_alloc(kring->ckr_ksds_cache,
1899 SKMEM_NOSLEEP);
1900 if (kring->ckr_ksds == NULL) {
1901 SK_ERR("Cannot allocate %s_ksds for kr "
1902 "0x%llx (\"%s\")", sk_ring2str(t),
1903 SK_KVA(kring), kring->ckr_name);
1904 goto cleanup;
1905 }
1906 if (!KR_KERNEL_ONLY(kring)) {
1907 skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1908 kring->ckr_ksds, &oi, &oim);
1909 kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1910 }
1911 na_kr_slot_desc_init(kring->ckr_ksds,
1912 KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1913
1914 /* cache last slot descriptor address */
1915 ASSERT(kring->ckr_lim == (ndesc - 1));
1916 kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1917
1918 if ((t < NR_TXRX) &&
1919 !(na->na_flags & NAF_USER_PKT_POOL) &&
1920 na_kr_populate_slots(kring) != 0) {
1921 SK_ERR("Cannot allocate buffers for kr "
1922 "0x%llx (\"%s\")", SK_KVA(kring),
1923 kring->ckr_name);
1924 goto cleanup;
1925 }
1926 }
1927 }
1928
1929 return 0;
1930
1931 cleanup:
1932 na_kr_teardown_all(na, ch, FALSE);
1933
1934 return ENOMEM;
1935 }
1936
1937 static void
1938 na_kr_teardown_common(struct nexus_adapter *na,
1939 struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
1940 boolean_t defunct)
1941 {
1942 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
1943 struct __user_channel_ring *ckr_ring;
1944 boolean_t sd_idle, sd_inited;
1945
1946 ASSERT(arn != NULL);
1947 kr_enter(kring, TRUE);
1948 /*
1949 * Check CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED to decide
1950 * whether the freeing actually needs to happen (otherwise just
1951 * nullify the values).
1952 * If this adapter owns the memory for the slot descriptors and
1953 * the region is still marked as busy (sd_idle is false), leave
1954 * the kring's slot descriptor fields alone at defunct time.
1955 * At final teardown time, sd_idle must be true or we assert;
1956 * a failed assertion here indicates a missing call to
1957 * skmem_arena_nexus_sd_set_noidle().
1958 */
1959 sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
1960 if (sd_inited) {
1961 /* callee will do KR_KSD(), so check */
1962 if (((t < NR_TXRX) || (t == NR_EV)) &&
1963 (kring->ckr_ksds != NULL)) {
1964 na_kr_depopulate_slots(kring, ch, defunct);
1965 }
1966 /* leave CKRF_MEM_SD_INITED flag alone until idle */
1967 sd_idle = skmem_arena_nexus_sd_idle(arn);
1968 VERIFY(sd_idle || defunct);
1969 } else {
1970 sd_idle = TRUE;
1971 }
1972
1973 if (sd_idle) {
1974 kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
1975 if (kring->ckr_ksds != NULL) {
1976 if (sd_inited) {
1977 skmem_cache_free(kring->ckr_ksds_cache,
1978 kring->ckr_ksds);
1979 }
1980 kring->ckr_ksds = NULL;
1981 kring->ckr_ksds_last = NULL;
1982 kring->ckr_usds = NULL;
1983 }
1984 ASSERT(kring->ckr_ksds_last == NULL);
1985 ASSERT(kring->ckr_usds == NULL);
1986 }
1987
1988 if ((ckr_ring = kring->ckr_ring) != NULL) {
1989 kring->ckr_ring = NULL;
1990 }
1991
1992 if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
1993 ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
1994 if (ckr_ring != NULL) {
1995 skmem_cache_free(arn->arn_ring_cache, ckr_ring);
1996 }
1997 kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
1998 }
1999
2000 if (defunct) {
2001 /* if defunct, drop everything; see KR_DROP() */
2002 kring->ckr_flags |= CKRF_DEFUNCT;
2003 }
2004 kr_exit(kring);
2005 }
2006
2007 /*
2008 * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2009 */
2010 static void
2011 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2012 boolean_t defunct)
2013 {
2014 enum txrx t;
2015
2016 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2017
2018 /* skip if this adapter has no allocated rings */
2019 if (na->na_tx_rings == NULL) {
2020 return;
2021 }
2022
2023 for_all_rings(t) {
2024 for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2025 na_kr_teardown_common(na, &NAKR(na, t)[i],
2026 t, ch, defunct);
2027 }
2028 }
2029 }
2030
2031 /*
2032 * Teardown only {tx,rx} rings assigned to the channel.
2033 */
2034 static void
2035 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2036 boolean_t defunct, struct proc *p)
2037 {
2038 enum txrx t;
2039
2040 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2041
2042 for_rx_tx(t) {
2043 ring_id_t qfirst = ch->ch_first[t];
2044 ring_id_t qlast = ch->ch_last[t];
2045 uint32_t i;
2046
2047 for (i = qfirst; i < qlast; i++) {
2048 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2049 na_kr_teardown_common(na, kring, t, ch, defunct);
2050
2051 /*
2052 * Issue a notify to wake up anyone sleeping in kqueue
2053 * so that they notice the newly defuncted channels and
2054 * return an error
2055 */
2056 kring->ckr_na_notify(kring, p, 0);
2057 }
2058 }
2059 }
2060
2061 static int
2062 na_kr_populate_slots(struct __kern_channel_ring *kring)
2063 {
2064 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2065 struct nexus_adapter *na = KRNA(kring);
2066 kern_pbufpool_t pp = kring->ckr_pp;
2067 uint32_t nslots = kring->ckr_num_slots;
2068 uint32_t start_idx, i;
2069 uint32_t sidx = 0; /* slot counter */
2070 struct __kern_slot_desc *ksd;
2071 struct __user_slot_desc *usd;
2072 struct __kern_quantum *kqum;
2073 nexus_type_t nexus_type;
2074 int err = 0;
2075
2076 ASSERT(kring->ckr_tx < NR_TXRX);
2077 ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2078 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2079 ASSERT(pp != NULL);
2080
2081 /*
2082 * xxx_ppool: remove this special case
2083 */
2084 nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2085
2086 switch (nexus_type) {
2087 case NEXUS_TYPE_FLOW_SWITCH:
2088 case NEXUS_TYPE_KERNEL_PIPE:
2089 /*
2090 * xxx_ppool: This is temporary code until we come up with a
2091 * scheme for user space to alloc & attach packets to the tx ring.
2092 */
2093 if (kernel_only || kring->ckr_tx == NR_RX) {
2094 return 0;
2095 }
2096 break;
2097
2098 case NEXUS_TYPE_NET_IF:
2099 if (((na->na_type == NA_NETIF_DEV) ||
2100 (na->na_type == NA_NETIF_HOST)) &&
2101 (kernel_only || (kring->ckr_tx == NR_RX))) {
2102 return 0;
2103 }
2104
2105 ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2106 (na->na_type == NA_NETIF_COMPAT_HOST) ||
2107 (na->na_type == NA_NETIF_DEV) ||
2108 (na->na_type == NA_NETIF_VP));
2109
2110 if (!kernel_only) {
2111 if (kring->ckr_tx == NR_RX) {
2112 return 0;
2113 } else {
2114 break;
2115 }
2116 }
2117
2118 ASSERT(kernel_only);
2119
2120 if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2121 (na->na_type == NA_NETIF_COMPAT_HOST)) {
2122 return 0;
2123 }
2124 VERIFY(0);
2125 /* NOTREACHED */
2126 __builtin_unreachable();
2127
2128 case NEXUS_TYPE_USER_PIPE:
2129 case NEXUS_TYPE_MONITOR:
2130 break;
2131
2132 default:
2133 VERIFY(0);
2134 /* NOTREACHED */
2135 __builtin_unreachable();
2136 }
2137
2138 /* Fill the ring with packets */
2139 sidx = start_idx = 0;
2140 for (i = 0; i < nslots; i++) {
2141 kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2142 SKMEM_NOSLEEP));
2143 if (kqum == NULL) {
2144 err = ENOMEM;
2145 SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2146 "after %u of %u, err %d", SK_KVA(na->na_arena),
2147 na->na_arena->ar_name, i, nslots, err);
2148 goto cleanup;
2149 }
2150 ksd = KR_KSD(kring, i);
2151 usd = (kernel_only ? NULL : KR_USD(kring, i));
2152
2153 /* attach packet to slot */
2154 kqum->qum_ksd = ksd;
2155 ASSERT(!KSD_VALID_METADATA(ksd));
2156 KSD_ATTACH_METADATA(ksd, kqum);
2157 if (usd != NULL) {
2158 USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2159 kr_externalize_metadata(kring, pp->pp_max_frags,
2160 kqum, current_proc());
2161 }
2162
2163 SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2164 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2165 SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2166 SK_KVA(&kqum->qum_buf[0]));
2167 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2168 SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2169 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2170 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2171 SK_KVA(usd), METADATA_IDX(kqum),
2172 SK_KVA(kqum->qum_user),
2173 kqum->qum_user->qum_buf[0].buf_idx,
2174 SK_KVA(&kqum->qum_user->qum_buf[0]));
2175 }
2176
2177 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2178 }
2179
2180 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2181 SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2182
2183 cleanup:
2184 if (err != 0) {
2185 sidx = start_idx;
2186 while (i-- > 0) {
2187 ksd = KR_KSD(kring, i);
2188 usd = (kernel_only ? NULL : KR_USD(kring, i));
2189 kqum = ksd->sd_qum;
2190
2191 ASSERT(ksd == kqum->qum_ksd);
2192 KSD_RESET(ksd);
2193 if (usd != NULL) {
2194 USD_RESET(usd);
2195 }
2196 /* detach packet from slot */
2197 kqum->qum_ksd = NULL;
2198 pp_free_packet(pp, SK_PTR_ADDR(kqum));
2199
2200 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2201 }
2202 }
2203 return err;
2204 }
2205
2206 static void
2207 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2208 struct kern_channel *ch, boolean_t defunct)
2209 {
2210 #pragma unused(ch)
2211 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2212 uint32_t i, j, n = kring->ckr_num_slots;
2213 struct nexus_adapter *na = KRNA(kring);
2214 struct kern_pbufpool *pp = kring->ckr_pp;
2215 boolean_t upp = FALSE;
2216 obj_idx_t midx;
2217
2218 ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2219 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2220
2221 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2222
2223 if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2224 (kring->ckr_tx != NR_EV)) {
2225 upp = TRUE;
2226 }
2227 for (i = 0, j = 0; i < n; i++) {
2228 struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2229 struct __user_slot_desc *usd;
2230 struct __kern_quantum *qum, *kqum;
2231 boolean_t free_packet = FALSE;
2232 int err;
2233
2234 if (!KSD_VALID_METADATA(ksd)) {
2235 continue;
2236 }
2237
2238 kqum = ksd->sd_qum;
2239 usd = (kernel_only ? NULL : KR_USD(kring, i));
2240 midx = METADATA_IDX(kqum);
2241
2242 /*
2243 * if the packet is internalized it should not be in the
2244 * hash table of packets loaned to user space.
2245 */
2246 if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2247 if ((qum = pp_find_upp(pp, midx)) != NULL) {
2248 panic("internalized packet 0x%llx in htbl",
2249 SK_KVA(qum));
2250 /* NOTREACHED */
2251 __builtin_unreachable();
2252 }
2253 free_packet = TRUE;
2254 } else if (upp) {
2255 /*
2256 * if the packet is not internalized check if it is
2257 * in the list of packets loaned to user-space.
2258 * Remove from the list before freeing.
2259 */
2260 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2261 qum = pp_remove_upp(pp, midx, &err);
2262 if (err != 0) {
2263 SK_ERR("un-allocated packet or buflet %d %p",
2264 midx, SK_KVA(qum));
2265 if (qum != NULL) {
2266 free_packet = TRUE;
2267 }
2268 }
2269 } else {
2270 free_packet = TRUE;
2271 }
2272
2273 /*
2274 * Clear the user and kernel slot descriptors. Note that
2275 * if we are depopulating the slots due to defunct (and not
2276 * due to normal deallocation/teardown), we leave the user
2277 * slot descriptor alone. At that point the process may
2278 * be suspended, and later when it resumes it would just
2279 * pick up the original contents and move forward with
2280 * whatever it was doing.
2281 */
2282 KSD_RESET(ksd);
2283 if (usd != NULL && !defunct) {
2284 USD_RESET(usd);
2285 }
2286
2287 /* detach packet from slot */
2288 kqum->qum_ksd = NULL;
2289
2290 SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2291 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2292 METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2293 SK_KVA(&kqum->qum_buf[0]));
2294 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2295 SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2296 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2297 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2298 SK_KVA(usd), METADATA_IDX(kqum),
2299 SK_KVA(kqum->qum_user),
2300 kqum->qum_user->qum_buf[0].buf_idx,
2301 SK_KVA(&kqum->qum_user->qum_buf[0]));
2302 }
2303
2304 if (free_packet) {
2305 pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2306 }
2307 }
2308
2309 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2310 SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2311 j, n);
2312 }
2313
2314 int
2315 na_rings_mem_setup(struct nexus_adapter *na, uint32_t tailroom,
2316 boolean_t alloc_ctx, struct kern_channel *ch)
2317 {
2318 boolean_t kronly;
2319 int err;
2320
2321 SK_LOCK_ASSERT_HELD();
2322 ASSERT(na->na_channels == 0);
2323 /*
2324 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2325 * the backing memory regions for the adapter.
2326 */
2327 kronly = (na->na_flags & NAF_MEM_NO_INIT);
2328 ASSERT(!kronly || NA_KERNEL_ONLY(na));
2329
2330 /*
2331 * Create and initialize the common fields of the krings array,
2332 * using the information that is already available in the na.
2333 * tailroom can be used to request the allocation of additional
2334 * tailroom bytes after the krings array. This is used by
2335 * nexus_vp_adapters (i.e., flow switch ports) to make room
2336 * for leasing-related data structures.
2337 */
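/*
 * Purely illustrative (struct lease_rec and nslots are made-up names):
 * a port wanting one lease record per slot co-located with the krings
 * could pass something like
 *
 *	na_rings_mem_setup(na, nslots * sizeof(struct lease_rec),
 *	    FALSE, ch);
 *
 * and later find that space immediately after the krings array.
 */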
2338 if ((err = na_kr_create(na, tailroom, alloc_ctx)) == 0 && !kronly) {
2339 err = na_kr_setup(na, ch);
2340 if (err != 0) {
2341 na_kr_delete(na);
2342 }
2343 }
2344
2345 return err;
2346 }
2347
2348 void
2349 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2350 boolean_t defunct)
2351 {
2352 SK_LOCK_ASSERT_HELD();
2353 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2354
2355 /*
2356 * Deletes the kring and ring array of the adapter. They
2357 * must have been created using na_rings_mem_setup().
2358 *
2359 * XXX: [email protected] -- the parameter "ch" should not be
2360 * needed here; however na_kr_depopulate_slots() needs to
2361 * go thru the channel's user packet pool hash, and so for
2362 * now we leave it here.
2363 */
2364 na_kr_teardown_all(na, ch, defunct);
2365 if (!defunct) {
2366 na_kr_delete(na);
2367 }
2368 }
2369
2370 void
2371 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2372 {
2373 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2374
2375 /*
2376 * Depopulate slots on the TX and RX rings of this channel,
2377 * but don't touch other rings owned by other channels if
2378 * this adapter is being shared.
2379 */
2380 na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2381 }
2382
2383 void
2384 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2385 {
2386 enum txrx t;
2387 uint32_t i;
2388
2389 for_rx_tx(t) {
2390 for (i = 0; i < na_get_nrings(na, t); i++) {
2391 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2392 int error;
2393 error = kr_enter(kring, TRUE);
2394 if (drop) {
2395 kring->ckr_flags |= CKRF_DROP;
2396 } else {
2397 kring->ckr_flags &= ~CKRF_DROP;
2398 }
2399
2400 if (error != 0) {
2401 SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2402 "kr_enter failed %d",
2403 na->na_name, SK_KVA(na),
2404 kring->ckr_name, SK_KVA(kring),
2405 error);
2406 } else {
2407 kr_exit(kring);
2408 }
2409 SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2410 "krflags 0x%b", na->na_name, SK_KVA(na),
2411 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2412 CKRF_BITS);
2413 }
2414 }
2415 }
2416
2417 /*
2418 * Set the stopped/enabled status of a ring. When stopping, we also wait
2419 * for all current activity on the ring to terminate. The status change
2420 * is then notified using the na_notify callback of the na.
2421 */
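/*
 * The state argument is one of KR_READY, KR_STOPPED or KR_LOCKED, as used
 * by the na_{disable,enable,lock}_all_rings() wrappers below.
 */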
2422 static void
2423 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2424 uint32_t state)
2425 {
2426 struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2427
2428 /*
2429 * Mark the ring as stopped/enabled, and run through the
2430 * locks to make sure other users get to see it.
2431 */
2432 if (state == KR_READY) {
2433 kr_start(kr);
2434 } else {
2435 kr_stop(kr, state);
2436 }
2437 }
2438
2439
2440 /* stop or enable all the rings of na */
2441 static void
2442 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2443 {
2444 uint32_t i;
2445 enum txrx t;
2446
2447 SK_LOCK_ASSERT_HELD();
2448
2449 if (!NA_IS_ACTIVE(na)) {
2450 return;
2451 }
2452
2453 for_rx_tx(t) {
2454 for (i = 0; i < na_get_nrings(na, t); i++) {
2455 na_set_ring(na, i, t, state);
2456 }
2457 }
2458 }
2459
2460 /*
2461 * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
2462 * to finish and prevents any new one from starting. Call this before turning
2463 * Skywalk mode off, or before removing the hardware rings (e.g., on module
2464 * unload). As a rule of thumb for Linux drivers, this should be placed near
2465 * each napi_disable().
2466 */
2467 void
2468 na_disable_all_rings(struct nexus_adapter *na)
2469 {
2470 na_set_all_rings(na, KR_STOPPED);
2471 }
2472
2473 /*
2474 * Convenience function used in drivers. Re-enables rxsync and txsync on the
2475 * adapter's rings. In Linux drivers, this should be placed near each
2476 * napi_enable().
2477 */
2478 void
2479 na_enable_all_rings(struct nexus_adapter *na)
2480 {
2481 na_set_all_rings(na, KR_READY /* enabled */);
2482 }
2483
2484 void
2485 na_lock_all_rings(struct nexus_adapter *na)
2486 {
2487 na_set_all_rings(na, KR_LOCKED);
2488 }
2489
2490 void
2491 na_unlock_all_rings(struct nexus_adapter *na)
2492 {
2493 na_enable_all_rings(na);
2494 }
2495
2496 int
2497 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2498 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2499 {
2500 struct nexus_adapter *na = NULL;
2501 mach_vm_size_t memsize = 0;
2502 int err = 0;
2503 enum txrx t;
2504
2505 ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2506 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2507
2508 SK_LOCK_ASSERT_HELD();
2509
2510 /* find the nexus adapter and return the reference */
2511 err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2512 if (err != 0) {
2513 ASSERT(na == NULL);
2514 goto done;
2515 }
2516
2517 if (NA_KERNEL_ONLY(na)) {
2518 err = EBUSY;
2519 goto done;
2520 }
2521
2522 /* reject if the adapter is defunct or non-permissive */
2523 if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2524 err = ENXIO;
2525 goto done;
2526 }
2527
2528 err = na_bind_channel(na, ch, chr);
2529 if (err != 0) {
2530 goto done;
2531 }
2532
2533 ASSERT(ch->ch_schema != NULL);
2534 ASSERT(na == ch->ch_na);
2535
2536 for_all_rings(t) {
2537 if (na_get_nrings(na, t) == 0) {
2538 ch->ch_si[t] = NULL;
2539 continue;
2540 }
2541 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2542 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2543 }
2544
2545 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2546
2547 if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2548 AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2549 atomic_bitset_32(__DECONST(uint32_t *,
2550 &ch->ch_schema->csm_flags), CSM_PRIV_MEM);
2551 }
2552
2553 err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2554 if (err != 0) {
2555 goto done;
2556 }
2557
2558 atomic_bitset_32(__DECONST(uint32_t *, &ch->ch_schema->csm_flags),
2559 CSM_ACTIVE);
2560 chr->cr_memsize = memsize;
2561 chr->cr_memoffset = ch->ch_schema_offset;
2562
2563 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2564 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2565 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2566 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2567 na->na_flags, NAF_BITS);
2568
2569 done:
2570 if (err != 0) {
2571 if (ch->ch_schema != NULL || na != NULL) {
2572 if (ch->ch_schema != NULL) {
2573 ASSERT(na == ch->ch_na);
2574 /*
2575 * Callee will unmap memory region if needed,
2576 * as well as release reference held on 'na'.
2577 */
2578 na_disconnect(nx, ch);
2579 na = NULL;
2580 }
2581 if (na != NULL) {
2582 (void) na_release_locked(na);
2583 na = NULL;
2584 }
2585 }
2586 }
2587
2588 return err;
2589 }
2590
2591 void
2592 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2593 {
2594 #pragma unused(nx)
2595 enum txrx t;
2596
2597 SK_LOCK_ASSERT_HELD();
2598
2599 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2600 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2601 ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2602 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2603 ch->ch_na->na_flags, NAF_BITS);
2604
2605 /* destroy mapping and release references */
2606 na_unbind_channel(ch);
2607 ASSERT(ch->ch_na == NULL);
2608 ASSERT(ch->ch_schema == NULL);
2609 for_all_rings(t) {
2610 ch->ch_si[t] = NULL;
2611 }
2612 }
2613
2614 void
2615 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2616 struct nexus_adapter *na, boolean_t locked)
2617 {
2618 #pragma unused(nx)
2619 SK_LOCK_ASSERT_HELD();
2620 if (!locked) {
2621 lck_mtx_lock(&ch->ch_lock);
2622 }
2623
2624 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2625
2626 if (!(na->na_flags & NAF_DEFUNCT)) {
2627 /*
2628 * Mark this adapter as defunct to inform nexus-specific
2629 * teardown handler called by na_teardown() below.
2630 */
2631 atomic_bitset_32(&na->na_flags, NAF_DEFUNCT);
2632
2633 /*
2634 * Depopulate slots.
2635 */
2636 na_teardown(na, ch, TRUE);
2637
2638 /*
2639 * And finally destroy any already-defunct memory regions.
2640 * Do this only if the nexus adapter owns the arena, i.e.
2641 * NAF_MEM_LOANED is not set. Otherwise, we'd expect
2642 * that this routine be called again for the real owner.
2643 */
2644 if (!(na->na_flags & NAF_MEM_LOANED)) {
2645 skmem_arena_defunct(na->na_arena);
2646 }
2647 }
2648
2649 SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2650 "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2651 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2652 na->na_name, ch->ch_info->cinfo_nx_port,
2653 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2654 na->na_flags, NAF_BITS);
2655
2656 if (!locked) {
2657 lck_mtx_unlock(&ch->ch_lock);
2658 }
2659 }
2660
2661 /*
2662 * TODO: [email protected] -- merge this into na_connect()
2663 */
2664 int
2665 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2666 struct chreq *chr, struct proc *p)
2667 {
2668 #pragma unused(p)
2669 struct nexus_adapter *na = NULL;
2670 mach_vm_size_t memsize = 0;
2671 int error = 0;
2672 enum txrx t;
2673
2674 ASSERT(chr->cr_mode & CHMODE_KERNEL);
2675 ASSERT(ch->ch_flags & CHANF_KERNEL);
2676 ASSERT(ch->ch_na == NULL);
2677 ASSERT(ch->ch_schema == NULL);
2678
2679 SK_LOCK_ASSERT_HELD();
2680
2681 error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2682 if (error != 0) {
2683 goto done;
2684 }
2685
2686 if (na == NULL) {
2687 error = EINVAL;
2688 goto done;
2689 }
2690
2691 if (na->na_channels > 0) {
2692 error = EBUSY;
2693 goto done;
2694 }
2695
2696 if (na->na_flags & NAF_DEFUNCT) {
2697 error = ENXIO;
2698 goto done;
2699 }
2700
2701 /*
2702 * Special connect requires the nexus adapter to handle its
2703 * own channel binding and unbinding via na_special(); bail
2704 * if this adapter doesn't support it.
2705 */
2706 if (na->na_special == NULL) {
2707 error = ENOTSUP;
2708 goto done;
2709 }
2710
2711 /* upon success, "ch->ch_na" will point to "na" */
2712 error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2713 if (error != 0) {
2714 ASSERT(ch->ch_na == NULL);
2715 goto done;
2716 }
2717
2718 ASSERT(na->na_flags & NAF_SPEC_INIT);
2719 ASSERT(na == ch->ch_na);
2720 /* make sure this is still the case */
2721 ASSERT(ch->ch_schema == NULL);
2722
2723 for_rx_tx(t) {
2724 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2725 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2726 }
2727
2728 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2729 chr->cr_memsize = memsize;
2730
2731 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2732 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2733 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2734 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2735 na->na_flags, NAF_BITS);
2736
2737 done:
2738 if (error != 0) {
2739 if (ch->ch_na != NULL || na != NULL) {
2740 if (ch->ch_na != NULL) {
2741 ASSERT(na == ch->ch_na);
2742 /* callee will release reference on 'na' */
2743 na_disconnect_spec(nx, ch);
2744 na = NULL;
2745 }
2746 if (na != NULL) {
2747 (void) na_release_locked(na);
2748 na = NULL;
2749 }
2750 }
2751 }
2752
2753 return error;
2754 }
2755
2756 /*
2757 * TODO: [email protected] -- merge this into na_disconnect()
2758 */
2759 void
2760 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2761 {
2762 #pragma unused(nx)
2763 struct nexus_adapter *na = ch->ch_na;
2764 enum txrx t;
2765 int error;
2766
2767 SK_LOCK_ASSERT_HELD();
2768 ASSERT(na != NULL);
2769 ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */
2770
2771 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2772 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2773 na->na_name, ch->ch_info->cinfo_nx_port,
2774 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2775 na->na_flags, NAF_BITS);
2776
2777 /* take a reference for this routine */
2778 na_retain_locked(na);
2779
2780 ASSERT(ch->ch_flags & CHANF_KERNEL);
2781 ASSERT(ch->ch_schema == NULL);
2782 ASSERT(na->na_special != NULL);
2783 /* unbind this channel */
2784 error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2785 ASSERT(error == 0);
2786 ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2787
2788 /* now release our reference; this may be the last */
2789 na_release_locked(na);
2790 na = NULL;
2791
2792 ASSERT(ch->ch_na == NULL);
2793 for_rx_tx(t) {
2794 ch->ch_si[t] = NULL;
2795 }
2796 }
2797
2798 void
2799 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2800 {
2801 #pragma unused(nx)
2802 struct nexus_adapter *na = ch->ch_na;
2803
2804 SK_LOCK_ASSERT_HELD();
2805
2806 ASSERT(ch->ch_flags & CHANF_KERNEL);
2807 ASSERT(NA_KERNEL_ONLY(na));
2808 ASSERT(na->na_special != NULL);
2809
2810 na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2811 }
2812
2813 void
2814 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2815 {
2816 #pragma unused(nx)
2817 struct nexus_adapter *na = ch->ch_na;
2818
2819 SK_LOCK_ASSERT_HELD();
2820
2821 ASSERT(ch->ch_flags & CHANF_KERNEL);
2822 ASSERT(NA_KERNEL_ONLY(na));
2823 ASSERT(na->na_special != NULL);
2824
2825 na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2826 }
2827
2828 /*
2829 * MUST BE CALLED UNDER SK_LOCK()
2830 *
2831 * Get a refcounted reference to a nexus adapter attached
2832 * to the interface specified by chr.
2833 * This is always called in the execution of an ioctl().
2834 *
2835 * Return ENXIO if the interface specified by the request does
2836 * not exist, ENOTSUP if Skywalk is not supported by the interface,
2837 * EINVAL if parameters are invalid, ENOMEM if needed resources
2838 * could not be allocated.
2839 * If successful, hold a reference to the nexus adapter.
2840 *
2841 * No reference is kept on the real interface, which may then
2842 * disappear at any time.
2843 */
2844 int
2845 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2846 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
2847 struct nexus_adapter **na, boolean_t create)
2848 {
2849 int error = 0;
2850
2851 _CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2852
2853 *na = NULL; /* default return value */
2854
2855 SK_LOCK_ASSERT_HELD();
2856
2857 /*
2858 * We cascade through all possible types of nexus adapter.
2859 * All nx_*_na_find() functions return an error and an na,
2860 * with the following combinations:
2861 *
2862 * error na
2863 * 0 NULL type doesn't match
2864 * !0 NULL type matches, but na creation/lookup failed
2865 * 0 !NULL type matches and na created/found
2866 * !0 !NULL impossible
2867 */
2868
2869 #if CONFIG_NEXUS_MONITOR
2870 /* try to see if this is a monitor port */
2871 error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
2872 if (error != 0 || *na != NULL) {
2873 return error;
2874 }
2875 #endif /* CONFIG_NEXUS_MONITOR */
2876 #if CONFIG_NEXUS_USER_PIPE
2877 /* try to see if this is a pipe port */
2878 error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2879 if (error != 0 || *na != NULL) {
2880 return error;
2881 }
2882 #endif /* CONFIG_NEXUS_USER_PIPE */
2883 #if CONFIG_NEXUS_KERNEL_PIPE
2884 /* try to see if this is a kernel pipe port */
2885 error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2886 if (error != 0 || *na != NULL) {
2887 return error;
2888 }
2889 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
2890 #if CONFIG_NEXUS_FLOWSWITCH
2891 /* try to see if this is a flowswitch port */
2892 error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2893 if (error != 0 || *na != NULL) {
2894 return error;
2895 }
2896 #endif /* CONFIG_NEXUS_FLOWSWITCH */
2897 #if CONFIG_NEXUS_NETIF
2898 error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2899 if (error != 0 || *na != NULL) {
2900 return error;
2901 }
2902 #endif /* CONFIG_NEXUS_NETIF */
2903
2904 ASSERT(*na == NULL);
2905 return ENXIO;
2906 }
2907
2908 void
2909 na_retain_locked(struct nexus_adapter *na)
2910 {
2911 SK_LOCK_ASSERT_HELD();
2912
2913 if (na != NULL) {
2914 #if SK_LOG
2915 uint32_t oref = atomic_add_32_ov(&na->na_refcount, 1);
2916 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2917 na->na_name, SK_KVA(na), oref + 1, na->na_channels);
2918 #else /* !SK_LOG */
2919 atomic_add_32(&na->na_refcount, 1);
2920 #endif /* !SK_LOG */
2921 }
2922 }
2923
2924 /* returns 1 iff the nexus_adapter is destroyed */
2925 int
2926 na_release_locked(struct nexus_adapter *na)
2927 {
2928 uint32_t oref;
2929
2930 SK_LOCK_ASSERT_HELD();
2931
2932 ASSERT(na->na_refcount > 0);
2933 oref = atomic_add_32_ov(&na->na_refcount, -1);
2934 if (oref > 1) {
2935 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2936 na->na_name, SK_KVA(na), oref - 1, na->na_channels);
2937 return 0;
2938 }
2939 ASSERT(na->na_channels == 0);
2940
2941 if (na->na_dtor != NULL) {
2942 na->na_dtor(na);
2943 }
2944
2945 ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
2946 ASSERT(na->na_slot_ctxs == NULL);
2947 ASSERT(na->na_scratch == NULL);
2948
2949 #if CONFIG_NEXUS_USER_PIPE
2950 nx_upipe_na_dealloc(na);
2951 #endif /* CONFIG_NEXUS_USER_PIPE */
2952 if (na->na_arena != NULL) {
2953 skmem_arena_release(na->na_arena);
2954 na->na_arena = NULL;
2955 }
2956
2957 SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
2958 na->na_name, SK_KVA(na));
2959
2960 NA_FREE(na);
2961 return 1;
2962 }
2963
2964 static struct nexus_adapter *
2965 na_pseudo_alloc(zalloc_flags_t how)
2966 {
2967 struct nexus_adapter *na;
2968
2969 na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
2970 if (na) {
2971 na->na_type = NA_PSEUDO;
2972 na->na_free = na_pseudo_free;
2973 }
2974 return na;
2975 }
2976
2977 static void
2978 na_pseudo_free(struct nexus_adapter *na)
2979 {
2980 ASSERT(na->na_refcount == 0);
2981 SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
2982 bzero(na, sizeof(*na));
2983 zfree(na_pseudo_zone, na);
2984 }
2985
2986 static int
2987 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
2988 uint32_t flags)
2989 {
2990 #pragma unused(kring, p, flags)
2991 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
2992 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
2993 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
2994 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
2995 flags);
2996
2997 return 0;
2998 }
2999
3000 static int
3001 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3002 uint32_t flags)
3003 {
3004 #pragma unused(kring, p, flags)
3005 SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3006 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3007 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3008 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3009 flags);
3010
3011 ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3012
3013 return 0;
3014 }
3015
3016 static int
3017 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3018 {
3019 SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3020 SK_KVA(na), na_activate_mode2str(mode));
3021
3022 switch (mode) {
3023 case NA_ACTIVATE_MODE_ON:
3024 atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
3025 break;
3026
3027 case NA_ACTIVATE_MODE_DEFUNCT:
3028 break;
3029
3030 case NA_ACTIVATE_MODE_OFF:
3031 atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
3032 break;
3033
3034 default:
3035 VERIFY(0);
3036 /* NOTREACHED */
3037 __builtin_unreachable();
3038 }
3039
3040 return 0;
3041 }
3042
3043 static void
3044 na_pseudo_dtor(struct nexus_adapter *na)
3045 {
3046 #pragma unused(na)
3047 }
3048
3049 static int
3050 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3051 {
3052 return na_rings_mem_setup(na, 0, FALSE, ch);
3053 }
3054
3055 static void
3056 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3057 boolean_t defunct)
3058 {
3059 na_rings_mem_teardown(na, ch, defunct);
3060 }
3061
3062 /*
3063 * Pseudo nexus adapter; typically used as a generic parent adapter.
3064 */
3065 int
3066 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3067 struct nexus_adapter **ret)
3068 {
3069 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3070 struct nexus_adapter *na;
3071 int error;
3072
3073 SK_LOCK_ASSERT_HELD();
3074 *ret = NULL;
3075
3076 na = na_pseudo_alloc(Z_WAITOK);
3077
3078 ASSERT(na->na_type == NA_PSEUDO);
3079 ASSERT(na->na_free == na_pseudo_free);
3080
3081 (void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3082 na->na_name[sizeof(na->na_name) - 1] = '\0';
3083 uuid_generate_random(na->na_uuid);
3084
3085 /*
3086 * Verify upper bounds; for all cases including user pipe nexus,
3087 * the parameters must have already been validated by the corresponding
3088 * nxdom_prov_params() function defined by each domain.
3089 */
3090 na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3091 na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3092 na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3093 na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3094 ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3095 ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3096 ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3097 ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3098
3099 na->na_txsync = na_pseudo_txsync;
3100 na->na_rxsync = na_pseudo_rxsync;
3101 na->na_activate = na_pseudo_activate;
3102 na->na_dtor = na_pseudo_dtor;
3103 na->na_krings_create = na_pseudo_krings_create;
3104 na->na_krings_delete = na_pseudo_krings_delete;
3105
3106 *(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3107 NEXUS_STATS_TYPE_INVALID;
3108
3109 /* other fields are set in the common routine */
3110 na_attach_common(na, nx, NX_DOM_PROV(nx));
3111
3112 if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3113 nx, na)) != 0) {
3114 ASSERT(na->na_arena == NULL);
3115 goto err;
3116 }
3117 ASSERT(na->na_arena != NULL);
3118
3119 *(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3120 ASSERT(na->na_flowadv_max == 0 ||
3121 skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3122
3123 #if SK_LOG
3124 uuid_string_t uuidstr;
3125 SK_D("na_name: \"%s\"", na->na_name);
3126 SK_D(" UUID: %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3127 SK_D(" nx: 0x%llx (\"%s\":\"%s\")",
3128 SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3129 NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3130 SK_D(" flags: %b", na->na_flags, NAF_BITS);
3131 SK_D(" flowadv_max: %u", na->na_flowadv_max);
3132 SK_D(" rings: tx %u rx %u",
3133 na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3134 SK_D(" slots: tx %u rx %u",
3135 na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3136 #if CONFIG_NEXUS_USER_PIPE
3137 SK_D(" next_pipe: %u", na->na_next_pipe);
3138 SK_D(" max_pipes: %u", na->na_max_pipes);
3139 #endif /* CONFIG_NEXUS_USER_PIPE */
3140 #endif /* SK_LOG */
3141
3142 *ret = na;
3143 na_retain_locked(na);
3144
3145 return 0;
3146
3147 err:
3148 if (na != NULL) {
3149 if (na->na_arena != NULL) {
3150 skmem_arena_release(na->na_arena);
3151 na->na_arena = NULL;
3152 }
3153 NA_FREE(na);
3154 }
3155 return error;
3156 }
3157
3158 void
3159 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3160 const flowadv_idx_t fe_idx)
3161 {
3162 struct skmem_arena *ar = na->na_arena;
3163 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3164 struct __flowadv_entry *fae;
3165
3166 ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3167 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3168
3169 AR_LOCK(ar);
3170
3171 /* we must not get here if arena is defunct; this must be valid */
3172 ASSERT(arn->arn_flowadv_obj != NULL);
3173
3174 VERIFY(fe_idx < na->na_flowadv_max);
3175 fae = &arn->arn_flowadv_obj[fe_idx];
3176 uuid_copy(fae->fae_id, fae_id);
3177 fae->fae_flags |= FLOWADVF_VALID;
3178
3179 AR_UNLOCK(ar);
3180 }
3181
3182 void
3183 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3184 const flowadv_idx_t fe_idx)
3185 {
3186 #pragma unused(fae_id)
3187 struct skmem_arena *ar = na->na_arena;
3188 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3189
3190 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3191 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3192
3193 AR_LOCK(ar);
3194
3195 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3196 if (arn->arn_flowadv_obj != NULL) {
3197 struct __flowadv_entry *fae;
3198
3199 VERIFY(fe_idx < na->na_flowadv_max);
3200 fae = &arn->arn_flowadv_obj[fe_idx];
3201 ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3202 uuid_clear(fae->fae_id);
3203 fae->fae_flags &= ~FLOWADVF_VALID;
3204 }
3205
3206 AR_UNLOCK(ar);
3207 }
3208
3209 bool
3210 na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3211 const flowadv_token_t flow_token)
3212 {
3213 struct skmem_arena *ar = na->na_arena;
3214 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3215 bool suspend;
3216
3217 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3218 ASSERT(fe_idx < na->na_flowadv_max);
3219 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3220
3221 AR_LOCK(ar);
3222
3223 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3224
3225 if (arn->arn_flowadv_obj != NULL) {
3226 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3227
3228 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3229 /*
3230 * We cannot guarantee that the flow is still around by now,
3231 * so check if that's the case and let the caller know.
3232 */
3233 if ((suspend = (fae->fae_token == flow_token))) {
3234 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3235 fae->fae_flags |= FLOWADVF_SUSPENDED;
3236 }
3237 } else {
3238 suspend = false;
3239 }
3240 if (suspend) {
3241 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3242 "SUSPEND", sk_proc_name_address(current_proc()),
3243 sk_proc_pid(current_proc()), flow_token, fe_idx);
3244 } else {
3245 SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around",
3246 sk_proc_name_address(current_proc()),
3247 sk_proc_pid(current_proc()), flow_token, fe_idx);
3248 }
3249
3250 AR_UNLOCK(ar);
3251
3252 return suspend;
3253 }
3254
3255 int
3256 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3257 const flowadv_token_t flow_token)
3258 {
3259 struct nexus_adapter *na = ch->ch_na;
3260 struct skmem_arena *ar = na->na_arena;
3261 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3262 boolean_t resume;
3263
3264 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3265 ASSERT(fe_idx < na->na_flowadv_max);
3266 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3267
3268 AR_LOCK(ar);
3269
3270 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3271
3272 if (arn->arn_flowadv_obj != NULL) {
3273 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3274
3275 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3276 /*
3277 * We cannot guarantee that the flow is still around by now,
3278 * so check if that's the case and let the caller know.
3279 */
3280 if ((resume = (fae->fae_token == flow_token))) {
3281 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3282 fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3283 }
3284 } else {
3285 resume = FALSE;
3286 }
3287 if (resume) {
3288 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3289 "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
3290 fe_idx);
3291 } else {
3292 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3293 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3294 }
3295
3296 AR_UNLOCK(ar);
3297
3298 return resume;
3299 }
3300
3301 void
3302 na_flowadv_event(struct __kern_channel_ring *kring)
3303 {
3304 ASSERT(kring->ckr_tx == NR_TX);
3305
3306 SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
3307 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
3308 KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3309
3310 na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3311 }
3312
3313 static int
3314 na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3315 uint32_t flags)
3316 {
3317 #pragma unused(flags, p)
3318 int n, ret = 0;
3319 slot_idx_t j;
3320 struct __kern_slot_desc *ksd;
3321 struct __user_slot_desc *usd;
3322 struct __kern_quantum *kqum;
3323 struct kern_pbufpool *pp = kring->ckr_pp;
3324 uint32_t nfree = 0;
3325
3326 /* packet pool list is protected by channel lock */
3327 ASSERT(!KR_KERNEL_ONLY(kring));
3328
3329 /* # of new slots */
3330 n = kring->ckr_rhead - kring->ckr_khead;
3331 if (n < 0) {
3332 n += kring->ckr_num_slots;
3333 }
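/*
 * e.g. (hypothetical indices) with 128 slots, khead 120 and rhead 8:
 * 8 - 120 = -112, plus 128 leaves 16 slots' worth of packets to free.
 */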
3334
3335 /* nothing to free */
3336 if (__improbable(n == 0)) {
3337 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3338 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3339 "nothing to free");
3340 goto done;
3341 }
3342
3343 j = kring->ckr_khead;
3344 PP_LOCK(pp);
3345 while (n--) {
3346 int err;
3347
3348 ksd = KR_KSD(kring, j);
3349 usd = KR_USD(kring, j);
3350
3351 if (__improbable(!SD_VALID_METADATA(usd))) {
3352 SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3353 ret = EINVAL;
3354 break;
3355 }
3356
3357 kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3358 if (__improbable(err != 0)) {
3359 SK_ERR("un-allocated packet or buflet %d %p",
3360 usd->sd_md_idx, SK_KVA(kqum));
3361 ret = EINVAL;
3362 break;
3363 }
3364
3365 /* detach and free the packet */
3366 kqum->qum_qflags &= ~QUM_F_FINALIZED;
3367 kqum->qum_ksd = NULL;
3368 ASSERT(!KSD_VALID_METADATA(ksd));
3369 USD_DETACH_METADATA(usd);
3370 ASSERT(pp == kqum->qum_pp);
3371 ASSERT(nfree < kring->ckr_num_slots);
3372 kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3373 j = SLOT_NEXT(j, kring->ckr_lim);
3374 }
3375 PP_UNLOCK(pp);
3376
3377 if (__probable(nfree > 0)) {
3378 pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3379 }
3380
3381 kring->ckr_khead = j;
3382 kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3383
3384 done:
3385 return ret;
3386 }
3387
3388 static int
3389 na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
3390 uint32_t flags)
3391 {
3392 int b, err;
3393 uint32_t n = 0;
3394 slot_idx_t j;
3395 uint64_t now;
3396 uint32_t curr_ws, ph_needed, ph_cnt;
3397 struct __kern_slot_desc *ksd;
3398 struct __user_slot_desc *usd;
3399 struct __kern_quantum *kqum;
3400 kern_pbufpool_t pp = kring->ckr_pp;
3401 pid_t pid = proc_pid(p);
3402
3403 /* packet pool list is protected by channel lock */
3404 ASSERT(!KR_KERNEL_ONLY(kring));
3405 ASSERT(!PP_KERNEL_ONLY(pp));
3406
3407 now = _net_uptime;
3408 if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3409 if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3410 kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3411 }
3412 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3413 "%s: purged curr_ws(%d)", kring->ckr_name,
3414 kring->ckr_alloc_ws);
3415 return 0;
3416 }
3417 /* reclaim the completed slots */
3418 kring->ckr_khead = kring->ckr_rhead;
3419
3420 /* # of busy (unclaimed) slots */
3421 b = kring->ckr_ktail - kring->ckr_khead;
3422 if (b < 0) {
3423 b += kring->ckr_num_slots;
3424 }
3425
3426 curr_ws = kring->ckr_alloc_ws;
3427 if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3428 /* increment the working set by 50% */
3429 curr_ws += (curr_ws >> 1);
3430 curr_ws = MIN(curr_ws, kring->ckr_lim);
3431 } else {
3432 if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3433 (uint32_t)b >= (curr_ws >> 2)) {
3434 /* decrease the working set by 25% */
3435 curr_ws -= (curr_ws >> 2);
3436 }
3437 }
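/*
 * For illustration (numbers are hypothetical): a forced sync grows a
 * working set of 64 slots to 96 (64 + 64/2); once na_upp_ws_hold_time
 * has elapsed and at least 96/4 = 24 slots are still busy, 96 shrinks
 * back to 72.
 */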
3438 curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
3439 if (curr_ws > (uint32_t)b) {
3440 n = curr_ws - b;
3441 }
3442 kring->ckr_alloc_ws = curr_ws;
3443 kring->ckr_sync_time = now;
3444
3445 /* min with # of avail free slots (subtract busy from max) */
3446 n = ph_needed = MIN(n, kring->ckr_lim - b);
3447 j = kring->ckr_ktail;
3448 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3449 "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3450
3451 if ((ph_cnt = ph_needed) == 0) {
3452 goto done;
3453 }
3454
3455 err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
3456 &ph_cnt);
3457
3458 if (__improbable(ph_cnt == 0)) {
3459 SK_ERR("kr 0x%llx failed to alloc %u packet s(%d)",
3460 SK_KVA(kring), ph_needed, err);
3461 kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
3462 } else {
3463 /*
3464 * Add packets to the allocated list of user packet pool.
3465 */
3466 pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
3467 }
3468
3469
3470 for (n = 0; n < ph_cnt; n++) {
3471 ksd = KR_KSD(kring, j);
3472 usd = KR_USD(kring, j);
3473
3474 kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
3475 kring->ckr_scratch[n] = 0;
3476 ASSERT(kqum != NULL);
3477
3478 /* cleanup any stale slot mapping */
3479 KSD_RESET(ksd);
3480 ASSERT(usd != NULL);
3481 USD_RESET(usd);
3482
3483 /*
3484 * Since this packet is freshly allocated and we need to
3485 * have the flag set for the attach to succeed, just set
3486 * it here rather than calling __packet_finalize().
3487 */
3488 kqum->qum_qflags |= QUM_F_FINALIZED;
3489
3490 /* Attach packet to slot */
3491 KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
3492 /*
3493 * externalize the packet as it is being transferred to
3494 * user space.
3495 */
3496 kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);
3497
3498 j = SLOT_NEXT(j, kring->ckr_lim);
3499 }
3500 done:
3501 ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3502 kring->ckr_ktail = j;
3503 return 0;
3504 }
3505
3506 static int
3507 na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3508 uint32_t flags)
3509 {
3510 #pragma unused(flags, p)
3511 int n, ret = 0;
3512 slot_idx_t j;
3513 struct __kern_slot_desc *ksd;
3514 struct __user_slot_desc *usd;
3515 struct __kern_buflet *kbft;
3516 struct kern_pbufpool *pp = kring->ckr_pp;
3517
3518 /* packet pool list is protected by channel lock */
3519 ASSERT(!KR_KERNEL_ONLY(kring));
3520
3521 /* # of new slots */
3522 n = kring->ckr_rhead - kring->ckr_khead;
3523 if (n < 0) {
3524 n += kring->ckr_num_slots;
3525 }
3526
3527 /* nothing to free */
3528 if (__improbable(n == 0)) {
3529 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3530 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3531 "nothing to free");
3532 goto done;
3533 }
3534
        j = kring->ckr_khead;
        while (n--) {
                int err;

                ksd = KR_KSD(kring, j);
                usd = KR_USD(kring, j);

                if (__improbable(!SD_VALID_METADATA(usd))) {
                        SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
                        ret = EINVAL;
                        break;
                }

                kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
                if (__improbable(err != 0)) {
                        SK_ERR("un-allocated buflet %d 0x%llx", usd->sd_md_idx,
                            SK_KVA(kbft));
                        ret = EINVAL;
                        break;
                }

                /* detach and free the buflet */
                ASSERT(!KSD_VALID_METADATA(ksd));
                USD_DETACH_METADATA(usd);
                pp_free_buflet(pp, kbft);
                j = SLOT_NEXT(j, kring->ckr_lim);
        }
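        /*
         * Reclaim the freed slots: the head advances past everything just
         * released, and the tail is parked one slot behind the head, which
         * (given the ring arithmetic above) effectively leaves the whole
         * ring writable by user space for subsequent frees.
         */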
        kring->ckr_khead = j;
        kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
        return ret;
}

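/*
 * Alloc-side sync for the user packet pool buflet ring: reclaims slots the
 * user has consumed, recomputes the allocation working set, then refills the
 * ring with a batch of freshly allocated buflets that are tracked in the
 * process's allocated list and externalized for user space.
 */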
static int
na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
        int b, err;
        uint32_t n = 0;
        slot_idx_t j;
        uint64_t now;
        uint32_t curr_ws, bh_needed, bh_cnt;
        struct __kern_slot_desc *ksd;
        struct __user_slot_desc *usd;
        struct __kern_buflet *kbft;
        struct __kern_buflet_ext *kbe;
        kern_pbufpool_t pp = kring->ckr_pp;
        pid_t pid = proc_pid(p);

        /* packet pool list is protected by channel lock */
        ASSERT(!KR_KERNEL_ONLY(kring));
        ASSERT(!PP_KERNEL_ONLY(pp));

        now = _net_uptime;
        if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
                if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
                        kring->ckr_alloc_ws = na_upp_reap_min_pkts;
                }
                SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
                    "%s: purged curr_ws(%d)", kring->ckr_name,
                    kring->ckr_alloc_ws);
                return 0;
        }
        /* reclaim the completed slots */
        kring->ckr_khead = kring->ckr_rhead;

        /* # of busy (unclaimed) slots */
        b = kring->ckr_ktail - kring->ckr_khead;
        if (b < 0) {
                b += kring->ckr_num_slots;
        }

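        /*
         * Same working-set heuristic as the packet variant above, except
         * the floor here is the buflet allocation low watermark.
         */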
        curr_ws = kring->ckr_alloc_ws;
        if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
                /* increment the working set by 50% */
                curr_ws += (curr_ws >> 1);
                curr_ws = MIN(curr_ws, kring->ckr_lim);
        } else {
                if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
                    (uint32_t)b >= (curr_ws >> 2)) {
                        /* decrease the working set by 25% */
                        curr_ws -= (curr_ws >> 2);
                }
        }
        curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
        if (curr_ws > (uint32_t)b) {
                n = curr_ws - b;
        }
        kring->ckr_alloc_ws = curr_ws;
        kring->ckr_sync_time = now;

        /* min with # of avail free slots (subtract busy from max) */
        n = bh_needed = MIN(n, kring->ckr_lim - b);
        j = kring->ckr_ktail;
        SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
            "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

        if ((bh_cnt = bh_needed) == 0) {
                goto done;
        }

        err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
            SKMEM_NOSLEEP);

        if (bh_cnt == 0) {
                SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
                    SK_KVA(kring), bh_needed, err);
                kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
        }

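        /*
         * Attach whatever the batch allocation produced; a short batch
         * simply externalizes fewer buflets, and ckr_ktail below advances
         * by exactly bh_cnt slots.
         */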
        for (n = 0; n < bh_cnt; n++) {
                struct __user_buflet *ubft;

                ksd = KR_KSD(kring, j);
                usd = KR_USD(kring, j);

                kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
                kbe = (struct __kern_buflet_ext *)kbft;
                kring->ckr_scratch[n] = 0;
                ASSERT(kbft != NULL);

                /*
                 * Add buflet to the allocated list of user packet pool.
                 */
                pp_insert_upp_bft(pp, kbft, pid);

                /*
                 * externalize the buflet as it is being transferred to
                 * user space.
                 */
                ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
                KBUF_EXTERNALIZE(kbft, ubft, pp);

                /* cleanup any stale slot mapping */
                KSD_RESET(ksd);
                ASSERT(usd != NULL);
                USD_RESET(usd);

                /* Attach buflet to slot */
                KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);

                j = SLOT_NEXT(j, kring->ckr_lim);
        }
done:
        ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
        kring->ckr_ktail = j;
        return 0;
}

/* The caller needs to ensure that the NA stays intact */
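/*
 * na_drain() marks the adapter as draining (NAF_DRAINING, cleared on the
 * next channel sync) and reaps the caches in the adapter's memory arena;
 * "purge" requests a full reap while "prune" asks for a lighter trim (the
 * exact semantics live in skmem_arena_reap()).  If a previous drain is
 * still pending, or the adapter is not active, the reap is skipped.
 */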
void
na_drain(struct nexus_adapter *na, boolean_t purge)
{
        /* will be cleared on next channel sync */
        if (!(atomic_bitset_32_ov(&na->na_flags, NAF_DRAINING) &
            NAF_DRAINING) && NA_IS_ACTIVE(na)) {
                SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
                    na->na_name, (purge ? "purging" : "pruning"),
                    SK_KVA(na), na->na_flags, NAF_BITS);

                /* reap (purge/prune) caches in the arena */
                skmem_arena_reap(na->na_arena, purge);
        }
}
