1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31 * All rights reserved.
32 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55 #include <sys/systm.h>
56 #include <skywalk/os_skywalk_private.h>
57 #include <skywalk/nexus/monitor/nx_monitor.h>
58 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
59 #include <skywalk/nexus/netif/nx_netif.h>
60 #include <skywalk/nexus/upipe/nx_user_pipe.h>
61 #include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
62 #include <kern/thread.h>
63
64 static int na_krings_use(struct kern_channel *);
65 static void na_krings_unuse(struct kern_channel *);
66 static void na_krings_verify(struct nexus_adapter *);
67 static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
68 static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
69 static void na_set_all_rings(struct nexus_adapter *, uint32_t);
70 static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
71 static void na_unset_ringid(struct kern_channel *);
72 static void na_teardown(struct nexus_adapter *, struct kern_channel *,
73 boolean_t);
74
75 static int na_kr_create(struct nexus_adapter *, boolean_t);
76 static void na_kr_delete(struct nexus_adapter *);
77 static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
78 static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
79 boolean_t);
80 static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
81 boolean_t, struct proc *);
82 static int na_kr_populate_slots(struct __kern_channel_ring *);
83 static void na_kr_depopulate_slots(struct __kern_channel_ring *,
84 struct kern_channel *, boolean_t defunct);
85
86 static int na_schema_alloc(struct kern_channel *);
87
88 static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
89 static void na_pseudo_free(struct nexus_adapter *);
90 static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
91 uint32_t);
92 static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
93 uint32_t);
94 static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
95 static void na_pseudo_dtor(struct nexus_adapter *);
96 static int na_pseudo_krings_create(struct nexus_adapter *,
97 struct kern_channel *);
98 static void na_pseudo_krings_delete(struct nexus_adapter *,
99 struct kern_channel *, boolean_t);
100 static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
101 struct proc *, uint32_t);
102 static int na_packet_pool_free_sync(struct __kern_channel_ring *,
103 struct proc *, uint32_t);
104 static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
105 struct proc *, uint32_t);
106 static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
107 struct proc *, uint32_t);
108
109 #define NA_KRING_IDLE_TIMEOUT (NSEC_PER_SEC * 30) /* 30 seconds */
110
111 static SKMEM_TYPE_DEFINE(na_pseudo_zone, struct nexus_adapter);
112
113 static int __na_inited = 0;
114
115 #define NA_NUM_WMM_CLASSES 4
116 #define NAKR_WMM_SC2RINGID(_s) PKT_SC2TC(_s)
117 #define NAKR_SET_SVC_LUT(_n, _s) \
118 (_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
119 #define NAKR_SET_KR_SVC(_n, _s) \
120 NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)
121
122 #define NA_UPP_ALLOC_LOWAT 8
123 static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;
124
125 #define NA_UPP_REAP_INTERVAL 10 /* seconds */
126 static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;
127
128 #define NA_UPP_WS_HOLD_TIME 2 /* seconds */
129 static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;
130
131 #define NA_UPP_REAP_MIN_PKTS 0
132 static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;
133
134 #define NA_UPP_ALLOC_BUF_LOWAT 64
135 static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;
136
137 #if (DEVELOPMENT || DEBUG)
138 static uint64_t _na_inject_error = 0;
139 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
140 _SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)
141
142 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
143 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
144 NA_UPP_WS_HOLD_TIME, "");
145 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
146 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
147 NA_UPP_REAP_INTERVAL, "");
148 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
149 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
150 NA_UPP_REAP_MIN_PKTS, "");
151 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
152 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
153 NA_UPP_ALLOC_LOWAT, "");
154 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
155 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
156 NA_UPP_ALLOC_BUF_LOWAT, "");
157 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
158 CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
159 #else
160 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
161 #endif /* !DEVELOPMENT && !DEBUG */
162
163 #define SKMEM_TAG_NX_RINGS "com.apple.skywalk.nexus.rings"
164 static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);
165
166 #define SKMEM_TAG_NX_CONTEXTS "com.apple.skywalk.nexus.contexts"
167 static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);
168
169 #define SKMEM_TAG_NX_SCRATCH "com.apple.skywalk.nexus.scratch"
170 static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);
171
172 #if !XNU_TARGET_OS_OSX
173 /* see KLDBootstrap::readPrelinkedExtensions() for details */
174 extern uuid_t kernelcache_uuid;
175 #else /* XNU_TARGET_OS_OSX */
176 /* see panic_init() for details */
177 extern unsigned char *kernel_uuid;
178 #endif /* XNU_TARGET_OS_OSX */
179
180 void
na_init(void)181 na_init(void)
182 {
183 /*
184 * Changing the size of nexus_mdata structure won't break ABI,
185 * but we need to be mindful of memory consumption; Thus here
186 * we add a compile-time check to make sure the size is within
187 * the expected limit and that it's properly aligned. This
188 * check may be adjusted in future as needed.
189 */
190 _CASSERT(sizeof(struct nexus_mdata) <= 32 &&
191 IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
192 _CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));
193
194 /* see comments on nexus_meta_type_t */
195 _CASSERT(NEXUS_META_TYPE_MAX == 3);
196 _CASSERT(NEXUS_META_SUBTYPE_MAX == 3);
197
198 ASSERT(!__na_inited);
199
200 __na_inited = 1;
201 }
202
203 void
na_fini(void)204 na_fini(void)
205 {
206 if (__na_inited) {
207 __na_inited = 0;
208 }
209 }
210
211 /*
212 * Interpret the ringid of an chreq, by translating it into a pair
213 * of intervals of ring indices:
214 *
215 * [txfirst, txlast) and [rxfirst, rxlast)
216 */
217 int
na_interp_ringid(struct nexus_adapter * na,ring_id_t ring_id,ring_set_t ring_set,uint32_t first[NR_TXRX],uint32_t last[NR_TXRX])218 na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
219 ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
220 {
221 enum txrx t;
222
223 switch (ring_set) {
224 case RING_SET_ALL:
225 /*
226 * Ring pair eligibility: all ring(s).
227 */
228 if (ring_id != CHANNEL_RING_ID_ANY &&
229 ring_id >= na_get_nrings(na, NR_TX) &&
230 ring_id >= na_get_nrings(na, NR_RX)) {
231 SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
232 na->na_name, (int)ring_id, ring_set);
233 return EINVAL;
234 }
235 for_rx_tx(t) {
236 if (ring_id == CHANNEL_RING_ID_ANY) {
237 first[t] = 0;
238 last[t] = na_get_nrings(na, t);
239 } else {
240 first[t] = ring_id;
241 last[t] = ring_id + 1;
242 }
243 }
244 break;
245
246 default:
247 SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
248 return EINVAL;
249 }
250
251 SK_DF(SK_VERB_NA | SK_VERB_RING,
252 "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
253 na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
254 first[NR_RX], last[NR_RX]);
255
256 return 0;
257 }
258
259 /*
260 * Set the ring ID. For devices with a single queue, a request
261 * for all rings is the same as a single ring.
262 */
263 static int
na_set_ringid(struct kern_channel * ch,ring_set_t ring_set,ring_id_t ring_id)264 na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
265 {
266 struct nexus_adapter *na = ch->ch_na;
267 int error;
268 enum txrx t;
269 uint32_t n_alloc_rings;
270
271 if ((error = na_interp_ringid(na, ring_id, ring_set,
272 ch->ch_first, ch->ch_last)) != 0) {
273 return error;
274 }
275
276 n_alloc_rings = na_get_nrings(na, NR_A);
277 if (n_alloc_rings != 0) {
278 ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
279 ch->ch_last[NR_A] = ch->ch_last[NR_F] =
280 ch->ch_first[NR_A] + n_alloc_rings;
281 } else {
282 ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
283 ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
284 }
285 ch->ch_first[NR_EV] = 0;
286 ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
287 /* XXX: should we initialize na_si_users for event ring ? */
288
289 /*
290 * Optimization: count the users registered for more than
291 * one ring, which are the ones sleeping on the global queue.
292 * The default na_notify() callback will then avoid signaling
293 * the global queue if nobody is using it
294 */
295 for_rx_tx(t) {
296 if (ch_is_multiplex(ch, t)) {
297 na->na_si_users[t]++;
298 ASSERT(na->na_si_users[t] != 0);
299 }
300 }
301 return 0;
302 }
303
304 static void
na_unset_ringid(struct kern_channel * ch)305 na_unset_ringid(struct kern_channel *ch)
306 {
307 struct nexus_adapter *na = ch->ch_na;
308 enum txrx t;
309
310 for_rx_tx(t) {
311 if (ch_is_multiplex(ch, t)) {
312 ASSERT(na->na_si_users[t] != 0);
313 na->na_si_users[t]--;
314 }
315 ch->ch_first[t] = ch->ch_last[t] = 0;
316 }
317 }
318
319 /*
320 * Check that the rings we want to bind are not exclusively owned by a previous
321 * bind. If exclusive ownership has been requested, we also mark the rings.
322 */
323 /* Hoisted out of line to reduce kernel stack footprint */
324 SK_NO_INLINE_ATTRIBUTE
325 static int
na_krings_use(struct kern_channel * ch)326 na_krings_use(struct kern_channel *ch)
327 {
328 struct nexus_adapter *na = ch->ch_na;
329 struct __kern_channel_ring *kring;
330 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
331 enum txrx t;
332 uint32_t i;
333
334 SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
335 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
336 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
337
338 /*
339 * First round: check that all the requested rings
340 * are neither alread exclusively owned, nor we
341 * want exclusive ownership when they are already in use
342 */
343 for_all_rings(t) {
344 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
345 kring = &NAKR(na, t)[i];
346 if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
347 (kring->ckr_users && excl)) {
348 SK_DF(SK_VERB_NA | SK_VERB_RING,
349 "kr \"%s\" (0x%llx) krflags 0x%b is busy",
350 kring->ckr_name, SK_KVA(kring),
351 kring->ckr_flags, CKRF_BITS);
352 return EBUSY;
353 }
354 }
355 }
356
357 /*
358 * Second round: increment usage count and possibly
359 * mark as exclusive
360 */
361
362 for_all_rings(t) {
363 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
364 kring = &NAKR(na, t)[i];
365 kring->ckr_users++;
366 if (excl) {
367 kring->ckr_flags |= CKRF_EXCLUSIVE;
368 }
369 }
370 }
371
372 return 0;
373 }
374
375 /* Hoisted out of line to reduce kernel stack footprint */
376 SK_NO_INLINE_ATTRIBUTE
377 static void
na_krings_unuse(struct kern_channel * ch)378 na_krings_unuse(struct kern_channel *ch)
379 {
380 struct nexus_adapter *na = ch->ch_na;
381 struct __kern_channel_ring *kring;
382 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
383 enum txrx t;
384 uint32_t i;
385
386 SK_DF(SK_VERB_NA | SK_VERB_RING,
387 "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
388 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
389 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
390
391 for_all_rings(t) {
392 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
393 kring = &NAKR(na, t)[i];
394 if (excl) {
395 kring->ckr_flags &= ~CKRF_EXCLUSIVE;
396 }
397 kring->ckr_users--;
398 }
399 }
400 }
401
402 /* Hoisted out of line to reduce kernel stack footprint */
403 SK_NO_INLINE_ATTRIBUTE
404 static void
na_krings_verify(struct nexus_adapter * na)405 na_krings_verify(struct nexus_adapter *na)
406 {
407 struct __kern_channel_ring *kring;
408 enum txrx t;
409 uint32_t i;
410
411 for_all_rings(t) {
412 for (i = 0; i < na_get_nrings(na, t); i++) {
413 kring = &NAKR(na, t)[i];
414 /* na_kr_create() validations */
415 ASSERT(kring->ckr_num_slots > 0);
416 ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
417 ASSERT(kring->ckr_pp != NULL);
418
419 if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
420 continue;
421 }
422 /* na_kr_setup() validations */
423 if (KR_KERNEL_ONLY(kring)) {
424 ASSERT(kring->ckr_ring == NULL);
425 } else {
426 ASSERT(kring->ckr_ring != NULL);
427 }
428 ASSERT(kring->ckr_ksds_last ==
429 &kring->ckr_ksds[kring->ckr_lim]);
430 }
431 }
432 }
433
434 int
na_bind_channel(struct nexus_adapter * na,struct kern_channel * ch,struct chreq * chr)435 na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
436 struct chreq *chr)
437 {
438 struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
439 struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
440 uint32_t ch_mode = chr->cr_mode;
441 int err = 0;
442
443 SK_LOCK_ASSERT_HELD();
444 ASSERT(ch->ch_schema == NULL);
445 ASSERT(ch->ch_na == NULL);
446
447 /* ring configuration may have changed, fetch from the card */
448 na_update_config(na);
449 ch->ch_na = na; /* store the reference */
450 err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
451 if (err != 0) {
452 goto err;
453 }
454
455 atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
456 CHANF_USER_PACKET_POOL | CHANF_EVENT_RING));
457 if (ch_mode & CHMODE_EXCLUSIVE) {
458 atomic_bitset_32(&ch->ch_flags, CHANF_EXCLUSIVE);
459 }
460 /*
461 * Disallow automatic sync for monitor mode, since TX
462 * direction is disabled.
463 */
464 if (ch_mode & CHMODE_MONITOR) {
465 atomic_bitset_32(&ch->ch_flags, CHANF_RXONLY);
466 }
467
468 if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
469 !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
470 SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
471 ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
472 err = EINVAL;
473 goto err;
474 }
475
476 if (na->na_arena->ar_flags & ARF_DEFUNCT) {
477 err = ENXIO;
478 goto err;
479 }
480
481 if (ch_mode & CHMODE_USER_PACKET_POOL) {
482 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
483 ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
484 ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
485 atomic_bitset_32(&ch->ch_flags, CHANF_USER_PACKET_POOL);
486 }
487
488 if (ch_mode & CHMODE_EVENT_RING) {
489 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
490 ASSERT(na->na_flags & NAF_EVENT_RING);
491 ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
492 atomic_bitset_32(&ch->ch_flags, CHANF_EVENT_RING);
493 }
494
495 /*
496 * If this is the first channel of the adapter, create
497 * the rings and their in-kernel view, the krings.
498 */
499 if (na->na_channels == 0) {
500 err = na->na_krings_create(na, ch);
501 if (err != 0) {
502 goto err;
503 }
504
505 /*
506 * Sanity check; this is already done in na_kr_create(),
507 * but we do it here as well to validate na_kr_setup().
508 */
509 na_krings_verify(na);
510 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
511 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
512 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
513 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
514 }
515
516 /*
517 * Validate ownership and usability of the krings; take into account
518 * whether some previous bind has exclusive ownership on them.
519 */
520 err = na_krings_use(ch);
521 if (err != 0) {
522 goto err_del_rings;
523 }
524
525 /* for user-facing channel, create a new channel schema */
526 if (!(ch->ch_flags & CHANF_KERNEL)) {
527 err = na_schema_alloc(ch);
528 if (err != 0) {
529 goto err_rel_excl;
530 }
531
532 ASSERT(ch->ch_schema != NULL);
533 ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
534 } else {
535 ASSERT(ch->ch_schema == NULL);
536 ch->ch_schema_offset = (mach_vm_offset_t)-1;
537 }
538
539 /* update our work timestamp */
540 na->na_work_ts = net_uptime();
541
542 /* update our work timestamp */
543 na->na_work_ts = net_uptime();
544
545 na->na_channels++;
546
547 /*
548 * If user packet pool is desired, initialize the allocated
549 * object hash table in the pool, if not already. This also
550 * retains a refcnt on the pool which the caller must release.
551 */
552 ASSERT(ch->ch_pp == NULL);
553 if (ch_mode & CHMODE_USER_PACKET_POOL) {
554 #pragma unused(tx_pp)
555 ASSERT(rx_pp == tx_pp);
556 err = pp_init_upp(rx_pp, TRUE);
557 if (err != 0) {
558 goto err_free_schema;
559 }
560 ch->ch_pp = rx_pp;
561 }
562
563 if (!NA_IS_ACTIVE(na)) {
564 err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
565 if (err != 0) {
566 goto err_release_pp;
567 }
568
569 SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
570 SK_KVA(na));
571 SK_D(" na_md_type: %u", na->na_md_type);
572 SK_D(" na_md_subtype: %u", na->na_md_subtype);
573 }
574
575 SK_D("ch 0x%llx", SK_KVA(ch));
576 SK_D(" ch_flags: 0x%b", ch->ch_flags, CHANF_BITS);
577 if (ch->ch_schema != NULL) {
578 SK_D(" ch_schema: 0x%llx", SK_KVA(ch->ch_schema));
579 }
580 SK_D(" ch_na: 0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
581 ch->ch_na->na_channels);
582 SK_D(" ch_tx_rings: [%u,%u)", ch->ch_first[NR_TX],
583 ch->ch_last[NR_TX]);
584 SK_D(" ch_rx_rings: [%u,%u)", ch->ch_first[NR_RX],
585 ch->ch_last[NR_RX]);
586 SK_D(" ch_alloc_rings: [%u,%u)", ch->ch_first[NR_A],
587 ch->ch_last[NR_A]);
588 SK_D(" ch_free_rings: [%u,%u)", ch->ch_first[NR_F],
589 ch->ch_last[NR_F]);
590 SK_D(" ch_ev_rings: [%u,%u)", ch->ch_first[NR_EV],
591 ch->ch_last[NR_EV]);
592
593 return 0;
594
595 err_release_pp:
596 if (ch_mode & CHMODE_USER_PACKET_POOL) {
597 ASSERT(ch->ch_pp != NULL);
598 pp_release(rx_pp);
599 ch->ch_pp = NULL;
600 }
601 err_free_schema:
602 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
603 NEXUS_META_TYPE_INVALID;
604 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
605 NEXUS_META_SUBTYPE_INVALID;
606 ASSERT(na->na_channels != 0);
607 na->na_channels--;
608 if (ch->ch_schema != NULL) {
609 skmem_cache_free(
610 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
611 ch->ch_schema);
612 ch->ch_schema = NULL;
613 ch->ch_schema_offset = (mach_vm_offset_t)-1;
614 }
615 err_rel_excl:
616 na_krings_unuse(ch);
617 err_del_rings:
618 if (na->na_channels == 0) {
619 na->na_krings_delete(na, ch, FALSE);
620 }
621 err:
622 ch->ch_na = NULL;
623 ASSERT(err != 0);
624
625 return err;
626 }
627
628 /*
629 * Undo everything that was done in na_bind_channel().
630 */
631 /* call with SK_LOCK held */
632 void
na_unbind_channel(struct kern_channel * ch)633 na_unbind_channel(struct kern_channel *ch)
634 {
635 struct nexus_adapter *na = ch->ch_na;
636
637 SK_LOCK_ASSERT_HELD();
638
639 ASSERT(na->na_channels != 0);
640 na->na_channels--;
641
642 /* release exclusive use if it was requested at bind time */
643 na_krings_unuse(ch);
644
645 if (na->na_channels == 0) { /* last instance */
646 SK_D("%s(%d): deleting last channel instance for %s",
647 ch->ch_name, ch->ch_pid, na->na_name);
648
649 /*
650 * Free any remaining allocated packets attached to
651 * the slots, followed by a teardown of the arena.
652 */
653 na_teardown(na, ch, FALSE);
654
655 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
656 NEXUS_META_TYPE_INVALID;
657 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
658 NEXUS_META_SUBTYPE_INVALID;
659 } else {
660 SK_D("%s(%d): %s has %u remaining channel instance(s)",
661 ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
662 }
663
664 /*
665 * Free any allocated packets (for the process) attached to the slots;
666 * note that na_teardown() could have done this there as well.
667 */
668 if (ch->ch_pp != NULL) {
669 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
670 pp_purge_upp(ch->ch_pp, ch->ch_pid);
671 pp_release(ch->ch_pp);
672 ch->ch_pp = NULL;
673 }
674
675 /* possibily decrement counter of tx_si/rx_si users */
676 na_unset_ringid(ch);
677
678 /* reap the caches now (purge if adapter is idle) */
679 skmem_arena_reap(na->na_arena, (na->na_channels == 0));
680
681 /* delete the csm */
682 if (ch->ch_schema != NULL) {
683 skmem_cache_free(
684 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
685 ch->ch_schema);
686 ch->ch_schema = NULL;
687 ch->ch_schema_offset = (mach_vm_offset_t)-1;
688 }
689
690 /* destroy the memory map */
691 skmem_arena_munmap_channel(na->na_arena, ch);
692
693 /* mark the channel as unbound */
694 atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE));
695 ch->ch_na = NULL;
696
697 /* and finally release the nexus adapter; this might free it */
698 (void) na_release_locked(na);
699 }
700
701 static void
na_teardown(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)702 na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
703 boolean_t defunct)
704 {
705 SK_LOCK_ASSERT_HELD();
706 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
707
708 #if CONFIG_NEXUS_MONITOR
709 /*
710 * Walk through all the rings and tell any monitor
711 * that the port is going to exit Skywalk mode
712 */
713 nx_mon_stop(na);
714 #endif /* CONFIG_NEXUS_MONITOR */
715
716 /*
717 * Deactive the adapter.
718 */
719 (void) na->na_activate(na,
720 (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));
721
722 /*
723 * Free any remaining allocated packets for this process.
724 */
725 if (ch->ch_pp != NULL) {
726 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
727 pp_purge_upp(ch->ch_pp, ch->ch_pid);
728 if (!defunct) {
729 pp_release(ch->ch_pp);
730 ch->ch_pp = NULL;
731 }
732 }
733
734 /*
735 * Delete rings and buffers.
736 */
737 na->na_krings_delete(na, ch, defunct);
738 }
739
740 /* call with SK_LOCK held */
741 /*
742 * Allocate the per-fd structure __user_channel_schema.
743 */
744 static int
na_schema_alloc(struct kern_channel * ch)745 na_schema_alloc(struct kern_channel *ch)
746 {
747 struct nexus_adapter *na = ch->ch_na;
748 struct skmem_arena *ar = na->na_arena;
749 struct skmem_arena_nexus *arn;
750 mach_vm_offset_t roff[SKMEM_REGIONS];
751 struct __kern_channel_ring *kr;
752 struct __user_channel_schema *csm;
753 struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
754 mach_vm_offset_t base;
755 uint32_t i, j, k, n[NR_ALL];
756 enum txrx t;
757
758 /* see comments for struct __user_channel_schema */
759 _CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
760 _CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
761 sizeof(csm->csm_ver));
762 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
763 sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
764 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
765 sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
766 sizeof(csm->csm_kern_name));
767
768 SK_LOCK_ASSERT_HELD();
769
770 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
771 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
772 arn = skmem_arena_nexus(ar);
773 ASSERT(arn != NULL);
774 for_all_rings(t) {
775 n[t] = 0;
776 }
777
778 csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
779 if (csm == NULL) {
780 return ENOMEM;
781 }
782
783 skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
784 bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));
785
786 *(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;
787
788 /* kernel version and executable UUID */
789 _CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
790 (void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
791 version, sizeof(csm->csm_kern_name) - 1);
792 #if !XNU_TARGET_OS_OSX
793 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
794 kernelcache_uuid, sizeof(csm->csm_kern_uuid));
795 #else /* XNU_TARGET_OS_OSX */
796 if (kernel_uuid != NULL) {
797 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
798 kernel_uuid, sizeof(csm->csm_kern_uuid));
799 }
800 #endif /* XNU_TARGET_OS_OSX */
801
802 for_rx_tx(t) {
803 ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
804 n[t] = ch->ch_last[t] - ch->ch_first[t];
805 ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
806 }
807
808 /* return total number of tx and rx rings for this channel */
809 *(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
810 *(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];
811
812 if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
813 *(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
814 na->na_num_allocator_ring_pairs;
815 n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
816 ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
817 ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
818 ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
819 }
820
821 if (ch->ch_flags & CHANF_EVENT_RING) {
822 n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
823 ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
824 *(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
825 }
826
827 bzero(&roff, sizeof(roff));
828 for (i = 0; i < SKMEM_REGIONS; i++) {
829 if (ar->ar_regions[i] == NULL) {
830 ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
831 i == SKMEM_REGION_SCHEMA ||
832 i == SKMEM_REGION_BUF_LARGE ||
833 i == SKMEM_REGION_RXBUF_DEF ||
834 i == SKMEM_REGION_RXBUF_LARGE ||
835 i == SKMEM_REGION_TXBUF_DEF ||
836 i == SKMEM_REGION_TXBUF_LARGE ||
837 i == SKMEM_REGION_RXKMD ||
838 i == SKMEM_REGION_TXKMD ||
839 i == SKMEM_REGION_UMD ||
840 i == SKMEM_REGION_UBFT ||
841 i == SKMEM_REGION_KBFT ||
842 i == SKMEM_REGION_RXKBFT ||
843 i == SKMEM_REGION_TXKBFT ||
844 i == SKMEM_REGION_TXAUSD ||
845 i == SKMEM_REGION_RXFUSD ||
846 i == SKMEM_REGION_USTATS ||
847 i == SKMEM_REGION_KSTATS ||
848 i == SKMEM_REGION_INTRINSIC ||
849 i == SKMEM_REGION_FLOWADV ||
850 i == SKMEM_REGION_NEXUSADV ||
851 i == SKMEM_REGION_SYSCTLS ||
852 i == SKMEM_REGION_GUARD_TAIL);
853 continue;
854 }
855
856 /* not for nexus */
857 ASSERT(i != SKMEM_REGION_SYSCTLS);
858
859 /*
860 * Get region offsets from base of mmap span; the arena
861 * doesn't need to be mmap'd at this point, since we
862 * simply compute the relative offset.
863 */
864 roff[i] = skmem_arena_get_region_offset(ar, i);
865 }
866
867 /*
868 * The schema is made up of the descriptor followed inline by an array
869 * of offsets to the tx, rx, allocator and event rings in the mmap span.
870 * They contain the offset between the ring and schema, so the
871 * information is usable in userspace to reach the ring from
872 * the schema.
873 */
874 base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
875
876 /* initialize schema with tx ring info */
877 for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
878 kr = &na->na_tx_rings[j];
879 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
880 continue;
881 }
882
883 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
884 skmem_cache_get_obj_info(arn->arn_ring_cache,
885 kr->ckr_ring, &ring_oi, NULL);
886 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
887 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
888
889 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
890 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
891 kr->ckr_ksds, &ksd_oi, &usd_oi);
892
893 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
894 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
895 base;
896 }
897 /* initialize schema with rx ring info */
898 for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
899 kr = &na->na_rx_rings[j];
900 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
901 continue;
902 }
903
904 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
905 skmem_cache_get_obj_info(arn->arn_ring_cache,
906 kr->ckr_ring, &ring_oi, NULL);
907 *(mach_vm_offset_t *)
908 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
909 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
910
911 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
912 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
913 kr->ckr_ksds, &ksd_oi, &usd_oi);
914
915 *(mach_vm_offset_t *)
916 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
917 (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
918 base;
919 }
920 /* initialize schema with allocator ring info */
921 for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
922 i < n[NR_A]; i++, j++) {
923 mach_vm_offset_t usd_roff;
924
925 usd_roff = roff[SKMEM_REGION_TXAUSD];
926 kr = &na->na_alloc_rings[j];
927 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
928 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
929
930 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
931 &ring_oi, NULL);
932 *(mach_vm_offset_t *)
933 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
934 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
935
936 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
937 &ksd_oi, &usd_oi);
938 *(mach_vm_offset_t *)
939 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
940 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
941 }
942 /* initialize schema with free ring info */
943 for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
944 i < n[NR_F]; i++, j++) {
945 mach_vm_offset_t usd_roff;
946
947 usd_roff = roff[SKMEM_REGION_RXFUSD];
948 kr = &na->na_free_rings[j];
949 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
950 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
951
952 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
953 &ring_oi, NULL);
954 *(mach_vm_offset_t *)
955 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
956 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
957
958 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
959 &ksd_oi, &usd_oi);
960 *(mach_vm_offset_t *)
961 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
962 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
963 }
964 /* initialize schema with event ring info */
965 for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
966 n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
967 ASSERT(csm->csm_num_event_rings != 0);
968 kr = &na->na_event_rings[j];
969 ASSERT(!KR_KERNEL_ONLY(kr));
970 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
971 skmem_cache_get_obj_info(arn->arn_ring_cache,
972 kr->ckr_ring, &ring_oi, NULL);
973 *(mach_vm_offset_t *)
974 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
975 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
976
977 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
978 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
979 kr->ckr_ksds, &ksd_oi, &usd_oi);
980
981 *(mach_vm_offset_t *)
982 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
983 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
984 base;
985 }
986
987 *(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
988 __ch_umd_redzone_cookie;
989 *(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
990 *(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
991 na->na_md_subtype;
992
993 if (arn->arn_stats_obj != NULL) {
994 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
995 ASSERT(roff[SKMEM_REGION_USTATS] != 0);
996 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
997 roff[SKMEM_REGION_USTATS];
998 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
999 na->na_stats_type;
1000 } else {
1001 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1002 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
1003 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1004 NEXUS_STATS_TYPE_INVALID;
1005 }
1006
1007 if (arn->arn_flowadv_obj != NULL) {
1008 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
1009 ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
1010 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
1011 roff[SKMEM_REGION_FLOWADV];
1012 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
1013 na->na_flowadv_max;
1014 } else {
1015 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1016 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
1017 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
1018 }
1019
1020 if (arn->arn_nexusadv_obj != NULL) {
1021 struct __kern_nexus_adv_metadata *adv_md;
1022
1023 adv_md = arn->arn_nexusadv_obj;
1024 ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
1025 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
1026 ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
1027 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
1028 roff[SKMEM_REGION_NEXUSADV];
1029 } else {
1030 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1031 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
1032 }
1033
1034 ch->ch_schema = csm;
1035 ch->ch_schema_offset = base;
1036
1037 return 0;
1038 }
1039
1040 /*
1041 * Called by all routines that create nexus_adapters.
1042 * Attach na to the ifp (if any) and provide defaults
1043 * for optional callbacks. Defaults assume that we
1044 * are creating an hardware nexus_adapter.
1045 */
1046 void
na_attach_common(struct nexus_adapter * na,struct kern_nexus * nx,struct kern_nexus_domain_provider * nxdom_prov)1047 na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
1048 struct kern_nexus_domain_provider *nxdom_prov)
1049 {
1050 SK_LOCK_ASSERT_HELD();
1051
1052 ASSERT(nx != NULL);
1053 ASSERT(nxdom_prov != NULL);
1054 ASSERT(na->na_krings_create != NULL);
1055 ASSERT(na->na_krings_delete != NULL);
1056 if (na->na_type != NA_NETIF_COMPAT_DEV) {
1057 ASSERT(na_get_nrings(na, NR_TX) != 0);
1058 }
1059 if (na->na_type != NA_NETIF_COMPAT_HOST) {
1060 ASSERT(na_get_nrings(na, NR_RX) != 0);
1061 }
1062 ASSERT(na->na_channels == 0);
1063
1064 if (na->na_notify == NULL) {
1065 na->na_notify = na_notify;
1066 }
1067
1068 na->na_nx = nx;
1069 na->na_nxdom_prov = nxdom_prov;
1070
1071 SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
1072 SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
1073 SK_KVA(na->na_arena));
1074 }
1075
1076 void
na_post_event(struct __kern_channel_ring * kring,boolean_t nodelay,boolean_t within_kevent,boolean_t selwake,uint32_t hint)1077 na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
1078 boolean_t within_kevent, boolean_t selwake, uint32_t hint)
1079 {
1080 struct nexus_adapter *na = KRNA(kring);
1081 enum txrx t = kring->ckr_tx;
1082
1083 SK_DF(SK_VERB_EVENTS,
1084 "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
1085 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1086 na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
1087 hint, CHAN_FILT_HINT_BITS);
1088
1089 csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
1090 /*
1091 * optimization: avoid a wake up on the global
1092 * queue if nobody has registered for more
1093 * than one ring
1094 */
1095 if (na->na_si_users[t] > 0) {
1096 csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
1097 }
1098 }
1099
1100 /* default notify callback */
1101 static int
na_notify(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)1102 na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
1103 {
1104 #pragma unused(p)
1105 SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
1106 SK_VERB_TX : SK_VERB_RX),
1107 "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
1108 "flags 0x%x, kh %u kt %u | h %u t %u",
1109 sk_proc_name_address(p), sk_proc_pid(p),
1110 (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
1111 SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
1112 kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
1113 kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);
1114
1115 na_post_event(kring, (flags & NA_NOTEF_PUSH),
1116 (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);
1117
1118 return 0;
1119 }
1120
1121 /*
1122 * Fetch configuration from the device, to cope with dynamic
1123 * reconfigurations after loading the module.
1124 */
1125 /* call with SK_LOCK held */
1126 int
na_update_config(struct nexus_adapter * na)1127 na_update_config(struct nexus_adapter *na)
1128 {
1129 uint32_t txr, txd, rxr, rxd;
1130
1131 SK_LOCK_ASSERT_HELD();
1132
1133 txr = txd = rxr = rxd = 0;
1134 if (na->na_config == NULL ||
1135 na->na_config(na, &txr, &txd, &rxr, &rxd)) {
1136 /* take whatever we had at init time */
1137 txr = na_get_nrings(na, NR_TX);
1138 txd = na_get_nslots(na, NR_TX);
1139 rxr = na_get_nrings(na, NR_RX);
1140 rxd = na_get_nslots(na, NR_RX);
1141 }
1142
1143 if (na_get_nrings(na, NR_TX) == txr &&
1144 na_get_nslots(na, NR_TX) == txd &&
1145 na_get_nrings(na, NR_RX) == rxr &&
1146 na_get_nslots(na, NR_RX) == rxd) {
1147 return 0; /* nothing changed */
1148 }
1149 SK_D("stored config %s: txring %u x %u, rxring %u x %u",
1150 na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
1151 na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
1152 SK_D("new config %s: txring %u x %u, rxring %u x %u",
1153 na->na_name, txr, txd, rxr, rxd);
1154
1155 if (na->na_channels == 0) {
1156 SK_D("configuration changed (but fine)");
1157 na_set_nrings(na, NR_TX, txr);
1158 na_set_nslots(na, NR_TX, txd);
1159 na_set_nrings(na, NR_RX, rxr);
1160 na_set_nslots(na, NR_RX, rxd);
1161 return 0;
1162 }
1163 SK_ERR("configuration changed while active, this is bad...");
1164 return 1;
1165 }
1166
/*
 * Program the service-class -> TX ring mapping for a netif device
 * adapter.  Two layouts are supported (see the comment further
 * below): either all service classes funnel into a single ring, or
 * the 10 service classes map onto the 4 WMM access categories.
 */
static void
na_kr_setup_netif_svc_map(struct nexus_adapter * na)1168 na_kr_setup_netif_svc_map(struct nexus_adapter *na)
{
	uint32_t i;
	uint32_t num_tx_rings;

	/* only netif device adapters carry a service-class map */
	ASSERT(na->na_type == NA_NETIF_DEV);
	num_tx_rings = na_get_nrings(na, NR_TX);

	/*
	 * Compile-time sanity: service classes that belong to the same
	 * WMM access category must resolve to the same ring id.
	 */
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_BK));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RD));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RV));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_VI));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_CTL));

	/* every WMM ring id must fit within the WMM ring count */
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);

	/* every service-class index must fit in the LUT */
	_CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);

	/*
	 * we support the following 2 configurations:
	 * 1. packets from all 10 service class map to one ring.
	 * 2. a 10:4 mapping between service classes and the rings. These 4
	 *    rings map to the 4 WMM access categories.
	 */
	if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
		ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
		/* setup the adapter's service class LUT */
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
		NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
		NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
		NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
		NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);

		/* Initialize the service class for each of the 4 ring */
		NAKR_SET_KR_SVC(na, KPKT_SC_BK);
		NAKR_SET_KR_SVC(na, KPKT_SC_BE);
		NAKR_SET_KR_SVC(na, KPKT_SC_VI);
		NAKR_SET_KR_SVC(na, KPKT_SC_VO);
	} else {
		ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
		    NEXUS_QMAP_TYPE_DEFAULT);
		/* 10: 1 mapping */
		for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
			na->na_kring_svc_lut[i] = 0;
		}
		for (i = 0; i < num_tx_rings; i++) {
			NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
		}
	}
}
1244
/*
 * Lock groups for the per-kring mutexes ("q") and spinlocks ("s"),
 * one per ring type; alloc and free rings share a single group.
 */
static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1252
1253 static lck_grp_t *
na_kr_q_lck_grp(enum txrx t)1254 na_kr_q_lck_grp(enum txrx t)
1255 {
1256 switch (t) {
1257 case NR_TX:
1258 return &channel_txq_lock_group;
1259 case NR_RX:
1260 return &channel_rxq_lock_group;
1261 case NR_A:
1262 case NR_F:
1263 return &channel_alloc_lock_group;
1264 case NR_EV:
1265 return &channel_evq_lock_group;
1266 default:
1267 VERIFY(0);
1268 /* NOTREACHED */
1269 __builtin_unreachable();
1270 }
1271 }
1272
1273 static lck_grp_t *
na_kr_s_lck_grp(enum txrx t)1274 na_kr_s_lck_grp(enum txrx t)
1275 {
1276 switch (t) {
1277 case NR_TX:
1278 return &channel_txs_lock_group;
1279 case NR_RX:
1280 return &channel_rxs_lock_group;
1281 case NR_A:
1282 case NR_F:
1283 return &channel_alloc_lock_group;
1284 case NR_EV:
1285 return &channel_evs_lock_group;
1286 default:
1287 VERIFY(0);
1288 /* NOTREACHED */
1289 __builtin_unreachable();
1290 }
1291 }
1292
1293 static void
kr_init_tbr(struct __kern_channel_ring * r)1294 kr_init_tbr(struct __kern_channel_ring *r)
1295 {
1296 r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
1297 r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
1298 r->ckr_tbr_last = 0;
1299 }
1300
1301 struct kern_pbufpool *
na_kr_get_pp(struct nexus_adapter * na,enum txrx t)1302 na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
1303 {
1304 struct kern_pbufpool *pp = NULL;
1305 switch (t) {
1306 case NR_RX:
1307 case NR_F:
1308 case NR_EV:
1309 pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1310 break;
1311 case NR_TX:
1312 case NR_A:
1313 pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1314 break;
1315 default:
1316 VERIFY(0);
1317 /* NOTREACHED */
1318 __builtin_unreachable();
1319 }
1320
1321 return pp;
1322 }
1323
1324 /*
1325 * Create the krings array and initialize the fields common to all adapters.
1326 * The array layout is this:
1327 *
1328 * +----------+
1329 * na->na_tx_rings ----->| | \
1330 * | | } na->num_tx_ring
1331 * | | /
1332 * na->na_rx_rings ----> +----------+
1333 * | | \
1334 * | | } na->na_num_rx_rings
1335 * | | /
1336 * na->na_alloc_rings -> +----------+
1337 * | | \
1338 * na->na_free_rings --> +----------+ } na->na_num_allocator_ring_pairs
1339 * | | /
1340 * na->na_event_rings -> +----------+
1341 * | | \
1342 * | | } na->na_num_event_rings
1343 * | | /
1344 * na->na_tail ----->+----------+
1345 */
1346 /* call with SK_LOCK held */
1347 static int
na_kr_create(struct nexus_adapter * na,boolean_t alloc_ctx)1348 na_kr_create(struct nexus_adapter *na, boolean_t alloc_ctx)
1349 {
1350 lck_grp_t *q_lck_grp, *s_lck_grp;
1351 uint32_t i, count, ndesc;
1352 struct kern_pbufpool *pp = NULL;
1353 struct __kern_channel_ring *kring;
1354 uint32_t n[NR_ALL];
1355 int c, tot_slots, err = 0;
1356 enum txrx t;
1357
1358 SK_LOCK_ASSERT_HELD();
1359
1360 n[NR_TX] = na_get_nrings(na, NR_TX);
1361 n[NR_RX] = na_get_nrings(na, NR_RX);
1362 n[NR_A] = na_get_nrings(na, NR_A);
1363 n[NR_F] = na_get_nrings(na, NR_F);
1364 n[NR_EV] = na_get_nrings(na, NR_EV);
1365
1366 count = n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV];
1367
1368 na->na_tx_rings = sk_alloc_type_array(struct __kern_channel_ring, count,
1369 Z_WAITOK, skmem_tag_nx_rings);
1370 if (__improbable(na->na_tx_rings == NULL)) {
1371 SK_ERR("Cannot allocate krings");
1372 err = ENOMEM;
1373 goto error;
1374 }
1375
1376 na->na_rx_rings = na->na_tx_rings + n[NR_TX];
1377 if (n[NR_A] != 0) {
1378 na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
1379 na->na_free_rings = na->na_alloc_rings + n[NR_A];
1380 } else {
1381 na->na_alloc_rings = na->na_free_rings = NULL;
1382 }
1383 if (n[NR_EV] != 0) {
1384 if (na->na_free_rings != NULL) {
1385 na->na_event_rings = na->na_free_rings + n[NR_F];
1386 } else {
1387 na->na_event_rings = na->na_rx_rings + n[NR_RX];
1388 }
1389 }
1390
1391 /* total number of slots for TX/RX adapter rings */
1392 c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
1393 (n[NR_RX] * na_get_nslots(na, NR_RX));
1394
1395 /* for scratch space on alloc and free rings */
1396 if (n[NR_A] != 0) {
1397 tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
1398 tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
1399 c = tot_slots;
1400 }
1401 na->na_total_slots = tot_slots;
1402
1403 /* slot context (optional) for all TX/RX ring slots of this adapter */
1404 if (alloc_ctx) {
1405 na->na_slot_ctxs =
1406 skn_alloc_type_array(slot_ctxs, struct slot_ctx,
1407 na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
1408 if (na->na_slot_ctxs == NULL) {
1409 SK_ERR("Cannot allocate slot contexts");
1410 err = ENOMEM;
1411 goto error;
1412 }
1413 atomic_bitset_32(&na->na_flags, NAF_SLOT_CONTEXT);
1414 }
1415
1416 /*
1417 * packet handle array storage for all TX/RX ring slots of this
1418 * adapter.
1419 */
1420 na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
1421 na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
1422 if (na->na_scratch == NULL) {
1423 SK_ERR("Cannot allocate slot contexts");
1424 err = ENOMEM;
1425 goto error;
1426 }
1427
1428 /*
1429 * All fields in krings are 0 except the one initialized below.
1430 * but better be explicit on important kring fields.
1431 */
1432 for_all_rings(t) {
1433 ndesc = na_get_nslots(na, t);
1434 pp = na_kr_get_pp(na, t);
1435 for (i = 0; i < n[t]; i++) {
1436 kring = &NAKR(na, t)[i];
1437 bzero(kring, sizeof(*kring));
1438 kring->ckr_na = na;
1439 kring->ckr_pp = pp;
1440 kring->ckr_max_pkt_len = PP_BUF_SIZE_DEF(pp) *
1441 pp->pp_max_frags;
1442 kring->ckr_ring_id = i;
1443 kring->ckr_tx = t;
1444 kr_init_to_mhints(kring, ndesc);
1445 kr_init_tbr(kring);
1446 if (NA_KERNEL_ONLY(na)) {
1447 kring->ckr_flags |= CKRF_KERNEL_ONLY;
1448 }
1449 if (na->na_flags & NAF_HOST_ONLY) {
1450 kring->ckr_flags |= CKRF_HOST;
1451 }
1452 ASSERT((t >= NR_TXRX) || (c > 0));
1453 if ((t < NR_TXRX) &&
1454 (na->na_flags & NAF_SLOT_CONTEXT)) {
1455 ASSERT(na->na_slot_ctxs != NULL);
1456 kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1457 kring->ckr_slot_ctxs =
1458 na->na_slot_ctxs + (tot_slots - c);
1459 }
1460 ASSERT(na->na_scratch != NULL);
1461 if (t < NR_TXRXAF) {
1462 kring->ckr_scratch =
1463 na->na_scratch + (tot_slots - c);
1464 }
1465 if (t < NR_TXRXAF) {
1466 c -= ndesc;
1467 }
1468 switch (t) {
1469 case NR_A:
1470 if (i == 0) {
1471 kring->ckr_na_sync =
1472 na_packet_pool_alloc_sync;
1473 kring->ckr_alloc_ws =
1474 na_upp_alloc_lowat;
1475 } else {
1476 ASSERT(i == 1);
1477 kring->ckr_na_sync =
1478 na_packet_pool_alloc_buf_sync;
1479 kring->ckr_alloc_ws =
1480 na_upp_alloc_buf_lowat;
1481 }
1482 break;
1483 case NR_F:
1484 if (i == 0) {
1485 kring->ckr_na_sync =
1486 na_packet_pool_free_sync;
1487 } else {
1488 ASSERT(i == 1);
1489 kring->ckr_na_sync =
1490 na_packet_pool_free_buf_sync;
1491 }
1492 break;
1493 case NR_TX:
1494 kring->ckr_na_sync = na->na_txsync;
1495 if (na->na_flags & NAF_TX_MITIGATION) {
1496 kring->ckr_flags |= CKRF_MITIGATION;
1497 }
1498 switch (na->na_type) {
1499 #if CONFIG_NEXUS_USER_PIPE
1500 case NA_USER_PIPE:
1501 ASSERT(!(na->na_flags &
1502 NAF_USER_PKT_POOL));
1503 kring->ckr_prologue = kr_txprologue;
1504 kring->ckr_finalize = NULL;
1505 break;
1506 #endif /* CONFIG_NEXUS_USER_PIPE */
1507 #if CONFIG_NEXUS_MONITOR
1508 case NA_MONITOR:
1509 ASSERT(!(na->na_flags &
1510 NAF_USER_PKT_POOL));
1511 kring->ckr_prologue = kr_txprologue;
1512 kring->ckr_finalize = NULL;
1513 break;
1514 #endif /* CONFIG_NEXUS_MONITOR */
1515 default:
1516 if (na->na_flags & NAF_USER_PKT_POOL) {
1517 kring->ckr_prologue =
1518 kr_txprologue_upp;
1519 kring->ckr_finalize =
1520 kr_txfinalize_upp;
1521 } else {
1522 kring->ckr_prologue =
1523 kr_txprologue;
1524 kring->ckr_finalize =
1525 kr_txfinalize;
1526 }
1527 break;
1528 }
1529 break;
1530 case NR_RX:
1531 kring->ckr_na_sync = na->na_rxsync;
1532 if (na->na_flags & NAF_RX_MITIGATION) {
1533 kring->ckr_flags |= CKRF_MITIGATION;
1534 }
1535 switch (na->na_type) {
1536 #if CONFIG_NEXUS_USER_PIPE
1537 case NA_USER_PIPE:
1538 ASSERT(!(na->na_flags &
1539 NAF_USER_PKT_POOL));
1540 kring->ckr_prologue =
1541 kr_rxprologue_nodetach;
1542 kring->ckr_finalize = kr_rxfinalize;
1543 break;
1544 #endif /* CONFIG_NEXUS_USER_PIPE */
1545 #if CONFIG_NEXUS_MONITOR
1546 case NA_MONITOR:
1547 ASSERT(!(na->na_flags &
1548 NAF_USER_PKT_POOL));
1549 kring->ckr_prologue =
1550 kr_rxprologue_nodetach;
1551 kring->ckr_finalize = kr_rxfinalize;
1552 break;
1553 #endif /* CONFIG_NEXUS_MONITOR */
1554 default:
1555 if (na->na_flags & NAF_USER_PKT_POOL) {
1556 kring->ckr_prologue =
1557 kr_rxprologue_upp;
1558 kring->ckr_finalize =
1559 kr_rxfinalize_upp;
1560 } else {
1561 kring->ckr_prologue =
1562 kr_rxprologue;
1563 kring->ckr_finalize =
1564 kr_rxfinalize;
1565 }
1566 break;
1567 }
1568 break;
1569 case NR_EV:
1570 kring->ckr_na_sync = kern_channel_event_sync;
1571 break;
1572 default:
1573 VERIFY(0);
1574 /* NOTREACHED */
1575 __builtin_unreachable();
1576 }
1577 if (t != NR_EV) {
1578 kring->ckr_na_notify = na->na_notify;
1579 } else {
1580 kring->ckr_na_notify = NULL;
1581 }
1582 (void) snprintf(kring->ckr_name,
1583 sizeof(kring->ckr_name) - 1,
1584 "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1585 ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1586 SK_DF(SK_VERB_NA | SK_VERB_RING,
1587 "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1588 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1589 CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1590 kring->ckr_state = KR_READY;
1591 q_lck_grp = na_kr_q_lck_grp(t);
1592 s_lck_grp = na_kr_s_lck_grp(t);
1593 kring->ckr_qlock_group = q_lck_grp;
1594 lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1595 &channel_lock_attr);
1596 kring->ckr_slock_group = s_lck_grp;
1597 lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1598 &channel_lock_attr);
1599 csi_init(&kring->ckr_si,
1600 (kring->ckr_flags & CKRF_MITIGATION),
1601 na->na_ch_mit_ival);
1602 }
1603 csi_init(&na->na_si[t],
1604 (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1605 na->na_ch_mit_ival);
1606 }
1607 ASSERT(c == 0);
1608 na->na_tail = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F];
1609
1610 if (na->na_type == NA_NETIF_DEV) {
1611 na_kr_setup_netif_svc_map(na);
1612 }
1613
1614 /* validate now for cases where we create only krings */
1615 na_krings_verify(na);
1616 return 0;
1617
1618 error:
1619 ASSERT(err != 0);
1620 if (na->na_tx_rings != NULL) {
1621 sk_free_type_array(struct __kern_channel_ring,
1622 na->na_tail - na->na_tx_rings, na->na_tx_rings);
1623 }
1624 if (na->na_slot_ctxs != NULL) {
1625 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1626 skn_free_type_array(slot_ctxs,
1627 struct slot_ctx, na->na_total_slots,
1628 na->na_slot_ctxs);
1629 na->na_slot_ctxs = NULL;
1630 }
1631 if (na->na_scratch != NULL) {
1632 skn_free_type_array(scratch,
1633 kern_packet_t, na->na_total_slots,
1634 na->na_scratch);
1635 na->na_scratch = NULL;
1636 }
1637 return err;
1638 }
1639
1640 /* undo the actions performed by na_kr_create() */
1641 /* call with SK_LOCK held */
1642 static void
na_kr_delete(struct nexus_adapter * na)1643 na_kr_delete(struct nexus_adapter *na)
1644 {
1645 struct __kern_channel_ring *kring = na->na_tx_rings;
1646 enum txrx t;
1647
1648 ASSERT((kring != NULL) && (na->na_tail != NULL));
1649 SK_LOCK_ASSERT_HELD();
1650
1651 for_all_rings(t) {
1652 csi_destroy(&na->na_si[t]);
1653 }
1654 /* we rely on the krings layout described above */
1655 for (; kring != na->na_tail; kring++) {
1656 lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1657 lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1658 csi_destroy(&kring->ckr_si);
1659 if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1660 kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1661 ASSERT(kring->ckr_slot_ctxs != NULL);
1662 kring->ckr_slot_ctxs = NULL;
1663 }
1664 }
1665 if (na->na_slot_ctxs != NULL) {
1666 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1667 atomic_bitclear_32(&na->na_flags, NAF_SLOT_CONTEXT);
1668 skn_free_type_array(slot_ctxs,
1669 struct slot_ctx, na->na_total_slots,
1670 na->na_slot_ctxs);
1671 na->na_slot_ctxs = NULL;
1672 }
1673 if (na->na_scratch != NULL) {
1674 skn_free_type_array(scratch,
1675 kern_packet_t, na->na_total_slots,
1676 na->na_scratch);
1677 na->na_scratch = NULL;
1678 }
1679 ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1680 sk_free_type_array(struct __kern_channel_ring,
1681 na->na_tail - na->na_tx_rings, na->na_tx_rings);
1682 na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1683 na->na_free_rings = na->na_event_rings = na->na_tail = NULL;
1684 }
1685
1686 static void
na_kr_slot_desc_init(struct __slot_desc * ksds,boolean_t kernel_only,struct __slot_desc * usds,size_t ndesc)1687 na_kr_slot_desc_init(struct __slot_desc *ksds,
1688 boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1689 {
1690 size_t i;
1691
1692 bzero(ksds, ndesc * SLOT_DESC_SZ);
1693 if (usds != NULL) {
1694 ASSERT(!kernel_only);
1695 bzero(usds, ndesc * SLOT_DESC_SZ);
1696 } else {
1697 ASSERT(kernel_only);
1698 }
1699
1700 for (i = 0; i < ndesc; i++) {
1701 KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1702 if (!kernel_only) {
1703 USD_INIT(SLOT_DESC_USD(&usds[i]));
1704 }
1705 }
1706 }
1707
1708 /* call with SK_LOCK held */
1709 static int
na_kr_setup(struct nexus_adapter * na,struct kern_channel * ch)1710 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1711 {
1712 struct skmem_arena *ar = na->na_arena;
1713 struct skmem_arena_nexus *arn;
1714 mach_vm_offset_t roff[SKMEM_REGIONS];
1715 enum txrx t;
1716 uint32_t i;
1717
1718 SK_LOCK_ASSERT_HELD();
1719 ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1720 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1721 arn = skmem_arena_nexus(ar);
1722 ASSERT(arn != NULL);
1723
1724 bzero(&roff, sizeof(roff));
1725 for (i = 0; i < SKMEM_REGIONS; i++) {
1726 if (ar->ar_regions[i] == NULL) {
1727 continue;
1728 }
1729
1730 /* not for nexus */
1731 ASSERT(i != SKMEM_REGION_SYSCTLS);
1732
1733 /*
1734 * Get region offsets from base of mmap span; the arena
1735 * doesn't need to be mmap'd at this point, since we
1736 * simply compute the relative offset.
1737 */
1738 roff[i] = skmem_arena_get_region_offset(ar, i);
1739 }
1740
1741 for_all_rings(t) {
1742 for (i = 0; i < na_get_nrings(na, t); i++) {
1743 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1744 struct __user_channel_ring *ring = kring->ckr_ring;
1745 mach_vm_offset_t ring_off, usd_roff;
1746 struct skmem_obj_info oi, oim;
1747 uint32_t ndesc;
1748
1749 if (ring != NULL) {
1750 SK_DF(SK_VERB_NA | SK_VERB_RING,
1751 "kr 0x%llx (\"%s\") is already "
1752 "initialized", SK_KVA(kring),
1753 kring->ckr_name);
1754 continue; /* already created by somebody else */
1755 }
1756
1757 if (!KR_KERNEL_ONLY(kring) &&
1758 (ring = skmem_cache_alloc(arn->arn_ring_cache,
1759 SKMEM_NOSLEEP)) == NULL) {
1760 SK_ERR("Cannot allocate %s_ring for kr "
1761 "0x%llx (\"%s\")", sk_ring2str(t),
1762 SK_KVA(kring), kring->ckr_name);
1763 goto cleanup;
1764 }
1765 kring->ckr_flags |= CKRF_MEM_RING_INITED;
1766 kring->ckr_ring = ring;
1767 ndesc = kring->ckr_num_slots;
1768
1769 if (ring == NULL) {
1770 goto skip_user_ring_setup;
1771 }
1772
1773 *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1774
1775 /* offset of current ring in mmap span */
1776 skmem_cache_get_obj_info(arn->arn_ring_cache,
1777 ring, &oi, NULL);
1778 ring_off = (roff[SKMEM_REGION_RING] +
1779 SKMEM_OBJ_ROFF(&oi));
1780
1781 /*
1782 * ring_{buf,md,sd}_ofs offsets are relative to the
1783 * current ring, and not to the base of mmap span.
1784 */
1785 *(mach_vm_offset_t *)(uintptr_t)
1786 &ring->ring_def_buf_base =
1787 (roff[SKMEM_REGION_BUF_DEF] - ring_off);
1788 *(mach_vm_offset_t *)(uintptr_t)
1789 &ring->ring_large_buf_base =
1790 (roff[SKMEM_REGION_BUF_LARGE] - ring_off);
1791 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1792 (roff[SKMEM_REGION_UMD] - ring_off);
1793 _CASSERT(sizeof(uint16_t) ==
1794 sizeof(ring->ring_bft_size));
1795 if (roff[SKMEM_REGION_UBFT] != 0) {
1796 ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1797 NULL);
1798 *(mach_vm_offset_t *)(uintptr_t)
1799 &ring->ring_bft_base =
1800 (roff[SKMEM_REGION_UBFT] - ring_off);
1801 *(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1802 (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1803 skr_c_obj_size;
1804 ASSERT(ring->ring_bft_size ==
1805 ar->ar_regions[SKMEM_REGION_KBFT]->
1806 skr_c_obj_size);
1807 } else {
1808 *(mach_vm_offset_t *)(uintptr_t)
1809 &ring->ring_bft_base = 0;
1810 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1811 }
1812
1813 if (t == NR_TX || t == NR_A || t == NR_EV) {
1814 usd_roff = roff[SKMEM_REGION_TXAUSD];
1815 } else {
1816 ASSERT(t == NR_RX || t == NR_F);
1817 usd_roff = roff[SKMEM_REGION_RXFUSD];
1818 }
1819 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1820 (usd_roff - ring_off);
1821
1822 /* copy values from kring */
1823 ring->ring_head = kring->ckr_rhead;
1824 *(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1825 kring->ckr_khead;
1826 *(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1827 kring->ckr_rtail;
1828
1829 _CASSERT(sizeof(uint32_t) ==
1830 sizeof(ring->ring_def_buf_size));
1831 _CASSERT(sizeof(uint32_t) ==
1832 sizeof(ring->ring_large_buf_size));
1833 _CASSERT(sizeof(uint16_t) ==
1834 sizeof(ring->ring_md_size));
1835 *(uint32_t *)(uintptr_t)&ring->ring_def_buf_size =
1836 ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size;
1837 if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) {
1838 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size =
1839 ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size;
1840 } else {
1841 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0;
1842 }
1843 if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1844 *(uint16_t *)(uintptr_t)&ring->ring_md_size =
1845 (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1846 skr_c_obj_size;
1847 ASSERT(ring->ring_md_size ==
1848 ar->ar_regions[SKMEM_REGION_KMD]->
1849 skr_c_obj_size);
1850 } else {
1851 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1852 ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1853 ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1854 }
1855
1856 /* ring info */
1857 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1858 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1859 *(uint16_t *)(uintptr_t)&ring->ring_id =
1860 (uint16_t)kring->ckr_ring_id;
1861 *(uint16_t *)(uintptr_t)&ring->ring_kind =
1862 (uint16_t)kring->ckr_tx;
1863
1864 SK_DF(SK_VERB_NA | SK_VERB_RING,
1865 "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1866 sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1867 kring->ckr_name);
1868 SK_DF(SK_VERB_NA | SK_VERB_RING,
1869 " num_slots: %u", ring->ring_num_slots);
1870 SK_DF(SK_VERB_NA | SK_VERB_RING,
1871 " def_buf_base: 0x%llx",
1872 (uint64_t)ring->ring_def_buf_base);
1873 SK_DF(SK_VERB_NA | SK_VERB_RING,
1874 " large_buf_base: 0x%llx",
1875 (uint64_t)ring->ring_large_buf_base);
1876 SK_DF(SK_VERB_NA | SK_VERB_RING,
1877 " md_base: 0x%llx",
1878 (uint64_t)ring->ring_md_base);
1879 SK_DF(SK_VERB_NA | SK_VERB_RING,
1880 " sd_base: 0x%llx",
1881 (uint64_t)ring->ring_sd_base);
1882 SK_DF(SK_VERB_NA | SK_VERB_RING,
1883 " h, t: %u, %u, %u", ring->ring_head,
1884 ring->ring_tail);
1885 SK_DF(SK_VERB_NA | SK_VERB_RING,
1886 " md_size: %d",
1887 (uint64_t)ring->ring_md_size);
1888
1889 /* make sure they're in synch */
1890 _CASSERT(NR_RX == CR_KIND_RX);
1891 _CASSERT(NR_TX == CR_KIND_TX);
1892 _CASSERT(NR_A == CR_KIND_ALLOC);
1893 _CASSERT(NR_F == CR_KIND_FREE);
1894 _CASSERT(NR_EV == CR_KIND_EVENT);
1895
1896 skip_user_ring_setup:
1897 /*
1898 * This flag tells na_kr_teardown_all() that it should
1899 * go thru the checks to free up the slot maps.
1900 */
1901 kring->ckr_flags |= CKRF_MEM_SD_INITED;
1902 if (t == NR_TX || t == NR_A || t == NR_EV) {
1903 kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1904 } else {
1905 ASSERT(t == NR_RX || t == NR_F);
1906 kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1907 }
1908 kring->ckr_ksds =
1909 skmem_cache_alloc(kring->ckr_ksds_cache,
1910 SKMEM_NOSLEEP);
1911 if (kring->ckr_ksds == NULL) {
1912 SK_ERR("Cannot allocate %s_ksds for kr "
1913 "0x%llx (\"%s\")", sk_ring2str(t),
1914 SK_KVA(kring), kring->ckr_name);
1915 goto cleanup;
1916 }
1917 if (!KR_KERNEL_ONLY(kring)) {
1918 skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1919 kring->ckr_ksds, &oi, &oim);
1920 kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1921 }
1922 na_kr_slot_desc_init(kring->ckr_ksds,
1923 KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1924
1925 /* cache last slot descriptor address */
1926 ASSERT(kring->ckr_lim == (ndesc - 1));
1927 kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1928
1929 if ((t < NR_TXRX) &&
1930 !(na->na_flags & NAF_USER_PKT_POOL) &&
1931 na_kr_populate_slots(kring) != 0) {
1932 SK_ERR("Cannot allocate buffers for kr "
1933 "0x%llx (\"%s\")", SK_KVA(kring),
1934 kring->ckr_name);
1935 goto cleanup;
1936 }
1937 }
1938 }
1939
1940 return 0;
1941
1942 cleanup:
1943 na_kr_teardown_all(na, ch, FALSE);
1944
1945 return ENOMEM;
1946 }
1947
/*
 * Tear down a single kring: depopulate its slot descriptors, release
 * the slot-descriptor and user-ring memory back to their caches, and
 * (when 'defunct') mark the kring as dropping traffic.  Runs with the
 * kring held exclusively via kr_enter()/kr_exit().
 */
static void
na_kr_teardown_common(struct nexus_adapter *na,
    struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
    boolean_t defunct)
{
	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
	struct __user_channel_ring *ckr_ring;
	boolean_t sd_idle, sd_inited;

	ASSERT(arn != NULL);
	kr_enter(kring, TRUE);
	/*
	 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
	 * to make sure that the freeing needs to happen (else just
	 * nullify the values).
	 * If this adapter owns the memory for the slot descriptors,
	 * check if the region is marked as busy (sd_idle is false)
	 * and leave the kring's slot descriptor fields alone if so,
	 * at defunct time.  At final teardown time, sd_idle must be
	 * true else we assert; this indicates a missing call to
	 * skmem_arena_nexus_sd_set_noidle().
	 */
	sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
	if (sd_inited) {
		/* callee will do KR_KSD(), so check */
		if (((t < NR_TXRX) || (t == NR_EV)) &&
		    (kring->ckr_ksds != NULL)) {
			na_kr_depopulate_slots(kring, ch, defunct);
		}
		/* leave CKRF_MEM_SD_INITED flag alone until idle */
		sd_idle = skmem_arena_nexus_sd_idle(arn);
		VERIFY(sd_idle || defunct);
	} else {
		sd_idle = TRUE;
	}

	/* free slot-descriptor memory only once the region is idle */
	if (sd_idle) {
		kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
		if (kring->ckr_ksds != NULL) {
			if (sd_inited) {
				skmem_cache_free(kring->ckr_ksds_cache,
				    kring->ckr_ksds);
			}
			kring->ckr_ksds = NULL;
			kring->ckr_ksds_last = NULL;
			kring->ckr_usds = NULL;
		}
		ASSERT(kring->ckr_ksds_last == NULL);
		ASSERT(kring->ckr_usds == NULL);
	}

	/* detach the user ring before freeing it below */
	if ((ckr_ring = kring->ckr_ring) != NULL) {
		kring->ckr_ring = NULL;
	}

	if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
		/* kernel-only krings have no user ring to free */
		ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
		if (ckr_ring != NULL) {
			skmem_cache_free(arn->arn_ring_cache, ckr_ring);
		}
		kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
	}

	if (defunct) {
		/* if defunct, drop everything; see KR_DROP() */
		kring->ckr_flags |= CKRF_DEFUNCT;
	}
	kr_exit(kring);
}
2017
2018 /*
2019 * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2020 */
2021 static void
na_kr_teardown_all(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)2022 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2023 boolean_t defunct)
2024 {
2025 enum txrx t;
2026
2027 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2028
2029 /* skip if this adapter has no allocated rings */
2030 if (na->na_tx_rings == NULL) {
2031 return;
2032 }
2033
2034 for_all_rings(t) {
2035 for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2036 na_kr_teardown_common(na, &NAKR(na, t)[i],
2037 t, ch, defunct);
2038 }
2039 }
2040 }
2041
2042 /*
2043 * Teardown only {tx,rx} rings assigned to the channel.
2044 */
2045 static void
na_kr_teardown_txrx(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct,struct proc * p)2046 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2047 boolean_t defunct, struct proc *p)
2048 {
2049 enum txrx t;
2050
2051 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2052
2053 for_rx_tx(t) {
2054 ring_id_t qfirst = ch->ch_first[t];
2055 ring_id_t qlast = ch->ch_last[t];
2056 uint32_t i;
2057
2058 for (i = qfirst; i < qlast; i++) {
2059 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2060 na_kr_teardown_common(na, kring, t, ch, defunct);
2061
2062 /*
2063 * Issue a notify to wake up anyone sleeping in kqueue
2064 * so that they notice the newly defuncted channels and
2065 * return an error
2066 */
2067 kring->ckr_na_notify(kring, p, 0);
2068 }
2069 }
2070 }
2071
/*
 * Pre-attach one packet from the kring's packet pool to each slot of
 * the ring.  Which rings get pre-populated depends on the nexus type
 * and whether the kring is kernel-only (see the switch below).
 * Returns 0 on success, or ENOMEM if the pool runs dry, in which case
 * all packets attached so far are detached and freed again.
 */
static int
na_kr_populate_slots(struct __kern_channel_ring *kring)
{
	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
	struct nexus_adapter *na = KRNA(kring);
	kern_pbufpool_t pp = kring->ckr_pp;
	uint32_t nslots = kring->ckr_num_slots;
	uint32_t start_idx, i;
	uint32_t sidx = 0; /* slot counter */
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	nexus_type_t nexus_type;
	int err = 0;

	ASSERT(kring->ckr_tx < NR_TXRX);
	ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
	ASSERT(pp != NULL);

	/*
	 * xxx_ppool: remove this special case
	 */
	nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;

	/* decide, per nexus type, whether this ring gets pre-populated */
	switch (nexus_type) {
	case NEXUS_TYPE_FLOW_SWITCH:
	case NEXUS_TYPE_KERNEL_PIPE:
		/*
		 * xxx_ppool: This is temporary code until we come up with a
		 * scheme for user space to alloc & attach packets to tx ring.
		 */
		if (kernel_only || kring->ckr_tx == NR_RX) {
			return 0;
		}
		break;

	case NEXUS_TYPE_NET_IF:
		if (((na->na_type == NA_NETIF_DEV) ||
		    (na->na_type == NA_NETIF_HOST)) &&
		    (kernel_only || (kring->ckr_tx == NR_RX))) {
			return 0;
		}

		ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
		    (na->na_type == NA_NETIF_COMPAT_HOST) ||
		    (na->na_type == NA_NETIF_DEV) ||
		    (na->na_type == NA_NETIF_VP));

		if (!kernel_only) {
			/* user-facing TX rings get populated; RX do not */
			if (kring->ckr_tx == NR_RX) {
				return 0;
			} else {
				break;
			}
		}

		ASSERT(kernel_only);

		/* compat adapters never pre-populate kernel-only rings */
		if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
		    (na->na_type == NA_NETIF_COMPAT_HOST)) {
			return 0;
		}
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();

	case NEXUS_TYPE_USER_PIPE:
	case NEXUS_TYPE_MONITOR:
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* Fill the ring with packets */
	sidx = start_idx = 0;
	for (i = 0; i < nslots; i++) {
		kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
		    SKMEM_NOSLEEP));
		if (kqum == NULL) {
			err = ENOMEM;
			SK_ERR("ar 0x%llx (\"%s\") no more buffers "
			    "after %u of %u, err %d", SK_KVA(na->na_arena),
			    na->na_arena->ar_name, i, nslots, err);
			goto cleanup;
		}
		ksd = KR_KSD(kring, i);
		usd = (kernel_only ? NULL : KR_USD(kring, i));

		/* attach packet to slot */
		kqum->qum_ksd = ksd;
		ASSERT(!KSD_VALID_METADATA(ksd));
		KSD_ATTACH_METADATA(ksd, kqum);
		if (usd != NULL) {
			/* mirror the attachment in the user-visible slot */
			USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
			kr_externalize_metadata(kring, pp->pp_max_frags,
			    kqum, current_proc());
		}

		SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
		    SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
		    SK_KVA(&kqum->qum_buf[0]));
		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
			SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
			    "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
			    SK_KVA(usd), METADATA_IDX(kqum),
			    SK_KVA(kqum->qum_user),
			    kqum->qum_user->qum_buf[0].buf_idx,
			    SK_KVA(&kqum->qum_user->qum_buf[0]));
		}

		sidx = SLOT_NEXT(sidx, kring->ckr_lim);
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
	    SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);

cleanup:
	/* on failure, unwind: detach and free the first 'i' packets */
	if (err != 0) {
		sidx = start_idx;
		while (i-- > 0) {
			ksd = KR_KSD(kring, i);
			usd = (kernel_only ? NULL : KR_USD(kring, i));
			kqum = ksd->sd_qum;

			ASSERT(ksd == kqum->qum_ksd);
			KSD_RESET(ksd);
			if (usd != NULL) {
				USD_RESET(usd);
			}
			/* detach packet from slot */
			kqum->qum_ksd = NULL;
			pp_free_packet(pp, SK_PTR_ADDR(kqum));

			sidx = SLOT_NEXT(sidx, kring->ckr_lim);
		}
	}
	return err;
}
2216
/*
 * Detach (and normally free) the packet attached to each slot of the
 * kring.  When the adapter uses a user packet pool, packets that were
 * loaned to user space are first removed from the pool's loan table;
 * internalized packets must not appear there (panic if they do).
 * When 'defunct', the user-visible slot descriptors are left intact
 * so a suspended process can resume with its original view.
 */
static void
na_kr_depopulate_slots(struct __kern_channel_ring *kring,
    struct kern_channel *ch, boolean_t defunct)
{
#pragma unused(ch)
	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
	uint32_t i, j, n = kring->ckr_num_slots;
	struct nexus_adapter *na = KRNA(kring);
	struct kern_pbufpool *pp = kring->ckr_pp;
	boolean_t upp = FALSE;
	obj_idx_t midx;

	ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* event rings don't participate in the user packet pool */
	if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
	    (kring->ckr_tx != NR_EV)) {
		upp = TRUE;
	}
	/* i walks every slot; j counts packets actually freed */
	for (i = 0, j = 0; i < n; i++) {
		struct __kern_slot_desc *ksd = KR_KSD(kring, i);
		struct __user_slot_desc *usd;
		struct __kern_quantum *qum, *kqum;
		boolean_t free_packet = FALSE;
		int err;

		if (!KSD_VALID_METADATA(ksd)) {
			continue;
		}

		kqum = ksd->sd_qum;
		usd = (kernel_only ? NULL : KR_USD(kring, i));
		midx = METADATA_IDX(kqum);

		/*
		 * if the packet is internalized it should not be in the
		 * hash table of packets loaned to user space.
		 */
		if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
			if ((qum = pp_find_upp(pp, midx)) != NULL) {
				panic("internalized packet 0x%llx in htbl",
				    SK_KVA(qum));
				/* NOTREACHED */
				__builtin_unreachable();
			}
			free_packet = TRUE;
		} else if (upp) {
			/*
			 * if the packet is not internalized check if it is
			 * in the list of packets loaned to user-space.
			 * Remove from the list before freeing.
			 */
			ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
			qum = pp_remove_upp(pp, midx, &err);
			if (err != 0) {
				SK_ERR("un-allocated packet or buflet %d %p",
				    midx, SK_KVA(qum));
				if (qum != NULL) {
					free_packet = TRUE;
				}
			}
		} else {
			free_packet = TRUE;
		}

		/*
		 * Clear the user and kernel slot descriptors.  Note that
		 * if we are depopulating the slots due to defunct (and not
		 * due to normal deallocation/teardown), we leave the user
		 * slot descriptor alone.  At that point the process may
		 * be suspended, and later when it resumes it would just
		 * pick up the original contents and move forward with
		 * whatever it was doing.
		 */
		KSD_RESET(ksd);
		if (usd != NULL && !defunct) {
			USD_RESET(usd);
		}

		/* detach packet from slot */
		kqum->qum_ksd = NULL;

		SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
		    METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
		    SK_KVA(&kqum->qum_buf[0]));
		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
			SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
			    "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
			    SK_KVA(usd), METADATA_IDX(kqum),
			    SK_KVA(kqum->qum_user),
			    kqum->qum_user->qum_buf[0].buf_idx,
			    SK_KVA(&kqum->qum_user->qum_buf[0]));
		}

		if (free_packet) {
			pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
		}
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
	    SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
	    j, n);
}
2324
2325 int
na_rings_mem_setup(struct nexus_adapter * na,boolean_t alloc_ctx,struct kern_channel * ch)2326 na_rings_mem_setup(struct nexus_adapter *na,
2327 boolean_t alloc_ctx, struct kern_channel *ch)
2328 {
2329 boolean_t kronly;
2330 int err;
2331
2332 SK_LOCK_ASSERT_HELD();
2333 ASSERT(na->na_channels == 0);
2334 /*
2335 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2336 * the backing memory regions for the adapter.
2337 */
2338 kronly = (na->na_flags & NAF_MEM_NO_INIT);
2339 ASSERT(!kronly || NA_KERNEL_ONLY(na));
2340
2341 /*
2342 * Create and initialize the common fields of the krings array.
2343 * using the information that must be already available in the na.
2344 */
2345 if ((err = na_kr_create(na, alloc_ctx)) == 0 && !kronly) {
2346 err = na_kr_setup(na, ch);
2347 if (err != 0) {
2348 na_kr_delete(na);
2349 }
2350 }
2351
2352 return err;
2353 }
2354
2355 void
na_rings_mem_teardown(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)2356 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2357 boolean_t defunct)
2358 {
2359 SK_LOCK_ASSERT_HELD();
2360 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2361
2362 /*
2363 * Deletes the kring and ring array of the adapter. They
2364 * must have been created using na_rings_mem_setup().
2365 *
2366 * XXX: [email protected] -- the parameter "ch" should not be
2367 * needed here; however na_kr_depopulate_slots() needs to
2368 * go thru the channel's user packet pool hash, and so for
2369 * now we leave it here.
2370 */
2371 na_kr_teardown_all(na, ch, defunct);
2372 if (!defunct) {
2373 na_kr_delete(na);
2374 }
2375 }
2376
2377 void
na_ch_rings_defunct(struct kern_channel * ch,struct proc * p)2378 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2379 {
2380 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2381
2382 /*
2383 * Depopulate slots on the TX and RX rings of this channel,
2384 * but don't touch other rings owned by other channels if
2385 * this adapter is being shared.
2386 */
2387 na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2388 }
2389
/*
 * Set or clear CKRF_DROP on every TX and RX kring of the adapter,
 * which makes subsequent sync operations drop traffic (see KR_DROP()).
 * Note the flag is updated even when kr_enter() fails; in that case
 * the failure is logged and no kr_exit() is issued.
 */
void
na_kr_drop(struct nexus_adapter *na, boolean_t drop)
{
	enum txrx t;
	uint32_t i;

	for_rx_tx(t) {
		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
			int error;
			error = kr_enter(kring, TRUE);
			if (drop) {
				kring->ckr_flags |= CKRF_DROP;
			} else {
				kring->ckr_flags &= ~CKRF_DROP;
			}

			if (error != 0) {
				SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
				    "kr_enter failed %d",
				    na->na_name, SK_KVA(na),
				    kring->ckr_name, SK_KVA(kring),
				    error);
			} else {
				kr_exit(kring);
			}
			SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
			    "krflags 0x%b", na->na_name, SK_KVA(na),
			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
			    CKRF_BITS);
		}
	}
}
2423
2424 /*
2425 * Set the stopped/enabled status of ring. When stopping, they also wait
2426 * for all current activity on the ring to terminate. The status change
2427 * is then notified using the na na_notify callback.
2428 */
2429 static void
na_set_ring(struct nexus_adapter * na,uint32_t ring_id,enum txrx t,uint32_t state)2430 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2431 uint32_t state)
2432 {
2433 struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2434
2435 /*
2436 * Mark the ring as stopped/enabled, and run through the
2437 * locks to make sure other users get to see it.
2438 */
2439 if (state == KR_READY) {
2440 kr_start(kr);
2441 } else {
2442 kr_stop(kr, state);
2443 }
2444 }
2445
2446
2447 /* stop or enable all the rings of na */
2448 static void
na_set_all_rings(struct nexus_adapter * na,uint32_t state)2449 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2450 {
2451 uint32_t i;
2452 enum txrx t;
2453
2454 SK_LOCK_ASSERT_HELD();
2455
2456 if (!NA_IS_ACTIVE(na)) {
2457 return;
2458 }
2459
2460 for_rx_tx(t) {
2461 for (i = 0; i < na_get_nrings(na, t); i++) {
2462 na_set_ring(na, i, t, state);
2463 }
2464 }
2465 }
2466
/*
 * Convenience function used in drivers.  Waits for current txsync()s/
 * rxsync()s to finish and prevents any new one from starting.  Call
 * this before turning Skywalk mode off, or before removing the
 * hardware rings (e.g., on module unload).  As a rule of thumb for
 * linux drivers, this should be placed near each napi_disable().
 */
void
na_disable_all_rings(struct nexus_adapter *na)
{
	na_set_all_rings(na, KR_STOPPED);
}
2479
/*
 * Convenience function used in drivers.  Re-enables rxsync and txsync
 * on the adapter's rings.  In linux drivers, this should be placed
 * near each napi_enable().
 */
void
na_enable_all_rings(struct nexus_adapter *na)
{
	na_set_all_rings(na, KR_READY /* enabled */);
}
2490
/* transition all TX/RX rings of the adapter into the locked state */
void
na_lock_all_rings(struct nexus_adapter *na)
{
	na_set_all_rings(na, KR_LOCKED);
}
2496
/* unlocking is the same transition as re-enabling (KR_READY) */
void
na_unlock_all_rings(struct nexus_adapter *na)
{
	na_enable_all_rings(na);
}
2502
/*
 * Connect a user channel 'ch' to nexus 'nx' as described by 'chr':
 * find (or create) the adapter, bind the channel to it, set up the
 * per-ring wait objects, and mmap the arena into process 'p'.
 * Returns 0 on success or an errno; on failure any partially-acquired
 * state (binding, mapping, adapter reference) is undone.
 */
int
na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
	struct nexus_adapter *na = NULL;
	mach_vm_size_t memsize = 0;
	int err = 0;
	enum txrx t;

	ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
	ASSERT(!(ch->ch_flags & CHANF_KERNEL));

	SK_LOCK_ASSERT_HELD();

	/* find the nexus adapter and return the reference */
	err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
	if (err != 0) {
		ASSERT(na == NULL);
		goto done;
	}

	/* kernel-only adapters cannot take user channels */
	if (NA_KERNEL_ONLY(na)) {
		err = EBUSY;
		goto done;
	}

	/* reject if the adapter is defunct or non-permissive */
	if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
		err = ENXIO;
		goto done;
	}

	err = na_bind_channel(na, ch, chr);
	if (err != 0) {
		goto done;
	}

	ASSERT(ch->ch_schema != NULL);
	ASSERT(na == ch->ch_na);

	/* hook up per-ring (or shared, when multiplexed) wait objects */
	for_all_rings(t) {
		if (na_get_nrings(na, t) == 0) {
			ch->ch_si[t] = NULL;
			continue;
		}
		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
	}

	skmem_arena_get_stats(na->na_arena, &memsize, NULL);

	/* advertise private memory unless an external pool is in use */
	if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
	    AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
		atomic_bitset_32(__DECONST(uint32_t *,
		    &ch->ch_schema->csm_flags), CSM_PRIV_MEM);
	}

	err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
	if (err != 0) {
		goto done;
	}

	/* publish the channel as live to user space */
	atomic_bitset_32(__DECONST(uint32_t *, &ch->ch_schema->csm_flags),
	    CSM_ACTIVE);
	chr->cr_memsize = memsize;
	chr->cr_memoffset = ch->ch_schema_offset;

	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
	    na->na_flags, NAF_BITS);

done:
	if (err != 0) {
		if (ch->ch_schema != NULL || na != NULL) {
			if (ch->ch_schema != NULL) {
				ASSERT(na == ch->ch_na);
				/*
				 * Callee will unmap memory region if needed,
				 * as well as release reference held on 'na'.
				 */
				na_disconnect(nx, ch);
				na = NULL;
			}
			if (na != NULL) {
				(void) na_release_locked(na);
				na = NULL;
			}
		}
	}

	return err;
}
2597
/*
 * Disconnect a user channel from its adapter: destroy the mapping,
 * release the channel's binding and references via na_unbind_channel(),
 * and clear the per-ring wait objects.  Inverse of na_connect().
 */
void
na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nx)
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
	    ch->ch_na->na_flags, NAF_BITS);

	/* destroy mapping and release references */
	na_unbind_channel(ch);
	ASSERT(ch->ch_na == NULL);
	ASSERT(ch->ch_schema == NULL);
	for_all_rings(t) {
		ch->ch_si[t] = NULL;
	}
}
2620
/*
 * Mark the adapter as defunct: depopulate its slots via na_teardown()
 * and defunct the backing memory arena (only when this adapter owns
 * it).  Idempotent — only the first call does the work.  Takes
 * ch_lock unless 'locked' indicates the caller already holds it.
 */
void
na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
    struct nexus_adapter *na, boolean_t locked)
{
#pragma unused(nx)
	SK_LOCK_ASSERT_HELD();
	if (!locked) {
		lck_mtx_lock(&ch->ch_lock);
	}

	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	if (!(na->na_flags & NAF_DEFUNCT)) {
		/*
		 * Mark this adapter as defunct to inform nexus-specific
		 * teardown handler called by na_teardown() below.
		 */
		atomic_bitset_32(&na->na_flags, NAF_DEFUNCT);

		/*
		 * Depopulate slots.
		 */
		na_teardown(na, ch, TRUE);

		/*
		 * And finally destroy any already-defunct memory regions.
		 * Do this only if the nexus adapter owns the arena, i.e.
		 * NAF_MEM_LOANED is not set.  Otherwise, we'd expect
		 * that this routine be called again for the real owner.
		 */
		if (!(na->na_flags & NAF_MEM_LOANED)) {
			skmem_arena_defunct(na->na_arena);
		}
	}

	SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
	    "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    na->na_name, ch->ch_info->cinfo_nx_port,
	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
	    na->na_flags, NAF_BITS);

	if (!locked) {
		lck_mtx_unlock(&ch->ch_lock);
	}
}
2667
2668 /*
2669 * TODO: [email protected] -- merge this into na_connect()
2670 */
/*
 * Special (kernel-internal) variant of na_connect(): binds a kernel
 * channel to an adapter through the adapter's na_special() handler.
 * The adapter must be otherwise unused (no channels) and must support
 * NXSPEC commands.  No channel schema or arena mapping is created.
 * Returns 0 or an errno; partial state is undone on failure.
 */
int
na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
    struct chreq *chr, struct proc *p)
{
#pragma unused(p)
	struct nexus_adapter *na = NULL;
	mach_vm_size_t memsize = 0;
	int error = 0;
	enum txrx t;

	ASSERT(chr->cr_mode & CHMODE_KERNEL);
	ASSERT(ch->ch_flags & CHANF_KERNEL);
	ASSERT(ch->ch_na == NULL);
	ASSERT(ch->ch_schema == NULL);

	SK_LOCK_ASSERT_HELD();

	/* lookup/create on behalf of the kernel, not the calling proc */
	error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
	if (error != 0) {
		goto done;
	}

	if (na == NULL) {
		error = EINVAL;
		goto done;
	}

	/* special connect requires exclusive use of the adapter */
	if (na->na_channels > 0) {
		error = EBUSY;
		goto done;
	}

	if (na->na_flags & NAF_DEFUNCT) {
		error = ENXIO;
		goto done;
	}

	/*
	 * Special connect requires the nexus adapter to handle its
	 * own channel binding and unbinding via na_special(); bail
	 * if this adapter doesn't support it.
	 */
	if (na->na_special == NULL) {
		error = ENOTSUP;
		goto done;
	}

	/* upon success, "ch->ch_na" will point to "na" */
	error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
	if (error != 0) {
		ASSERT(ch->ch_na == NULL);
		goto done;
	}

	ASSERT(na->na_flags & NAF_SPEC_INIT);
	ASSERT(na == ch->ch_na);
	/* make sure this is still the case */
	ASSERT(ch->ch_schema == NULL);

	/* hook up per-ring (or shared, when multiplexed) wait objects */
	for_rx_tx(t) {
		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
	}

	skmem_arena_get_stats(na->na_arena, &memsize, NULL);
	chr->cr_memsize = memsize;

	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
	    na->na_flags, NAF_BITS);

done:
	if (error != 0) {
		if (ch->ch_na != NULL || na != NULL) {
			if (ch->ch_na != NULL) {
				ASSERT(na == ch->ch_na);
				/* callee will release reference on 'na' */
				na_disconnect_spec(nx, ch);
				na = NULL;
			}
			if (na != NULL) {
				(void) na_release_locked(na);
				na = NULL;
			}
		}
	}

	return error;
}
2762
2763 /*
2764 * TODO: [email protected] -- merge this into na_disconnect()
2765 */
/*
 * Special (kernel-internal) variant of na_disconnect(): unbinds a
 * kernel channel from its adapter through the adapter's na_special()
 * handler, then drops the channel's adapter reference.  Inverse of
 * na_connect_spec().
 */
void
na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nx)
	struct nexus_adapter *na = ch->ch_na;
	enum txrx t;
	int error;

	SK_LOCK_ASSERT_HELD();
	ASSERT(na != NULL);
	ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */

	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    na->na_name, ch->ch_info->cinfo_nx_port,
	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
	    na->na_flags, NAF_BITS);

	/* take a reference for this routine */
	na_retain_locked(na);

	ASSERT(ch->ch_flags & CHANF_KERNEL);
	ASSERT(ch->ch_schema == NULL);
	ASSERT(na->na_special != NULL);
	/* unbind this channel */
	error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
	ASSERT(error == 0);
	ASSERT(!(na->na_flags & NAF_SPEC_INIT));

	/* now release our reference; this may be the last */
	na_release_locked(na);
	na = NULL;

	ASSERT(ch->ch_na == NULL);
	for_rx_tx(t) {
		ch->ch_si[t] = NULL;
	}
}
2804
2805 void
na_start_spec(struct kern_nexus * nx,struct kern_channel * ch)2806 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2807 {
2808 #pragma unused(nx)
2809 struct nexus_adapter *na = ch->ch_na;
2810
2811 SK_LOCK_ASSERT_HELD();
2812
2813 ASSERT(ch->ch_flags & CHANF_KERNEL);
2814 ASSERT(NA_KERNEL_ONLY(na));
2815 ASSERT(na->na_special != NULL);
2816
2817 na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2818 }
2819
2820 void
na_stop_spec(struct kern_nexus * nx,struct kern_channel * ch)2821 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2822 {
2823 #pragma unused(nx)
2824 struct nexus_adapter *na = ch->ch_na;
2825
2826 SK_LOCK_ASSERT_HELD();
2827
2828 ASSERT(ch->ch_flags & CHANF_KERNEL);
2829 ASSERT(NA_KERNEL_ONLY(na));
2830 ASSERT(na->na_special != NULL);
2831
2832 na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2833 }
2834
2835 /*
2836 * MUST BE CALLED UNDER SK_LOCK()
2837 *
2838 * Get a refcounted reference to a nexus adapter attached
2839 * to the interface specified by chr.
2840 * This is always called in the execution of an ioctl().
2841 *
2842 * Return ENXIO if the interface specified by the request does
2843 * not exist, ENOTSUP if Skywalk is not supported by the interface,
2844 * EINVAL if parameters are invalid, ENOMEM if needed resources
2845 * could not be allocated.
2846 * If successful, hold a reference to the nexus adapter.
2847 *
2848 * No reference is kept on the real interface, which may then
2849 * disappear at any time.
2850 */
int
na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
    struct nexus_adapter **na, boolean_t create)
{
	int error = 0;

	_CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));

	*na = NULL; /* default return value */

	SK_LOCK_ASSERT_HELD();

	/*
	 * We cascade through all possible types of nexus adapter.
	 * All nx_*_na_find() functions return an error and an na,
	 * with the following combinations:
	 *
	 * error    na
	 *   0	   NULL		type doesn't match
	 *  !0	   NULL		type matches, but na creation/lookup failed
	 *   0	  !NULL		type matches and na created/found
	 *  !0	  !NULL		impossible
	 */

#if CONFIG_NEXUS_MONITOR
	/* try to see if this is a monitor port */
	error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_MONITOR */
#if CONFIG_NEXUS_USER_PIPE
	/* try to see if this is a pipe port */
	error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_USER_PIPE */
#if CONFIG_NEXUS_KERNEL_PIPE
	/* try to see if this is a kernel pipe port */
	error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_KERNEL_PIPE */
#if CONFIG_NEXUS_FLOWSWITCH
	/* try to see if this is a flowswitch port */
	error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_FLOWSWITCH */
#if CONFIG_NEXUS_NETIF
	/* lastly, try a netif port */
	error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_NETIF */

	/* no adapter type matched the request */
	ASSERT(*na == NULL);
	return ENXIO;
}
2914
/*
 * Take an additional reference on the adapter; a NULL 'na' is a
 * no-op.  Must be called with the Skywalk lock held.
 */
void
na_retain_locked(struct nexus_adapter *na)
{
	SK_LOCK_ASSERT_HELD();

	if (na != NULL) {
#if SK_LOG
		uint32_t oref = atomic_add_32_ov(&na->na_refcount, 1);
		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
		    na->na_name, SK_KVA(na), oref + 1, na->na_channels);
#else /* !SK_LOG */
		atomic_add_32(&na->na_refcount, 1);
#endif /* !SK_LOG */
	}
}
2930
/*
 * Drop a reference on the adapter; when the last reference goes away,
 * run the destructor, release the arena, and free the adapter.
 * Returns 1 iff the nexus_adapter is destroyed, 0 otherwise.
 */
int
na_release_locked(struct nexus_adapter *na)
{
	uint32_t oref;

	SK_LOCK_ASSERT_HELD();

	ASSERT(na->na_refcount > 0);
	oref = atomic_add_32_ov(&na->na_refcount, -1);
	if (oref > 1) {
		/* other references remain; nothing to destroy */
		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
		    na->na_name, SK_KVA(na), oref - 1, na->na_channels);
		return 0;
	}
	/* last reference dropped; no channel may still be bound */
	ASSERT(na->na_channels == 0);

	if (na->na_dtor != NULL) {
		na->na_dtor(na);
	}

	/* the destructor must have released all ring/context state */
	ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
	ASSERT(na->na_slot_ctxs == NULL);
	ASSERT(na->na_scratch == NULL);

#if CONFIG_NEXUS_USER_PIPE
	nx_upipe_na_dealloc(na);
#endif /* CONFIG_NEXUS_USER_PIPE */
	if (na->na_arena != NULL) {
		skmem_arena_release(na->na_arena);
		na->na_arena = NULL;
	}

	SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
	    na->na_name, SK_KVA(na));

	NA_FREE(na);
	return 1;
}
2970
2971 static struct nexus_adapter *
na_pseudo_alloc(zalloc_flags_t how)2972 na_pseudo_alloc(zalloc_flags_t how)
2973 {
2974 struct nexus_adapter *na;
2975
2976 na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
2977 if (na) {
2978 na->na_type = NA_PSEUDO;
2979 na->na_free = na_pseudo_free;
2980 }
2981 return na;
2982 }
2983
/*
 * Free a pseudo nexus adapter previously obtained via na_pseudo_alloc().
 * Must only be called once the last reference has been dropped.
 */
static void
na_pseudo_free(struct nexus_adapter *na)
{
	ASSERT(na->na_refcount == 0);
	SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
	/* scrub the adapter before returning it to the zone */
	bzero(na, sizeof(*na));
	zfree(na_pseudo_zone, na);
}
2992
/*
 * TX sync callback for the pseudo adapter: there is no backing
 * hardware or peer, so this only logs and reports success.
 */
static int
na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)	/* only referenced when SK_LOG is built in */
	/*
	 * NOTE(review): "flags 0%x" in the format string below looks like
	 * it was meant to be "0x%x"; left as-is since it only affects the
	 * log output's radix prefix.
	 */
	SK_DF(SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);

	return 0;
}
3006
/*
 * RX sync callback for the pseudo adapter: logs, sanity-checks the
 * ring head against its limit, and reports success; no data moves.
 */
static int
na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)	/* only referenced when SK_LOG is built in */
	SK_DF(SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);

	/* caller-advanced head must never pass the ring limit */
	ASSERT(kring->ckr_rhead <= kring->ckr_lim);

	return 0;
}
3022
3023 static int
na_pseudo_activate(struct nexus_adapter * na,na_activate_mode_t mode)3024 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3025 {
3026 SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3027 SK_KVA(na), na_activate_mode2str(mode));
3028
3029 switch (mode) {
3030 case NA_ACTIVATE_MODE_ON:
3031 atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
3032 break;
3033
3034 case NA_ACTIVATE_MODE_DEFUNCT:
3035 break;
3036
3037 case NA_ACTIVATE_MODE_OFF:
3038 atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
3039 break;
3040
3041 default:
3042 VERIFY(0);
3043 /* NOTREACHED */
3044 __builtin_unreachable();
3045 }
3046
3047 return 0;
3048 }
3049
/*
 * Destructor callback for the pseudo adapter; it owns no resources
 * beyond what na_release_locked() already tears down, so this is
 * intentionally empty.
 */
static void
na_pseudo_dtor(struct nexus_adapter *na)
{
#pragma unused(na)
}
3055
/*
 * Create the kernel rings for a pseudo adapter channel via the common
 * ring-memory setup routine.
 * NOTE(review): the FALSE argument's meaning is owned by
 * na_rings_mem_setup(); presumably it disables an optional feature
 * (e.g. per-slot contexts) — confirm against its definition.
 */
static int
na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	return na_rings_mem_setup(na, FALSE, ch);
}
3061
/*
 * Tear down the kernel rings for a pseudo adapter channel; `defunct`
 * is forwarded so the common routine can distinguish a defunct-style
 * teardown from a full delete.
 */
static void
na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	na_rings_mem_teardown(na, ch, defunct);
}
3068
3069 /*
3070 * Pseudo nexus adapter; typically used as a generic parent adapter.
3071 */
3072 int
na_pseudo_create(struct kern_nexus * nx,struct chreq * chr,struct nexus_adapter ** ret)3073 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3074 struct nexus_adapter **ret)
3075 {
3076 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3077 struct nexus_adapter *na;
3078 int error;
3079
3080 SK_LOCK_ASSERT_HELD();
3081 *ret = NULL;
3082
3083 na = na_pseudo_alloc(Z_WAITOK);
3084
3085 ASSERT(na->na_type == NA_PSEUDO);
3086 ASSERT(na->na_free == na_pseudo_free);
3087
3088 (void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3089 na->na_name[sizeof(na->na_name) - 1] = '\0';
3090 uuid_generate_random(na->na_uuid);
3091
3092 /*
3093 * Verify upper bounds; for all cases including user pipe nexus,
3094 * the parameters must have already been validated by corresponding
3095 * nxdom_prov_params() function defined by each domain.
3096 */
3097 na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3098 na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3099 na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3100 na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3101 ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3102 ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3103 ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3104 ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3105
3106 na->na_txsync = na_pseudo_txsync;
3107 na->na_rxsync = na_pseudo_rxsync;
3108 na->na_activate = na_pseudo_activate;
3109 na->na_dtor = na_pseudo_dtor;
3110 na->na_krings_create = na_pseudo_krings_create;
3111 na->na_krings_delete = na_pseudo_krings_delete;
3112
3113 *(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3114 NEXUS_STATS_TYPE_INVALID;
3115
3116 /* other fields are set in the common routine */
3117 na_attach_common(na, nx, NX_DOM_PROV(nx));
3118
3119 if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3120 nx, na)) != 0) {
3121 ASSERT(na->na_arena == NULL);
3122 goto err;
3123 }
3124 ASSERT(na->na_arena != NULL);
3125
3126 *(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3127 ASSERT(na->na_flowadv_max == 0 ||
3128 skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3129
3130 #if SK_LOG
3131 uuid_string_t uuidstr;
3132 SK_D("na_name: \"%s\"", na->na_name);
3133 SK_D(" UUID: %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3134 SK_D(" nx: 0x%llx (\"%s\":\"%s\")",
3135 SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3136 NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3137 SK_D(" flags: %b", na->na_flags, NAF_BITS);
3138 SK_D(" flowadv_max: %u", na->na_flowadv_max);
3139 SK_D(" rings: tx %u rx %u",
3140 na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3141 SK_D(" slots: tx %u rx %u",
3142 na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3143 #if CONFIG_NEXUS_USER_PIPE
3144 SK_D(" next_pipe: %u", na->na_next_pipe);
3145 SK_D(" max_pipes: %u", na->na_max_pipes);
3146 #endif /* CONFIG_NEXUS_USER_PIPE */
3147 #endif /* SK_LOG */
3148
3149 *ret = na;
3150 na_retain_locked(na);
3151
3152 return 0;
3153
3154 err:
3155 if (na != NULL) {
3156 if (na->na_arena != NULL) {
3157 skmem_arena_release(na->na_arena);
3158 na->na_arena = NULL;
3159 }
3160 NA_FREE(na);
3161 }
3162 return error;
3163 }
3164
3165 void
na_flowadv_entry_alloc(const struct nexus_adapter * na,uuid_t fae_id,const flowadv_idx_t fe_idx,const uint32_t flowid)3166 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3167 const flowadv_idx_t fe_idx, const uint32_t flowid)
3168 {
3169 struct skmem_arena *ar = na->na_arena;
3170 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3171 struct __flowadv_entry *fae;
3172
3173 ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3174 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3175
3176 AR_LOCK(ar);
3177
3178 /* we must not get here if arena is defunct; this must be valid */
3179 ASSERT(arn->arn_flowadv_obj != NULL);
3180
3181 VERIFY(fe_idx < na->na_flowadv_max);
3182 fae = &arn->arn_flowadv_obj[fe_idx];
3183 uuid_copy(fae->fae_id, fae_id);
3184 fae->fae_flowid = flowid;
3185 fae->fae_flags = FLOWADVF_VALID;
3186
3187 AR_UNLOCK(ar);
3188 }
3189
3190 void
na_flowadv_entry_free(const struct nexus_adapter * na,uuid_t fae_id,const flowadv_idx_t fe_idx,const uint32_t flowid)3191 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3192 const flowadv_idx_t fe_idx, const uint32_t flowid)
3193 {
3194 #pragma unused(fae_id)
3195 struct skmem_arena *ar = na->na_arena;
3196 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3197
3198 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3199 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3200
3201 AR_LOCK(ar);
3202
3203 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3204 if (arn->arn_flowadv_obj != NULL) {
3205 struct __flowadv_entry *fae;
3206
3207 VERIFY(fe_idx < na->na_flowadv_max);
3208 fae = &arn->arn_flowadv_obj[fe_idx];
3209 ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3210 uuid_clear(fae->fae_id);
3211 VERIFY(fae->fae_flowid == flowid);
3212 fae->fae_flowid = 0;
3213 fae->fae_flags = 0;
3214 }
3215
3216 AR_UNLOCK(ar);
3217 }
3218
3219 bool
na_flowadv_set(const struct nexus_adapter * na,const flowadv_idx_t fe_idx,const flowadv_token_t flow_token)3220 na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3221 const flowadv_token_t flow_token)
3222 {
3223 struct skmem_arena *ar = na->na_arena;
3224 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3225 bool suspend;
3226
3227 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3228 ASSERT(fe_idx < na->na_flowadv_max);
3229 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3230
3231 AR_LOCK(ar);
3232
3233 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3234
3235 if (arn->arn_flowadv_obj != NULL) {
3236 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3237
3238 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3239 /*
3240 * We cannot guarantee that the flow is still around by now,
3241 * so check if that's the case and let the caller know.
3242 */
3243 if ((suspend = (fae->fae_token == flow_token))) {
3244 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3245 fae->fae_flags |= FLOWADVF_SUSPENDED;
3246 }
3247 } else {
3248 suspend = false;
3249 }
3250 if (suspend) {
3251 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%llu fidx %u "
3252 "SUSPEND", sk_proc_name_address(current_proc()),
3253 sk_proc_pid(current_proc()), flow_token, fe_idx);
3254 } else {
3255 SK_ERR("%s(%d) flow token 0x%llu fidx %u no longer around",
3256 sk_proc_name_address(current_proc()),
3257 sk_proc_pid(current_proc()), flow_token, fe_idx);
3258 }
3259
3260 AR_UNLOCK(ar);
3261
3262 return suspend;
3263 }
3264
3265 int
na_flowadv_clear(const struct kern_channel * ch,const flowadv_idx_t fe_idx,const flowadv_token_t flow_token)3266 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3267 const flowadv_token_t flow_token)
3268 {
3269 struct nexus_adapter *na = ch->ch_na;
3270 struct skmem_arena *ar = na->na_arena;
3271 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3272 boolean_t resume;
3273
3274 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3275 ASSERT(fe_idx < na->na_flowadv_max);
3276 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3277
3278 AR_LOCK(ar);
3279
3280 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3281
3282 if (arn->arn_flowadv_obj != NULL) {
3283 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3284
3285 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3286 /*
3287 * We cannot guarantee that the flow is still around by now,
3288 * so check if that's the case and let the caller know.
3289 */
3290 if ((resume = (fae->fae_token == flow_token))) {
3291 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3292 fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3293 }
3294 } else {
3295 resume = FALSE;
3296 }
3297 if (resume) {
3298 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3299 "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
3300 fe_idx);
3301 } else {
3302 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3303 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3304 }
3305
3306 AR_UNLOCK(ar);
3307
3308 return resume;
3309 }
3310
/*
 * Post a channel event on a TX ring carrying the flow-advisory update
 * hint, so listeners learn that flow advisory state changed.
 * NOTE(review): the semantics of the three boolean arguments are
 * defined by na_post_event(); confirm against its declaration.
 */
void
na_flowadv_event(struct __kern_channel_ring *kring)
{
	/* flow advisory events are only meaningful on TX rings */
	ASSERT(kring->ckr_tx == NR_TX);

	SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));

	na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
}
3322
/*
 * Sync callback for the user packet pool "free" ring: walks the slots
 * the user has returned (between khead and rhead), detaches each
 * packet from the user packet pool's allocated list, and frees the
 * whole batch back to the pbufpool.  Returns 0 on success or EINVAL
 * if a slot holds no valid metadata or an un-allocated packet.
 */
static int
na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)	/* p is referenced only by SK_DF logging */
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	struct kern_pbufpool *pp = kring->ckr_pp;
	uint32_t nfree = 0;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots (rhead - khead, with ring wraparound) */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	PP_LOCK(pp);
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		/* the user must have placed a packet in this slot */
		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		/* look up and unlink the packet by its metadata index */
		kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			SK_ERR("un-allocated packet or buflet %d %p",
			    usd->sd_md_idx, SK_KVA(kqum));
			ret = EINVAL;
			break;
		}

		/* detach and free the packet */
		kqum->qum_qflags &= ~QUM_F_FINALIZED;
		kqum->qum_ksd = NULL;
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		ASSERT(pp == kqum->qum_pp);
		/* stage for batched free; scratch holds one per slot max */
		ASSERT(nfree < kring->ckr_num_slots);
		kring->ckr_scratch[nfree++] = (uint64_t)kqum;
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	PP_UNLOCK(pp);

	/* free everything collected above in a single batch */
	if (__probable(nfree > 0)) {
		pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
	}

	/*
	 * Advance khead past the reclaimed slots; ktail is kept one slot
	 * behind the new head (ring-full representation for this ring).
	 */
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}
3397
/*
 * Sync callback for the user packet pool "alloc" ring: adapts the
 * per-ring allocation working set, allocates a batch of packets from
 * the pbufpool, records them in the process's allocated-packet list,
 * and attaches them to ring slots (externalizing the metadata) for
 * user space to claim.  Always returns 0; allocation shortfalls are
 * accounted in the ring's error stats.
 */
static int
na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, ph_needed, ph_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		/*
		 * Purge request: if the ring has been idle for at least
		 * the reap interval, shrink the working set to the
		 * reap minimum; no allocation happens on this path.
		 */
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increment the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		/*
		 * Shrink only after the hold time has elapsed and at
		 * least a quarter of the working set is still unclaimed.
		 */
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	/* never drop below the allocation low watermark */
	curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;

	/* min with # of avail free slots (subtract busy from max) */
	n = ph_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((ph_cnt = ph_needed) == 0) {
		goto done;
	}

	/*
	 * Best-effort batch allocation; ph_cnt is updated in place with
	 * the number actually obtained.  NOTE(review): the `1` argument
	 * presumably is the per-packet buflet count — confirm against
	 * the kern_pbufpool API.
	 */
	err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
	    &ph_cnt);

	if (__improbable(ph_cnt == 0)) {
		SK_ERR("kr 0x%llx failed to alloc %u packet s(%d)",
		    SK_KVA(kring), ph_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
	} else {
		/*
		 * Add packets to the allocated list of user packet pool.
		 */
		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
	}


	for (n = 0; n < ph_cnt; n++) {
		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
		kring->ckr_scratch[n] = 0;
		ASSERT(kqum != NULL);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/*
		 * Since this packet is freshly allocated and we need to
		 * have the flag set for the attach to succeed, just set
		 * it here rather than calling __packet_finalize().
		 */
		kqum->qum_qflags |= QUM_F_FINALIZED;

		/* Attach packet to slot */
		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
		/*
		 * externalize the packet as it is being transferred to
		 * user space.
		 */
		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	/* ktail advances to cover the newly attached slots, if any */
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}
3515
/*
 * Sync callback for the user packet pool buflet "free" ring: walks the
 * slots the user has returned (between khead and rhead), detaches each
 * buflet from the user packet pool and frees it back to the pbufpool.
 * Returns 0 on success or EINVAL on a bad/un-allocated slot.
 * NOTE(review): unlike na_packet_pool_free_sync(), no PP_LOCK is taken
 * here — presumably pp_remove_upp_bft() locks internally; confirm.
 */
static int
na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)	/* p is referenced only by SK_DF logging */
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct kern_pbufpool *pp = kring->ckr_pp;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots (rhead - khead, with ring wraparound) */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		/* the user must have placed a buflet in this slot */
		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		/* look up and unlink the buflet by its metadata index */
		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
			    SK_KVA(kbft));
			ret = EINVAL;
			break;
		}

		/* detach and free the packet */
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		pp_free_buflet(pp, kbft);
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	/*
	 * Advance khead past the reclaimed slots; ktail is kept one slot
	 * behind the new head (ring-full representation for this ring).
	 */
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}
3578
/*
 * Sync callback for the user packet pool buflet "alloc" ring: adapts
 * the per-ring allocation working set, allocates a batch of buflets
 * (with attached buffers) from the pbufpool, records them in the
 * process's allocated list, externalizes them, and attaches them to
 * ring slots for user space to claim.  Always returns 0; allocation
 * shortfalls are accounted in the ring's error stats.
 */
static int
na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, bh_needed, bh_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct __kern_buflet_ext *kbe;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		/*
		 * Purge request: if the ring has been idle for at least
		 * the reap interval, shrink the working set to the
		 * reap minimum; no allocation happens on this path.
		 */
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increment the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		/*
		 * Shrink only after the hold time has elapsed and at
		 * least a quarter of the working set is still unclaimed.
		 */
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	/* never drop below the buflet allocation low watermark */
	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;

	/* min with # of avail free slots (subtract busy from max) */
	n = bh_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((bh_cnt = bh_needed) == 0) {
		goto done;
	}

	/* best-effort batch allocation; bh_cnt is updated in place */
	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
	    SKMEM_NOSLEEP, PP_ALLOC_BFT_ATTACH_BUFFER);

	if (bh_cnt == 0) {
		SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
		    SK_KVA(kring), bh_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
	}

	for (n = 0; n < bh_cnt; n++) {
		struct __user_buflet *ubft;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
		kbe = (struct __kern_buflet_ext *)kbft;
		kring->ckr_scratch[n] = 0;
		ASSERT(kbft != NULL);

		/*
		 * Add buflet to the allocated list of user packet pool.
		 */
		pp_insert_upp_bft(pp, kbft, pid);

		/*
		 * externalize the buflet as it is being transferred to
		 * user space.
		 */
		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
		KBUF_EXTERNALIZE(kbft, ubft, pp);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/* Attach buflet to slot */
		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	/* ktail advances to cover the newly attached slots, if any */
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}
3694
3695 /* The caller needs to ensure that the NA stays intact */
3696 void
na_drain(struct nexus_adapter * na,boolean_t purge)3697 na_drain(struct nexus_adapter *na, boolean_t purge)
3698 {
3699 /* will be cleared on next channel sync */
3700 if (!(atomic_bitset_32_ov(&na->na_flags, NAF_DRAINING) &
3701 NAF_DRAINING) && NA_IS_ACTIVE(na)) {
3702 SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
3703 na->na_name, (purge ? "purging" : "pruning"),
3704 SK_KVA(na), na->na_flags, NAF_BITS);
3705
3706 /* reap (purge/prune) caches in the arena */
3707 skmem_arena_reap(na->na_arena, purge);
3708 }
3709 }
3710