/*
 * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
 * All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/systm.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/monitor/nx_monitor.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/upipe/nx_user_pipe.h>
#include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
#include <kern/thread.h>

static int na_krings_use(struct kern_channel *);
static void na_krings_unuse(struct kern_channel *);
static void na_krings_verify(struct nexus_adapter *);
static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
static void na_set_all_rings(struct nexus_adapter *, uint32_t);
static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
static void na_unset_ringid(struct kern_channel *);
static void na_teardown(struct nexus_adapter *, struct kern_channel *,
    boolean_t);

static int na_kr_create(struct nexus_adapter *, boolean_t);
static void na_kr_delete(struct nexus_adapter *);
static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
    boolean_t);
static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
    boolean_t, struct proc *);
static int na_kr_populate_slots(struct __kern_channel_ring *);
static void na_kr_depopulate_slots(struct __kern_channel_ring *,
    struct kern_channel *, boolean_t defunct);

static int na_schema_alloc(struct kern_channel *);

static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
static void na_pseudo_free(struct nexus_adapter *);
static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
static void na_pseudo_dtor(struct nexus_adapter *);
static int na_pseudo_krings_create(struct nexus_adapter *,
    struct kern_channel *);
static void na_pseudo_krings_delete(struct nexus_adapter *,
    struct kern_channel *, boolean_t);
static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_large_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);

#define NA_KRING_IDLE_TIMEOUT   (NSEC_PER_SEC * 30) /* 30 seconds */

static SKMEM_TYPE_DEFINE(na_pseudo_zone, struct nexus_adapter);

static int __na_inited = 0;

#define NA_NUM_WMM_CLASSES      4
#define NAKR_WMM_SC2RINGID(_s)  PKT_SC2TC(_s)
#define NAKR_SET_SVC_LUT(_n, _s)                                        \
	(_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
#define NAKR_SET_KR_SVC(_n, _s)                                         \
	NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)

#define NA_UPP_ALLOC_LOWAT      8
static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;

#define NA_UPP_REAP_INTERVAL    10 /* seconds */
static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;

#define NA_UPP_WS_HOLD_TIME     2 /* seconds */
static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;

#define NA_UPP_REAP_MIN_PKTS    0
static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;

#define NA_UPP_ALLOC_BUF_LOWAT  64
static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;

#if (DEVELOPMENT || DEBUG)
static uint64_t _na_inject_error = 0;
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
	_SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)

SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
    NA_UPP_WS_HOLD_TIME, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
    NA_UPP_REAP_INTERVAL, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
    NA_UPP_REAP_MIN_PKTS, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
    NA_UPP_ALLOC_LOWAT, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
    NA_UPP_ALLOC_BUF_LOWAT, "");
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
    CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
#else
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
#endif /* !DEVELOPMENT && !DEBUG */

#define SKMEM_TAG_NX_RINGS      "com.apple.skywalk.nexus.rings"
static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);

#define SKMEM_TAG_NX_CONTEXTS   "com.apple.skywalk.nexus.contexts"
static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);

#define SKMEM_TAG_NX_SCRATCH    "com.apple.skywalk.nexus.scratch"
static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);

#if !XNU_TARGET_OS_OSX
/* see KLDBootstrap::readPrelinkedExtensions() for details */
extern uuid_t kernelcache_uuid;
#else /* XNU_TARGET_OS_OSX */
/* see panic_init() for details */
extern unsigned char *kernel_uuid;
#endif /* XNU_TARGET_OS_OSX */

void
na_init(void)
{
	/*
	 * Changing the size of the nexus_mdata structure won't break the
	 * ABI, but we need to be mindful of memory consumption; thus we
	 * add a compile-time check here to make sure the size is within
	 * the expected limit and that it's properly aligned.  This
	 * check may be adjusted in the future as needed.
	 */
	_CASSERT(sizeof(struct nexus_mdata) <= 32 &&
	    IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
	_CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));

	/* see comments on nexus_meta_type_t */
	_CASSERT(NEXUS_META_TYPE_MAX == 3);
	_CASSERT(NEXUS_META_SUBTYPE_MAX == 3);

	ASSERT(!__na_inited);

	__na_inited = 1;
}

void
na_fini(void)
{
	if (__na_inited) {
		__na_inited = 0;
	}
}

/*
 * Interpret the ringid of a chreq, by translating it into a pair
 * of intervals of ring indices:
 *
 * [txfirst, txlast) and [rxfirst, rxlast)
 */
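/*
 * Worked example (illustrative, not part of the original comments): on
 * an adapter with 4 TX and 2 RX rings, RING_SET_ALL with
 * ring_id == CHANNEL_RING_ID_ANY yields tx [0,4) and rx [0,2), i.e.
 * every ring; RING_SET_ALL with ring_id == 1 yields tx [1,2) and
 * rx [1,2), a single ring pair.  A ring_id of 5 is rejected with
 * EINVAL, since it is out of range for both directions.
 */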
int
na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
    ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
{
	enum txrx t;

	switch (ring_set) {
	case RING_SET_ALL:
		/*
		 * Ring pair eligibility: all ring(s).
		 */
		if (ring_id != CHANNEL_RING_ID_ANY &&
		    ring_id >= na_get_nrings(na, NR_TX) &&
		    ring_id >= na_get_nrings(na, NR_RX)) {
			SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
			    na->na_name, (int)ring_id, ring_set);
			return EINVAL;
		}
		for_rx_tx(t) {
			if (ring_id == CHANNEL_RING_ID_ANY) {
				first[t] = 0;
				last[t] = na_get_nrings(na, t);
			} else {
				first[t] = ring_id;
				last[t] = ring_id + 1;
			}
		}
		break;

	default:
		SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
		return EINVAL;
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING,
	    "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
	    na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
	    first[NR_RX], last[NR_RX]);

	return 0;
}

/*
 * Set the ring ID. For devices with a single queue, a request
 * for all rings is the same as a single ring.
 */
static int
na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
{
	struct nexus_adapter *na = ch->ch_na;
	int error;
	enum txrx t;
	uint32_t n_alloc_rings;

	if ((error = na_interp_ringid(na, ring_id, ring_set,
	    ch->ch_first, ch->ch_last)) != 0) {
		return error;
	}

	n_alloc_rings = na_get_nrings(na, NR_A);
	if (n_alloc_rings != 0) {
		uint32_t n_large_alloc_rings;

		ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
		ch->ch_last[NR_A] = ch->ch_last[NR_F] =
		    ch->ch_first[NR_A] + n_alloc_rings;

		n_large_alloc_rings = na_get_nrings(na, NR_LBA);
		ch->ch_first[NR_LBA] = 0;
		ch->ch_last[NR_LBA] = ch->ch_first[NR_LBA] + n_large_alloc_rings;
	} else {
		ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
		ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
		ch->ch_first[NR_LBA] = ch->ch_last[NR_LBA] = 0;
	}
	ch->ch_first[NR_EV] = 0;
	ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);

	/* XXX: should we initialize na_si_users for the event ring? */

	/*
	 * Optimization: count the users registered for more than
	 * one ring, which are the ones sleeping on the global queue.
	 * The default na_notify() callback will then avoid signaling
	 * the global queue if nobody is using it.
	 */
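	/*
	 * Illustrative sketch (assuming ch_is_multiplex() is true when a
	 * channel spans more than one ring in a direction): a channel
	 * bound to tx [0,4) increments na_si_users[NR_TX] and sleeps on
	 * the adapter-wide na_si[NR_TX]; a channel bound to tx [2,3)
	 * sleeps only on that ring's ckr_si and leaves the global count
	 * untouched.
	 */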
	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			na->na_si_users[t]++;
			ASSERT(na->na_si_users[t] != 0);
		}
	}
	return 0;
}

static void
na_unset_ringid(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	enum txrx t;

	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			ASSERT(na->na_si_users[t] != 0);
			na->na_si_users[t]--;
		}
		ch->ch_first[t] = ch->ch_last[t] = 0;
	}
}

/*
 * Check that the rings we want to bind are not exclusively owned by a previous
 * bind. If exclusive ownership has been requested, we also mark the rings.
 */
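/*
 * Worked example (illustrative): if channel A is bound to rings [0,2)
 * without CHANF_EXCLUSIVE (ckr_users == 1 on each), a second bind of
 * ring 1 without exclusivity succeeds and bumps ckr_users to 2; the
 * same bind with CHANF_EXCLUSIVE fails with EBUSY in the first round
 * since the ring already has users, and any later bind fails likewise
 * once CKRF_EXCLUSIVE has been set.
 */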
/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
na_krings_use(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct __kern_channel_ring *kring;
	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
	enum txrx t;
	uint32_t i;

	SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

	/*
	 * First round: check that all the requested rings
	 * are neither already exclusively owned, nor already
	 * in use when we want exclusive ownership.
	 */
	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
			    (kring->ckr_users && excl)) {
				SK_DF(SK_VERB_NA | SK_VERB_RING,
				    "kr \"%s\" (0x%llx) krflags 0x%b is busy",
				    kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS);
				return EBUSY;
			}
		}
	}

	/*
	 * Second round: increment usage count and possibly
	 * mark as exclusive.
	 */

	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			kring->ckr_users++;
			if (excl) {
				kring->ckr_flags |= CKRF_EXCLUSIVE;
			}
		}
	}

	return 0;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_unuse(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct __kern_channel_ring *kring;
	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
	enum txrx t;
	uint32_t i;

	SK_DF(SK_VERB_NA | SK_VERB_RING,
	    "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			if (excl) {
				kring->ckr_flags &= ~CKRF_EXCLUSIVE;
			}
			kring->ckr_users--;
		}
	}
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_verify(struct nexus_adapter *na)
{
	struct __kern_channel_ring *kring;
	enum txrx t;
	uint32_t i;

	for_all_rings(t) {
		for (i = 0; i < na_get_nrings(na, t); i++) {
			kring = &NAKR(na, t)[i];
			/* na_kr_create() validations */
			ASSERT(kring->ckr_num_slots > 0);
			ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
			ASSERT(kring->ckr_pp != NULL);

			if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
				continue;
			}
			/* na_kr_setup() validations */
			if (KR_KERNEL_ONLY(kring)) {
				ASSERT(kring->ckr_ring == NULL);
			} else {
				ASSERT(kring->ckr_ring != NULL);
			}
			ASSERT(kring->ckr_ksds_last ==
			    &kring->ckr_ksds[kring->ckr_lim]);
		}
	}
}

int
na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr)
{
	struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
	struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
	uint32_t ch_mode = chr->cr_mode;
	int err = 0;

	SK_LOCK_ASSERT_HELD();
	ASSERT(ch->ch_schema == NULL);
	ASSERT(ch->ch_na == NULL);

	/* ring configuration may have changed, fetch from the card */
	na_update_config(na);
	ch->ch_na = na; /* store the reference */
	err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
	if (err != 0) {
		goto err;
	}

	os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
	    CHANF_USER_PACKET_POOL | CHANF_EVENT_RING), relaxed);
	if (ch_mode & CHMODE_EXCLUSIVE) {
		os_atomic_or(&ch->ch_flags, CHANF_EXCLUSIVE, relaxed);
	}
	/*
	 * Disallow automatic sync for monitor mode, since TX
	 * direction is disabled.
	 */
	if (ch_mode & CHMODE_MONITOR) {
		os_atomic_or(&ch->ch_flags, CHANF_RXONLY, relaxed);
	}

	if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
	    !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
		SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
		    ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
		err = EINVAL;
		goto err;
	}

	if (na->na_arena->ar_flags & ARF_DEFUNCT) {
		err = ENXIO;
		goto err;
	}

	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
		ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
		os_atomic_or(&ch->ch_flags, CHANF_USER_PACKET_POOL, relaxed);
	}

	if (ch_mode & CHMODE_EVENT_RING) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(na->na_flags & NAF_EVENT_RING);
		ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
		os_atomic_or(&ch->ch_flags, CHANF_EVENT_RING, relaxed);
	}

	/*
	 * If this is the first channel of the adapter, create
	 * the rings and their in-kernel view, the krings.
	 */
	if (na->na_channels == 0) {
		err = na->na_krings_create(na, ch);
		if (err != 0) {
			goto err;
		}

		/*
		 * Sanity check; this is already done in na_kr_create(),
		 * but we do it here as well to validate na_kr_setup().
		 */
		na_krings_verify(na);
		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
	}

	/*
	 * Validate ownership and usability of the krings; take into account
	 * whether some previous bind has exclusive ownership on them.
	 */
	err = na_krings_use(ch);
	if (err != 0) {
		goto err_del_rings;
	}

	/* for a user-facing channel, create a new channel schema */
	if (!(ch->ch_flags & CHANF_KERNEL)) {
		err = na_schema_alloc(ch);
		if (err != 0) {
			goto err_rel_excl;
		}

		ASSERT(ch->ch_schema != NULL);
		ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
	} else {
		ASSERT(ch->ch_schema == NULL);
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* update our work timestamp */
	na->na_work_ts = net_uptime();

	na->na_channels++;

	/*
	 * If a user packet pool is desired, initialize the allocated
	 * object hash table in the pool, if not already initialized.
	 * This also retains a refcnt on the pool which the caller must
	 * release.
	 */
	ASSERT(ch->ch_pp == NULL);
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
#pragma unused(tx_pp)
		ASSERT(rx_pp == tx_pp);
		err = pp_init_upp(rx_pp, TRUE);
		if (err != 0) {
			goto err_free_schema;
		}
		ch->ch_pp = rx_pp;
	}

	if (!NA_IS_ACTIVE(na)) {
		err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
		if (err != 0) {
			goto err_release_pp;
		}

		SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
		    SK_KVA(na));
		SK_D("  na_md_type: %u", na->na_md_type);
		SK_D("  na_md_subtype: %u", na->na_md_subtype);
	}

	SK_D("ch 0x%llx", SK_KVA(ch));
	SK_D("  ch_flags: 0x%b", ch->ch_flags, CHANF_BITS);
	if (ch->ch_schema != NULL) {
		SK_D("  ch_schema: 0x%llx", SK_KVA(ch->ch_schema));
	}
	SK_D("  ch_na: 0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
	    ch->ch_na->na_channels);
	SK_D("  ch_tx_rings: [%u,%u)", ch->ch_first[NR_TX],
	    ch->ch_last[NR_TX]);
	SK_D("  ch_rx_rings: [%u,%u)", ch->ch_first[NR_RX],
	    ch->ch_last[NR_RX]);
	SK_D("  ch_alloc_rings: [%u,%u)", ch->ch_first[NR_A],
	    ch->ch_last[NR_A]);
	SK_D("  ch_free_rings: [%u,%u)", ch->ch_first[NR_F],
	    ch->ch_last[NR_F]);
	SK_D("  ch_ev_rings: [%u,%u)", ch->ch_first[NR_EV],
	    ch->ch_last[NR_EV]);

	return 0;

err_release_pp:
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(ch->ch_pp != NULL);
		pp_release(rx_pp);
		ch->ch_pp = NULL;
	}
err_free_schema:
	*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
	    NEXUS_META_TYPE_INVALID;
	*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
	    NEXUS_META_SUBTYPE_INVALID;
	ASSERT(na->na_channels != 0);
	na->na_channels--;
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}
err_rel_excl:
	na_krings_unuse(ch);
err_del_rings:
	if (na->na_channels == 0) {
		na->na_krings_delete(na, ch, FALSE);
	}
err:
	ch->ch_na = NULL;
	ASSERT(err != 0);

	return err;
}

/*
 * Undo everything that was done in na_bind_channel().
 */
/* call with SK_LOCK held */
void
na_unbind_channel(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;

	SK_LOCK_ASSERT_HELD();

	ASSERT(na->na_channels != 0);
	na->na_channels--;

	/* release exclusive use if it was requested at bind time */
	na_krings_unuse(ch);

	if (na->na_channels == 0) {     /* last instance */
		SK_D("%s(%d): deleting last channel instance for %s",
		    ch->ch_name, ch->ch_pid, na->na_name);

		/*
		 * Free any remaining allocated packets attached to
		 * the slots, followed by a teardown of the arena.
		 */
		na_teardown(na, ch, FALSE);

		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    NEXUS_META_TYPE_INVALID;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    NEXUS_META_SUBTYPE_INVALID;
	} else {
		SK_D("%s(%d): %s has %u remaining channel instance(s)",
		    ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
	}

	/*
	 * Free any allocated packets (for the process) attached to the
	 * slots; note that na_teardown() could have done this as well.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		pp_release(ch->ch_pp);
		ch->ch_pp = NULL;
	}

	/* possibly decrement counter of tx_si/rx_si users */
	na_unset_ringid(ch);

	/* reap the caches now (purge if adapter is idle) */
	skmem_arena_reap(na->na_arena, (na->na_channels == 0));

	/* delete the channel schema (csm), if any */
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* destroy the memory map */
	skmem_arena_munmap_channel(na->na_arena, ch);

	/* mark the channel as unbound */
	os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE), relaxed);
	ch->ch_na = NULL;

	/* and finally release the nexus adapter; this might free it */
	(void) na_release_locked(na);
}

static void
na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

#if CONFIG_NEXUS_MONITOR
	/*
	 * Walk through all the rings and tell any monitor
	 * that the port is going to exit Skywalk mode.
	 */
	nx_mon_stop(na);
#endif /* CONFIG_NEXUS_MONITOR */

	/*
	 * Deactivate the adapter.
	 */
	(void) na->na_activate(na,
	    (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));

	/*
	 * Free any remaining allocated packets for this process.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		if (!defunct) {
			pp_release(ch->ch_pp);
			ch->ch_pp = NULL;
		}
	}

	/*
	 * Delete rings and buffers.
	 */
	na->na_krings_delete(na, ch, defunct);
}

/* call with SK_LOCK held */
/*
 * Allocate the per-fd structure __user_channel_schema.
 */
static int
na_schema_alloc(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn;
	mach_vm_offset_t roff[SKMEM_REGIONS];
	struct __kern_channel_ring *kr;
	struct __user_channel_schema *csm;
	struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
	mach_vm_offset_t base;
	uint32_t i, j, k, n[NR_ALL];
	enum txrx t;

	/* see comments for struct __user_channel_schema */
	_CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
	_CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
	    sizeof(csm->csm_ver));
	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
	_CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
	    sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
	    sizeof(csm->csm_kern_name));

	SK_LOCK_ASSERT_HELD();

	ASSERT(!(ch->ch_flags & CHANF_KERNEL));
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
	arn = skmem_arena_nexus(ar);
	ASSERT(arn != NULL);
	for_all_rings(t) {
		n[t] = 0;
	}

	csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
	if (csm == NULL) {
		return ENOMEM;
	}

	skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
	bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));

	*(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;

	/* kernel version and executable UUID */
	_CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
	(void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
	    version, sizeof(csm->csm_kern_name) - 1);
#if !XNU_TARGET_OS_OSX
	(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
	    kernelcache_uuid, sizeof(csm->csm_kern_uuid));
#else /* XNU_TARGET_OS_OSX */
	if (kernel_uuid != NULL) {
		(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
		    kernel_uuid, sizeof(csm->csm_kern_uuid));
	}
#endif /* XNU_TARGET_OS_OSX */

	for_rx_tx(t) {
		ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
		n[t] = ch->ch_last[t] - ch->ch_first[t];
		ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
	}

	/* return total number of tx and rx rings for this channel */
	*(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
	*(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];

	if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
		*(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
		    na->na_num_allocator_ring_pairs;
		n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
		ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
		ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
		ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));

		n[NR_LBA] = na->na_num_large_buf_alloc_rings;
		if (n[NR_LBA] != 0) {
			*(uint32_t *)(uintptr_t)&csm->csm_large_buf_alloc_rings = n[NR_LBA];
			ASSERT(n[NR_LBA] == (ch->ch_last[NR_LBA] - ch->ch_first[NR_LBA]));
		}
	}

	if (ch->ch_flags & CHANF_EVENT_RING) {
		n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
		ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
		*(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
	}

	bzero(&roff, sizeof(roff));
	for (i = 0; i < SKMEM_REGIONS; i++) {
		if (ar->ar_regions[i] == NULL) {
			ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
			    i == SKMEM_REGION_SCHEMA ||
			    i == SKMEM_REGION_BUF_LARGE ||
			    i == SKMEM_REGION_RXBUF_DEF ||
			    i == SKMEM_REGION_RXBUF_LARGE ||
			    i == SKMEM_REGION_TXBUF_DEF ||
			    i == SKMEM_REGION_TXBUF_LARGE ||
			    i == SKMEM_REGION_RXKMD ||
			    i == SKMEM_REGION_TXKMD ||
			    i == SKMEM_REGION_UMD ||
			    i == SKMEM_REGION_UBFT ||
			    i == SKMEM_REGION_KBFT ||
			    i == SKMEM_REGION_RXKBFT ||
			    i == SKMEM_REGION_TXKBFT ||
			    i == SKMEM_REGION_TXAUSD ||
			    i == SKMEM_REGION_RXFUSD ||
			    i == SKMEM_REGION_USTATS ||
			    i == SKMEM_REGION_KSTATS ||
			    i == SKMEM_REGION_INTRINSIC ||
			    i == SKMEM_REGION_FLOWADV ||
			    i == SKMEM_REGION_NEXUSADV ||
			    i == SKMEM_REGION_SYSCTLS ||
			    i == SKMEM_REGION_GUARD_TAIL);
			continue;
		}

		/* not for nexus */
		ASSERT(i != SKMEM_REGION_SYSCTLS);

		/*
		 * Get region offsets from base of mmap span; the arena
		 * doesn't need to be mmap'd at this point, since we
		 * simply compute the relative offset.
		 */
		roff[i] = skmem_arena_get_region_offset(ar, i);
	}

	/*
	 * The schema is made up of the descriptor followed inline by an
	 * array of offsets to the tx, rx, allocator and event rings in
	 * the mmap span.  Each entry stores the offset between the ring
	 * and the schema, so the information is usable in userspace to
	 * reach the ring from the schema.
	 */
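	/*
	 * Worked example (illustrative numbers, not the actual layout):
	 * if the schema region starts at offset 0x1000 into the mmap
	 * span and this csm object sits at 0x80 within it, base ==
	 * 0x1080; a TX ring object at region offset 0x3000 plus object
	 * offset 0x40 then gets ring_off == 0x3040 - 0x1080 == 0x1fc0,
	 * which userspace resolves as (char *)csm + ring_off.
	 */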
	base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);

	/* initialize schema with tx ring info */
	for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
		kr = &na->na_tx_rings[j];
		if (KR_KERNEL_ONLY(kr)) {       /* skip kernel-only rings */
			continue;
		}

		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}
	/* initialize schema with rx ring info */
	for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
		kr = &na->na_rx_rings[j];
		if (KR_KERNEL_ONLY(kr)) {       /* skip kernel-only rings */
			continue;
		}

		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
		    (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}
	/* initialize schema with allocator ring info */
	for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
	    i < n[NR_A]; i++, j++) {
		mach_vm_offset_t usd_roff;

		usd_roff = roff[SKMEM_REGION_TXAUSD];
		kr = &na->na_alloc_rings[j];
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
		    &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
		    &ksd_oi, &usd_oi);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
	}
	/* initialize schema with free ring info */
	for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
	    i < n[NR_F]; i++, j++) {
		mach_vm_offset_t usd_roff;

		usd_roff = roff[SKMEM_REGION_RXFUSD];
		kr = &na->na_free_rings[j];
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

		skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
		    &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
		    &ksd_oi, &usd_oi);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
	}
	/* initialize schema with event ring info */
	for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
	    n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
		ASSERT(csm->csm_num_event_rings != 0);
		kr = &na->na_event_rings[j];
		ASSERT(!KR_KERNEL_ONLY(kr));
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}
	/* initialize schema with large buf alloc ring info */
	for (i = 0, j = ch->ch_first[NR_LBA], k = n[NR_TX] + n[NR_RX] +
	    n[NR_A] + n[NR_F] + n[NR_EV]; i < n[NR_LBA]; i++, j++) {
		ASSERT(csm->csm_large_buf_alloc_rings != 0);
		kr = &na->na_large_buf_alloc_rings[j];
		ASSERT(!KR_KERNEL_ONLY(kr));
		ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
		skmem_cache_get_obj_info(arn->arn_ring_cache,
		    kr->ckr_ring, &ring_oi, NULL);
		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
		    (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

		ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
		skmem_cache_get_obj_info(kr->ckr_ksds_cache,
		    kr->ckr_ksds, &ksd_oi, &usd_oi);

		*(mach_vm_offset_t *)
		(uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
		    (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
		    base;
	}

	*(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
	    __ch_umd_redzone_cookie;
	*(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
	*(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
	    na->na_md_subtype;

	if (arn->arn_stats_obj != NULL) {
		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
		ASSERT(roff[SKMEM_REGION_USTATS] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
		    roff[SKMEM_REGION_USTATS];
		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
		    na->na_stats_type;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
		*(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
		    NEXUS_STATS_TYPE_INVALID;
	}

	if (arn->arn_flowadv_obj != NULL) {
		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
		ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
		    roff[SKMEM_REGION_FLOWADV];
		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
		    na->na_flowadv_max;
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
		*(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
	}

	if (arn->arn_nexusadv_obj != NULL) {
		struct __kern_nexus_adv_metadata *adv_md;

		adv_md = arn->arn_nexusadv_obj;
		ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
		ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
		    roff[SKMEM_REGION_NEXUSADV];
	} else {
		ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
		*(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
	}

	ch->ch_schema = csm;
	ch->ch_schema_offset = base;

	return 0;
}

/*
 * Called by all routines that create nexus_adapters.
 * Attach na to the ifp (if any) and provide defaults
 * for optional callbacks.  Defaults assume that we
 * are creating a hardware nexus_adapter.
 */
void
na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
    struct kern_nexus_domain_provider *nxdom_prov)
{
	SK_LOCK_ASSERT_HELD();

	ASSERT(nx != NULL);
	ASSERT(nxdom_prov != NULL);
	ASSERT(na->na_krings_create != NULL);
	ASSERT(na->na_krings_delete != NULL);
	if (na->na_type != NA_NETIF_COMPAT_DEV) {
		ASSERT(na_get_nrings(na, NR_TX) != 0);
	}
	if (na->na_type != NA_NETIF_COMPAT_HOST) {
		ASSERT(na_get_nrings(na, NR_RX) != 0);
	}
	ASSERT(na->na_channels == 0);

	if (na->na_notify == NULL) {
		na->na_notify = na_notify;
	}

	na->na_nx = nx;
	na->na_nxdom_prov = nxdom_prov;

	SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
	    SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
	    SK_KVA(na->na_arena));
}

void
na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
    boolean_t within_kevent, boolean_t selwake, uint32_t hint)
{
	struct nexus_adapter *na = KRNA(kring);
	enum txrx t = kring->ckr_tx;

	SK_DF(SK_VERB_EVENTS,
	    "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
	    hint, CHAN_FILT_HINT_BITS);

	csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
	/*
	 * Optimization: avoid a wakeup on the global
	 * queue if nobody has registered for more
	 * than one ring.
	 */
	if (na->na_si_users[t] > 0) {
		csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
	}
}

/* default notify callback */
static int
na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
{
#pragma unused(p)
	SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
	    SK_VERB_TX : SK_VERB_RX),
	    "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
	    "flags 0x%x, kh %u kt %u | h %u t %u",
	    sk_proc_name_address(p), sk_proc_pid(p),
	    (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
	    SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
	    kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
	    kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);

	na_post_event(kring, (flags & NA_NOTEF_PUSH),
	    (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);

	return 0;
}

/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
/* call with SK_LOCK held */
int
na_update_config(struct nexus_adapter *na)
{
	uint32_t txr, txd, rxr, rxd;

	SK_LOCK_ASSERT_HELD();

	txr = txd = rxr = rxd = 0;
	if (na->na_config == NULL ||
	    na->na_config(na, &txr, &txd, &rxr, &rxd)) {
		/* take whatever we had at init time */
		txr = na_get_nrings(na, NR_TX);
		txd = na_get_nslots(na, NR_TX);
		rxr = na_get_nrings(na, NR_RX);
		rxd = na_get_nslots(na, NR_RX);
	}

	if (na_get_nrings(na, NR_TX) == txr &&
	    na_get_nslots(na, NR_TX) == txd &&
	    na_get_nrings(na, NR_RX) == rxr &&
	    na_get_nslots(na, NR_RX) == rxd) {
		return 0; /* nothing changed */
	}
	SK_D("stored config %s: txring %u x %u, rxring %u x %u",
	    na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
	    na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
	SK_D("new config %s: txring %u x %u, rxring %u x %u",
	    na->na_name, txr, txd, rxr, rxd);

	if (na->na_channels == 0) {
		SK_D("configuration changed (but fine)");
		na_set_nrings(na, NR_TX, txr);
		na_set_nslots(na, NR_TX, txd);
		na_set_nrings(na, NR_RX, rxr);
		na_set_nslots(na, NR_RX, rxd);
		return 0;
	}
	SK_ERR("configuration changed while active, this is bad...");
	return 1;
}

static void
na_kr_setup_netif_svc_map(struct nexus_adapter *na)
{
	uint32_t i;
	uint32_t num_tx_rings;

	ASSERT(na->na_type == NA_NETIF_DEV);
	num_tx_rings = na_get_nrings(na, NR_TX);

	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_BK));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RD));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RV));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_VI));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_CTL));

	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);

	_CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);

	/*
	 * We support the following 2 configurations:
	 * 1. packets from all 10 service classes map to one ring.
	 * 2. a 10:4 mapping between the service classes and the rings;
	 *    these 4 rings map to the 4 WMM access categories.
	 */
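	/*
	 * Illustrative sketch: in the WMM configuration, packets tagged
	 * KPKT_SC_BK_SYS and KPKT_SC_BK both resolve through
	 * na_kring_svc_lut[MBUF_SCIDX(sc)] to the background TX ring,
	 * while KPKT_SC_VO resolves to the voice ring; in the default
	 * configuration every service class resolves to ring 0.
	 */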
	if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
		ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
		/* setup the adapter's service class LUT */
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
		NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
		NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
		NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
		NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);

		/* initialize the service class for each of the 4 rings */
		NAKR_SET_KR_SVC(na, KPKT_SC_BK);
		NAKR_SET_KR_SVC(na, KPKT_SC_BE);
		NAKR_SET_KR_SVC(na, KPKT_SC_VI);
		NAKR_SET_KR_SVC(na, KPKT_SC_VO);
	} else {
		ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
		    NEXUS_QMAP_TYPE_DEFAULT);
		/* 10:1 mapping */
		for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
			na->na_kring_svc_lut[i] = 0;
		}
		for (i = 0; i < num_tx_rings; i++) {
			NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
		}
	}
}

static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");

static lck_grp_t *
na_kr_q_lck_grp(enum txrx t)
{
	switch (t) {
	case NR_TX:
		return &channel_txq_lock_group;
	case NR_RX:
		return &channel_rxq_lock_group;
	case NR_A:
	case NR_F:
	case NR_LBA:
		return &channel_alloc_lock_group;
	case NR_EV:
		return &channel_evq_lock_group;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static lck_grp_t *
na_kr_s_lck_grp(enum txrx t)
{
	switch (t) {
	case NR_TX:
		return &channel_txs_lock_group;
	case NR_RX:
		return &channel_rxs_lock_group;
	case NR_A:
	case NR_F:
	case NR_LBA:
		return &channel_alloc_lock_group;
	case NR_EV:
		return &channel_evs_lock_group;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

static void
kr_init_tbr(struct __kern_channel_ring *r)
{
	r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
	r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
	r->ckr_tbr_last = 0;
}

struct kern_pbufpool *
na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
{
	struct kern_pbufpool *pp = NULL;
	switch (t) {
	case NR_RX:
	case NR_F:
	case NR_EV:
		pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
		break;
	case NR_TX:
	case NR_A:
	case NR_LBA:
		pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	return pp;
}

/*
 * Create the krings array and initialize the fields common to all adapters.
 * The array layout is this:
 *
 *                                 +----------+
 * na->na_tx_rings -------------> |          | \
 *                                 |          |  } na->na_num_tx_rings
 *                                 |          | /
 * na->na_rx_rings -------------> +----------+
 *                                 |          | \
 *                                 |          |  } na->na_num_rx_rings
 *                                 |          | /
 * na->na_alloc_rings ----------> +----------+
 *                                 |          | \
 * na->na_free_rings -----------> +----------+  } na->na_num_allocator_ring_pairs
 *                                 |          | /
 * na->na_event_rings ----------> +----------+
 *                                 |          | \
 *                                 |          |  } na->na_num_event_rings
 *                                 |          | /
 * na->na_large_buf_alloc_rings -> +----------+
 *                                 |          | \
 *                                 |          |  } na->na_num_large_buf_alloc_rings
 *                                 |          | /
 * na->na_tail -----------------> +----------+
 */
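/*
 * Worked example (illustrative): with 2 TX, 2 RX, 1 alloc/free pair,
 * 1 event ring and no large-buf alloc rings, the array holds
 * 2 + 2 + 1 + 1 + 1 = 7 krings, with na_rx_rings == na_tx_rings + 2,
 * na_alloc_rings == na_rx_rings + 2, na_free_rings == na_alloc_rings + 1,
 * na_event_rings == na_free_rings + 1 and na_tail == na_event_rings + 1.
 */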
/* call with SK_LOCK held */
static int
na_kr_create(struct nexus_adapter *na, boolean_t alloc_ctx)
{
	lck_grp_t *q_lck_grp, *s_lck_grp;
	uint32_t i, count, ndesc;
	struct kern_pbufpool *pp = NULL;
	struct __kern_channel_ring *kring;
	uint32_t n[NR_ALL];
	int c, tot_slots, err = 0;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	n[NR_TX] = na_get_nrings(na, NR_TX);
	n[NR_RX] = na_get_nrings(na, NR_RX);
	n[NR_A] = na_get_nrings(na, NR_A);
	n[NR_F] = na_get_nrings(na, NR_F);
	n[NR_EV] = na_get_nrings(na, NR_EV);
	n[NR_LBA] = na_get_nrings(na, NR_LBA);

	count = n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV] + n[NR_LBA];

	na->na_tx_rings = sk_alloc_type_array(struct __kern_channel_ring, count,
	    Z_WAITOK, skmem_tag_nx_rings);
	if (__improbable(na->na_tx_rings == NULL)) {
		SK_ERR("Cannot allocate krings");
		err = ENOMEM;
		goto error;
	}

	na->na_rx_rings = na->na_tx_rings + n[NR_TX];
	if (n[NR_A] != 0) {
		na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
		na->na_free_rings = na->na_alloc_rings + n[NR_A];
	} else {
		na->na_alloc_rings = na->na_free_rings = NULL;
	}
	if (n[NR_EV] != 0) {
		if (na->na_free_rings != NULL) {
			na->na_event_rings = na->na_free_rings + n[NR_F];
		} else {
			na->na_event_rings = na->na_rx_rings + n[NR_RX];
		}
	}
	if (n[NR_LBA] != 0) {
		ASSERT(n[NR_A] != 0);
		if (na->na_event_rings != NULL) {
			na->na_large_buf_alloc_rings = na->na_event_rings + n[NR_EV];
		} else {
			/* alloc/free rings must also be present */
			ASSERT(na->na_free_rings != NULL);
			na->na_large_buf_alloc_rings = na->na_free_rings + n[NR_F];
		}
	}

	/* total number of slots for TX/RX adapter rings */
	c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
	    (n[NR_RX] * na_get_nslots(na, NR_RX));

	/* for scratch space on alloc and free rings */
	if (n[NR_A] != 0) {
		tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
		tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
		tot_slots += n[NR_LBA] * na_get_nslots(na, NR_LBA);
		c = tot_slots;
	}
	na->na_total_slots = tot_slots;

	/* slot context (optional) for all TX/RX ring slots of this adapter */
	if (alloc_ctx) {
		na->na_slot_ctxs =
		    skn_alloc_type_array(slot_ctxs, struct slot_ctx,
		    na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
		if (na->na_slot_ctxs == NULL) {
			SK_ERR("Cannot allocate slot contexts");
			err = ENOMEM;
			goto error;
		}
		os_atomic_or(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
	}

	/*
	 * Packet handle array storage for all TX/RX ring slots of this
	 * adapter.
	 */
	na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
	    na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
	if (na->na_scratch == NULL) {
		SK_ERR("Cannot allocate scratch space");
		err = ENOMEM;
		goto error;
	}

	/*
	 * All fields in krings are 0 except the ones initialized below,
	 * but better to be explicit about the important kring fields.
	 */
	for_all_rings(t) {
		ndesc = na_get_nslots(na, t);
		pp = na_kr_get_pp(na, t);
		for (i = 0; i < n[t]; i++) {
			kring = &NAKR(na, t)[i];
			bzero(kring, sizeof(*kring));
			kring->ckr_na = na;
			kring->ckr_pp = pp;
			kring->ckr_max_pkt_len =
			    (t == NR_LBA ? PP_BUF_SIZE_LARGE(pp) :
			    PP_BUF_SIZE_DEF(pp)) *
			    pp->pp_max_frags;
			kring->ckr_ring_id = i;
			kring->ckr_tx = t;
			kr_init_to_mhints(kring, ndesc);
			kr_init_tbr(kring);
			if (NA_KERNEL_ONLY(na)) {
				kring->ckr_flags |= CKRF_KERNEL_ONLY;
			}
			if (na->na_flags & NAF_HOST_ONLY) {
				kring->ckr_flags |= CKRF_HOST;
			}
			ASSERT((t >= NR_TXRX) || (c > 0));
			if ((t < NR_TXRX) &&
			    (na->na_flags & NAF_SLOT_CONTEXT)) {
				ASSERT(na->na_slot_ctxs != NULL);
				kring->ckr_flags |= CKRF_SLOT_CONTEXT;
				kring->ckr_slot_ctxs =
				    na->na_slot_ctxs + (tot_slots - c);
			}
			ASSERT(na->na_scratch != NULL);
			if (t < NR_TXRXAF || t == NR_LBA) {
				kring->ckr_scratch =
				    na->na_scratch + (tot_slots - c);
			}
			if (t < NR_TXRXAF || t == NR_LBA) {
				c -= ndesc;
			}
			switch (t) {
			case NR_A:
				if (i == 0) {
					kring->ckr_na_sync =
					    na_packet_pool_alloc_sync;
					kring->ckr_alloc_ws =
					    na_upp_alloc_lowat;
				} else {
					ASSERT(i == 1);
					kring->ckr_na_sync =
					    na_packet_pool_alloc_buf_sync;
					kring->ckr_alloc_ws =
					    na_upp_alloc_buf_lowat;
				}
				break;
			case NR_F:
				if (i == 0) {
					kring->ckr_na_sync =
					    na_packet_pool_free_sync;
				} else {
					ASSERT(i == 1);
					kring->ckr_na_sync =
					    na_packet_pool_free_buf_sync;
				}
				break;
			case NR_TX:
				kring->ckr_na_sync = na->na_txsync;
				if (na->na_flags & NAF_TX_MITIGATION) {
					kring->ckr_flags |= CKRF_MITIGATION;
				}
				switch (na->na_type) {
#if CONFIG_NEXUS_USER_PIPE
				case NA_USER_PIPE:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue = kr_txprologue;
					kring->ckr_finalize = NULL;
					break;
#endif /* CONFIG_NEXUS_USER_PIPE */
#if CONFIG_NEXUS_MONITOR
				case NA_MONITOR:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue = kr_txprologue;
					kring->ckr_finalize = NULL;
					break;
#endif /* CONFIG_NEXUS_MONITOR */
				default:
					if (na->na_flags & NAF_USER_PKT_POOL) {
						kring->ckr_prologue =
						    kr_txprologue_upp;
						kring->ckr_finalize =
						    kr_txfinalize_upp;
					} else {
						kring->ckr_prologue =
						    kr_txprologue;
						kring->ckr_finalize =
						    kr_txfinalize;
					}
					break;
				}
				break;
			case NR_RX:
				kring->ckr_na_sync = na->na_rxsync;
				if (na->na_flags & NAF_RX_MITIGATION) {
					kring->ckr_flags |= CKRF_MITIGATION;
				}
				switch (na->na_type) {
#if CONFIG_NEXUS_USER_PIPE
				case NA_USER_PIPE:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue =
					    kr_rxprologue_nodetach;
					kring->ckr_finalize = kr_rxfinalize;
					break;
#endif /* CONFIG_NEXUS_USER_PIPE */
#if CONFIG_NEXUS_MONITOR
				case NA_MONITOR:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue =
					    kr_rxprologue_nodetach;
					kring->ckr_finalize = kr_rxfinalize;
					break;
#endif /* CONFIG_NEXUS_MONITOR */
				default:
					if (na->na_flags & NAF_USER_PKT_POOL) {
						kring->ckr_prologue =
						    kr_rxprologue_upp;
						kring->ckr_finalize =
						    kr_rxfinalize_upp;
					} else {
						kring->ckr_prologue =
						    kr_rxprologue;
						kring->ckr_finalize =
						    kr_rxfinalize;
					}
					break;
				}
				break;
			case NR_EV:
				kring->ckr_na_sync = kern_channel_event_sync;
				break;
			case NR_LBA:
				kring->ckr_na_sync = na_packet_pool_alloc_large_sync;
				kring->ckr_alloc_ws = na_upp_alloc_lowat;
				break;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			if (t != NR_EV) {
				kring->ckr_na_notify = na->na_notify;
			} else {
				kring->ckr_na_notify = NULL;
			}
			(void) snprintf(kring->ckr_name,
			    sizeof(kring->ckr_name) - 1,
			    "%s %s%u%s", na->na_name, sk_ring2str(t), i,
			    ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
			SK_DF(SK_VERB_NA | SK_VERB_RING,
			    "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
			    CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
			kring->ckr_state = KR_READY;
			q_lck_grp = na_kr_q_lck_grp(t);
			s_lck_grp = na_kr_s_lck_grp(t);
			kring->ckr_qlock_group = q_lck_grp;
			lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
			    &channel_lock_attr);
			kring->ckr_slock_group = s_lck_grp;
			lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
			    &channel_lock_attr);
			csi_init(&kring->ckr_si,
			    (kring->ckr_flags & CKRF_MITIGATION),
			    na->na_ch_mit_ival);
		}
		csi_init(&na->na_si[t],
		    (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
		    na->na_ch_mit_ival);
	}
1670 ASSERT(c == 0);
1671 na->na_tail = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F] +
1672 n[NR_EV] + n[NR_LBA];
1673
1674 if (na->na_type == NA_NETIF_DEV) {
1675 na_kr_setup_netif_svc_map(na);
1676 }
1677
1678 /* validate now for cases where we create only krings */
1679 na_krings_verify(na);
1680 return 0;
1681
1682 error:
1683 ASSERT(err != 0);
1684 if (na->na_tx_rings != NULL) {
1685 sk_free_type_array(struct __kern_channel_ring,
1686 na->na_tail - na->na_tx_rings, na->na_tx_rings);
1687 }
1688 if (na->na_slot_ctxs != NULL) {
1689 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1690 skn_free_type_array(slot_ctxs,
1691 struct slot_ctx, na->na_total_slots,
1692 na->na_slot_ctxs);
1693 na->na_slot_ctxs = NULL;
1694 }
1695 if (na->na_scratch != NULL) {
1696 skn_free_type_array(scratch,
1697 kern_packet_t, na->na_total_slots,
1698 na->na_scratch);
1699 na->na_scratch = NULL;
1700 }
1701 return err;
1702 }
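
/*
 * Illustrative sketch (not part of the build): the krings for every
 * ring type live in one contiguous array, which is why na_tail above is
 * derived from the per-type counts and why na_kr_delete() below can
 * walk all rings with a single pointer sweep.  Roughly:
 *
 *   na_tx_rings -> [ TX ][ RX ][ ALLOC ][ FREE ][ EV ][ LBA ] <- na_tail
 *
 *   struct __kern_channel_ring *kr;
 *   for (kr = na->na_tx_rings; kr != na->na_tail; kr++) {
 *           // visits each kring of each type exactly once
 *   }
 */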
1703
1704 /* undo the actions performed by na_kr_create() */
1705 /* call with SK_LOCK held */
1706 static void
1707 na_kr_delete(struct nexus_adapter *na)
1708 {
1709 struct __kern_channel_ring *kring = na->na_tx_rings;
1710 enum txrx t;
1711
1712 ASSERT((kring != NULL) && (na->na_tail != NULL));
1713 SK_LOCK_ASSERT_HELD();
1714
1715 for_all_rings(t) {
1716 csi_destroy(&na->na_si[t]);
1717 }
1718 /* we rely on the krings layout described above */
1719 for (; kring != na->na_tail; kring++) {
1720 lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1721 lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1722 csi_destroy(&kring->ckr_si);
1723 if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1724 kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1725 ASSERT(kring->ckr_slot_ctxs != NULL);
1726 kring->ckr_slot_ctxs = NULL;
1727 }
1728 }
1729 if (na->na_slot_ctxs != NULL) {
1730 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1731 os_atomic_andnot(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
1732 skn_free_type_array(slot_ctxs,
1733 struct slot_ctx, na->na_total_slots,
1734 na->na_slot_ctxs);
1735 na->na_slot_ctxs = NULL;
1736 }
1737 if (na->na_scratch != NULL) {
1738 skn_free_type_array(scratch,
1739 kern_packet_t, na->na_total_slots,
1740 na->na_scratch);
1741 na->na_scratch = NULL;
1742 }
1743 ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1744 sk_free_type_array(struct __kern_channel_ring,
1745 na->na_tail - na->na_tx_rings, na->na_tx_rings);
1746 na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1747 na->na_free_rings = na->na_event_rings = na->na_tail = NULL;
1748 }
1749
1750 static void
1751 na_kr_slot_desc_init(struct __slot_desc *ksds,
1752 boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1753 {
1754 size_t i;
1755
1756 bzero(ksds, ndesc * SLOT_DESC_SZ);
1757 if (usds != NULL) {
1758 ASSERT(!kernel_only);
1759 bzero(usds, ndesc * SLOT_DESC_SZ);
1760 } else {
1761 ASSERT(kernel_only);
1762 }
1763
1764 for (i = 0; i < ndesc; i++) {
1765 KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1766 if (!kernel_only) {
1767 USD_INIT(SLOT_DESC_USD(&usds[i]));
1768 }
1769 }
1770 }
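
/*
 * Illustrative sketch (not part of the build): the kernel (KSD) and
 * user (USD) slot descriptors are parallel arrays indexed by slot, so
 * slot i of a user-visible ring is described by ksds[i] on the kernel
 * side and exported through usds[i].  The two call shapes are:
 *
 *   na_kr_slot_desc_init(ksds, TRUE, NULL, ndesc);   // kernel-only ring
 *   na_kr_slot_desc_init(ksds, FALSE, usds, ndesc);  // user-visible ring
 */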
1771
1772 /* call with SK_LOCK held */
1773 static int
1774 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1775 {
1776 struct skmem_arena *ar = na->na_arena;
1777 struct skmem_arena_nexus *arn;
1778 mach_vm_offset_t roff[SKMEM_REGIONS];
1779 enum txrx t;
1780 uint32_t i;
1781
1782 SK_LOCK_ASSERT_HELD();
1783 ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1784 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1785 arn = skmem_arena_nexus(ar);
1786 ASSERT(arn != NULL);
1787
1788 bzero(&roff, sizeof(roff));
1789 for (i = 0; i < SKMEM_REGIONS; i++) {
1790 if (ar->ar_regions[i] == NULL) {
1791 continue;
1792 }
1793
1794 /* not for nexus */
1795 ASSERT(i != SKMEM_REGION_SYSCTLS);
1796
1797 /*
1798 * Get region offsets from base of mmap span; the arena
1799 * doesn't need to be mmap'd at this point, since we
1800 * simply compute the relative offset.
1801 */
1802 roff[i] = skmem_arena_get_region_offset(ar, i);
1803 }
1804
1805 for_all_rings(t) {
1806 for (i = 0; i < na_get_nrings(na, t); i++) {
1807 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1808 struct __user_channel_ring *ring = kring->ckr_ring;
1809 mach_vm_offset_t ring_off, usd_roff;
1810 struct skmem_obj_info oi, oim;
1811 uint32_t ndesc;
1812
1813 if (ring != NULL) {
1814 SK_DF(SK_VERB_NA | SK_VERB_RING,
1815 "kr 0x%llx (\"%s\") is already "
1816 "initialized", SK_KVA(kring),
1817 kring->ckr_name);
1818 continue; /* already created by somebody else */
1819 }
1820
1821 if (!KR_KERNEL_ONLY(kring) &&
1822 (ring = skmem_cache_alloc(arn->arn_ring_cache,
1823 SKMEM_NOSLEEP)) == NULL) {
1824 SK_ERR("Cannot allocate %s_ring for kr "
1825 "0x%llx (\"%s\")", sk_ring2str(t),
1826 SK_KVA(kring), kring->ckr_name);
1827 goto cleanup;
1828 }
1829 kring->ckr_flags |= CKRF_MEM_RING_INITED;
1830 kring->ckr_ring = ring;
1831 ndesc = kring->ckr_num_slots;
1832
1833 if (ring == NULL) {
1834 goto skip_user_ring_setup;
1835 }
1836
1837 *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1838
1839 /* offset of current ring in mmap span */
1840 skmem_cache_get_obj_info(arn->arn_ring_cache,
1841 ring, &oi, NULL);
1842 ring_off = (roff[SKMEM_REGION_RING] +
1843 SKMEM_OBJ_ROFF(&oi));
1844
1845 /*
1846 * ring_{buf,md,sd}_ofs offsets are relative to the
1847 * current ring, and not to the base of mmap span.
1848 */
1849 *(mach_vm_offset_t *)(uintptr_t)
1850 &ring->ring_def_buf_base =
1851 (roff[SKMEM_REGION_BUF_DEF] - ring_off);
1852 *(mach_vm_offset_t *)(uintptr_t)
1853 &ring->ring_large_buf_base =
1854 (roff[SKMEM_REGION_BUF_LARGE] - ring_off);
1855 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1856 (roff[SKMEM_REGION_UMD] - ring_off);
1857 _CASSERT(sizeof(uint16_t) ==
1858 sizeof(ring->ring_bft_size));
1859 if (roff[SKMEM_REGION_UBFT] != 0) {
1860 ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1861 NULL);
1862 *(mach_vm_offset_t *)(uintptr_t)
1863 &ring->ring_bft_base =
1864 (roff[SKMEM_REGION_UBFT] - ring_off);
1865 *(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1866 (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1867 skr_c_obj_size;
1868 ASSERT(ring->ring_bft_size ==
1869 ar->ar_regions[SKMEM_REGION_KBFT]->
1870 skr_c_obj_size);
1871 } else {
1872 *(mach_vm_offset_t *)(uintptr_t)
1873 &ring->ring_bft_base = 0;
1874 *(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1875 }
1876
1877 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
1878 usd_roff = roff[SKMEM_REGION_TXAUSD];
1879 } else {
1880 ASSERT(t == NR_RX || t == NR_F);
1881 usd_roff = roff[SKMEM_REGION_RXFUSD];
1882 }
1883 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1884 (usd_roff - ring_off);
1885
1886 /* copy values from kring */
1887 ring->ring_head = kring->ckr_rhead;
1888 *(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1889 kring->ckr_khead;
1890 *(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1891 kring->ckr_rtail;
1892
1893 _CASSERT(sizeof(uint32_t) ==
1894 sizeof(ring->ring_def_buf_size));
1895 _CASSERT(sizeof(uint32_t) ==
1896 sizeof(ring->ring_large_buf_size));
1897 _CASSERT(sizeof(uint16_t) ==
1898 sizeof(ring->ring_md_size));
1899 *(uint32_t *)(uintptr_t)&ring->ring_def_buf_size =
1900 ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size;
1901 if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) {
1902 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size =
1903 ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size;
1904 } else {
1905 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0;
1906 }
1907 if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1908 *(uint16_t *)(uintptr_t)&ring->ring_md_size =
1909 (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1910 skr_c_obj_size;
1911 ASSERT(ring->ring_md_size ==
1912 ar->ar_regions[SKMEM_REGION_KMD]->
1913 skr_c_obj_size);
1914 } else {
1915 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1916 ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1917 ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1918 }
1919
1920 /* ring info */
1921 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1922 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1923 *(uint16_t *)(uintptr_t)&ring->ring_id =
1924 (uint16_t)kring->ckr_ring_id;
1925 *(uint16_t *)(uintptr_t)&ring->ring_kind =
1926 (uint16_t)kring->ckr_tx;
1927
1928 SK_DF(SK_VERB_NA | SK_VERB_RING,
1929 "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1930 sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1931 kring->ckr_name);
1932 SK_DF(SK_VERB_NA | SK_VERB_RING,
1933 " num_slots: %u", ring->ring_num_slots);
1934 SK_DF(SK_VERB_NA | SK_VERB_RING,
1935 " def_buf_base: 0x%llx",
1936 (uint64_t)ring->ring_def_buf_base);
1937 SK_DF(SK_VERB_NA | SK_VERB_RING,
1938 " large_buf_base: 0x%llx",
1939 (uint64_t)ring->ring_large_buf_base);
1940 SK_DF(SK_VERB_NA | SK_VERB_RING,
1941 " md_base: 0x%llx",
1942 (uint64_t)ring->ring_md_base);
1943 SK_DF(SK_VERB_NA | SK_VERB_RING,
1944 " sd_base: 0x%llx",
1945 (uint64_t)ring->ring_sd_base);
1946 SK_DF(SK_VERB_NA | SK_VERB_RING,
1947 " h, t: %u, %u, %u", ring->ring_head,
1948 ring->ring_tail);
1949 SK_DF(SK_VERB_NA | SK_VERB_RING,
1950 " md_size: %d",
1951 ring->ring_md_size);
1952
1953 /* make sure they're in synch */
1954 _CASSERT(NR_RX == CR_KIND_RX);
1955 _CASSERT(NR_TX == CR_KIND_TX);
1956 _CASSERT(NR_A == CR_KIND_ALLOC);
1957 _CASSERT(NR_F == CR_KIND_FREE);
1958 _CASSERT(NR_EV == CR_KIND_EVENT);
1959 _CASSERT(NR_LBA == CR_KIND_LARGE_BUF_ALLOC);
1960
1961 skip_user_ring_setup:
1962 /*
1963 * This flag tells na_kr_teardown_all() that it should
1964 * go thru the checks to free up the slot maps.
1965 */
1966 kring->ckr_flags |= CKRF_MEM_SD_INITED;
1967 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
1968 kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1969 } else {
1970 ASSERT(t == NR_RX || t == NR_F);
1971 kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1972 }
1973 kring->ckr_ksds =
1974 skmem_cache_alloc(kring->ckr_ksds_cache,
1975 SKMEM_NOSLEEP);
1976 if (kring->ckr_ksds == NULL) {
1977 SK_ERR("Cannot allocate %s_ksds for kr "
1978 "0x%llx (\"%s\")", sk_ring2str(t),
1979 SK_KVA(kring), kring->ckr_name);
1980 goto cleanup;
1981 }
1982 if (!KR_KERNEL_ONLY(kring)) {
1983 skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1984 kring->ckr_ksds, &oi, &oim);
1985 kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1986 }
1987 na_kr_slot_desc_init(kring->ckr_ksds,
1988 KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1989
1990 /* cache last slot descriptor address */
1991 ASSERT(kring->ckr_lim == (ndesc - 1));
1992 kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1993
1994 if ((t < NR_TXRX) &&
1995 !(na->na_flags & NAF_USER_PKT_POOL) &&
1996 na_kr_populate_slots(kring) != 0) {
1997 SK_ERR("Cannot allocate buffers for kr "
1998 "0x%llx (\"%s\")", SK_KVA(kring),
1999 kring->ckr_name);
2000 goto cleanup;
2001 }
2002 }
2003 }
2004
2005 return 0;
2006
2007 cleanup:
2008 na_kr_teardown_all(na, ch, FALSE);
2009
2010 return ENOMEM;
2011 }
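
/*
 * Illustrative sketch (not part of the build): since the ring_*_base
 * fields written above are offsets relative to the ring object itself
 * (not to the base of the mmap span), a process with the arena mapped
 * can resolve each region with pointer arithmetic off the ring:
 *
 *   struct __user_channel_ring *ring = ...;  // lives in the mmap span
 *   void *sd  = (uint8_t *)ring + ring->ring_sd_base;
 *   void *md  = (uint8_t *)ring + ring->ring_md_base;
 *   void *buf = (uint8_t *)ring + ring->ring_def_buf_base;
 */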
2012
2013 static void
2014 na_kr_teardown_common(struct nexus_adapter *na,
2015 struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
2016 boolean_t defunct)
2017 {
2018 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
2019 struct __user_channel_ring *ckr_ring;
2020 boolean_t sd_idle, sd_inited;
2021
2022 ASSERT(arn != NULL);
2023 kr_enter(kring, TRUE);
2024 /*
2025 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
2026 * to make sure that the freeing needs to happen (else just
2027 * nullify the values).
2028 * If this adapter owns the memory for the slot descriptors,
2029 * check if the region is marked as busy (sd_idle is false)
2030 * and leave the kring's slot descriptor fields alone if so,
2031 * at defunct time. At final teardown time, sd_idle must be
2032 * true else we assert; this indicates a missing call to
2033 * skmem_arena_nexus_sd_set_noidle().
2034 */
2035 sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
2036 if (sd_inited) {
2037 /* callee will do KR_KSD(), so check */
2038 if (((t < NR_TXRX) || (t == NR_EV)) &&
2039 (kring->ckr_ksds != NULL)) {
2040 na_kr_depopulate_slots(kring, ch, defunct);
2041 }
2042 /* leave CKRF_MEM_SD_INITED flag alone until idle */
2043 sd_idle = skmem_arena_nexus_sd_idle(arn);
2044 VERIFY(sd_idle || defunct);
2045 } else {
2046 sd_idle = TRUE;
2047 }
2048
2049 if (sd_idle) {
2050 kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2051 if (kring->ckr_ksds != NULL) {
2052 if (sd_inited) {
2053 skmem_cache_free(kring->ckr_ksds_cache,
2054 kring->ckr_ksds);
2055 }
2056 kring->ckr_ksds = NULL;
2057 kring->ckr_ksds_last = NULL;
2058 kring->ckr_usds = NULL;
2059 }
2060 ASSERT(kring->ckr_ksds_last == NULL);
2061 ASSERT(kring->ckr_usds == NULL);
2062 }
2063
2064 if ((ckr_ring = kring->ckr_ring) != NULL) {
2065 kring->ckr_ring = NULL;
2066 }
2067
2068 if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2069 ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2070 if (ckr_ring != NULL) {
2071 skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2072 }
2073 kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2074 }
2075
2076 if (defunct) {
2077 /* if defunct, drop everything; see KR_DROP() */
2078 kring->ckr_flags |= CKRF_DEFUNCT;
2079 }
2080 kr_exit(kring);
2081 }
2082
2083 /*
2084 * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2085 */
2086 static void
2087 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2088 boolean_t defunct)
2089 {
2090 enum txrx t;
2091
2092 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2093
2094 /* skip if this adapter has no allocated rings */
2095 if (na->na_tx_rings == NULL) {
2096 return;
2097 }
2098
2099 for_all_rings(t) {
2100 for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2101 na_kr_teardown_common(na, &NAKR(na, t)[i],
2102 t, ch, defunct);
2103 }
2104 }
2105 }
2106
2107 /*
2108 * Teardown only {tx,rx} rings assigned to the channel.
2109 */
2110 static void
2111 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2112 boolean_t defunct, struct proc *p)
2113 {
2114 enum txrx t;
2115
2116 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2117
2118 for_rx_tx(t) {
2119 ring_id_t qfirst = ch->ch_first[t];
2120 ring_id_t qlast = ch->ch_last[t];
2121 uint32_t i;
2122
2123 for (i = qfirst; i < qlast; i++) {
2124 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2125 na_kr_teardown_common(na, kring, t, ch, defunct);
2126
2127 /*
2128 * Issue a notify to wake up anyone sleeping in kqueue
2129 * so that they notice the newly defuncted channels and
2130 * return an error
2131 */
2132 kring->ckr_na_notify(kring, p, 0);
2133 }
2134 }
2135 }
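
/*
 * Illustrative sketch (not part of the build): each channel owns the
 * half-open ring range [ch_first[t], ch_last[t]) per direction, which
 * is what the loop above iterates.  A channel bound to a single TX/RX
 * ring pair has ch_last[t] == ch_first[t] + 1, so exactly one kring is
 * torn down (and notified) per direction.
 */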
2136
2137 static int
2138 na_kr_populate_slots(struct __kern_channel_ring *kring)
2139 {
2140 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2141 struct nexus_adapter *na = KRNA(kring);
2142 kern_pbufpool_t pp = kring->ckr_pp;
2143 uint32_t nslots = kring->ckr_num_slots;
2144 uint32_t start_idx, i;
2145 uint32_t sidx = 0; /* slot counter */
2146 struct __kern_slot_desc *ksd;
2147 struct __user_slot_desc *usd;
2148 struct __kern_quantum *kqum;
2149 nexus_type_t nexus_type;
2150 int err = 0;
2151
2152 ASSERT(kring->ckr_tx < NR_TXRX);
2153 ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2154 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2155 ASSERT(pp != NULL);
2156
2157 /*
2158 * xxx_ppool: remove this special case
2159 */
2160 nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2161
2162 switch (nexus_type) {
2163 case NEXUS_TYPE_FLOW_SWITCH:
2164 case NEXUS_TYPE_KERNEL_PIPE:
2165 /*
2166 * xxx_ppool: This is temporary code until we come up with a
2167 * scheme for user space to alloc & attach packets to tx ring.
2168 */
2169 if (kernel_only || kring->ckr_tx == NR_RX) {
2170 return 0;
2171 }
2172 break;
2173
2174 case NEXUS_TYPE_NET_IF:
2175 if (((na->na_type == NA_NETIF_DEV) ||
2176 (na->na_type == NA_NETIF_HOST)) &&
2177 (kernel_only || (kring->ckr_tx == NR_RX))) {
2178 return 0;
2179 }
2180
2181 ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2182 (na->na_type == NA_NETIF_COMPAT_HOST) ||
2183 (na->na_type == NA_NETIF_DEV) ||
2184 (na->na_type == NA_NETIF_VP));
2185
2186 if (!kernel_only) {
2187 if (kring->ckr_tx == NR_RX) {
2188 return 0;
2189 } else {
2190 break;
2191 }
2192 }
2193
2194 ASSERT(kernel_only);
2195
2196 if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2197 (na->na_type == NA_NETIF_COMPAT_HOST)) {
2198 return 0;
2199 }
2200 VERIFY(0);
2201 /* NOTREACHED */
2202 __builtin_unreachable();
2203
2204 case NEXUS_TYPE_USER_PIPE:
2205 case NEXUS_TYPE_MONITOR:
2206 break;
2207
2208 default:
2209 VERIFY(0);
2210 /* NOTREACHED */
2211 __builtin_unreachable();
2212 }
2213
2214 /* Fill the ring with packets */
2215 sidx = start_idx = 0;
2216 for (i = 0; i < nslots; i++) {
2217 kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2218 SKMEM_NOSLEEP));
2219 if (kqum == NULL) {
2220 err = ENOMEM;
2221 SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2222 "after %u of %u, err %d", SK_KVA(na->na_arena),
2223 na->na_arena->ar_name, i, nslots, err);
2224 goto cleanup;
2225 }
2226 ksd = KR_KSD(kring, i);
2227 usd = (kernel_only ? NULL : KR_USD(kring, i));
2228
2229 /* attach packet to slot */
2230 kqum->qum_ksd = ksd;
2231 ASSERT(!KSD_VALID_METADATA(ksd));
2232 KSD_ATTACH_METADATA(ksd, kqum);
2233 if (usd != NULL) {
2234 USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2235 kr_externalize_metadata(kring, pp->pp_max_frags,
2236 kqum, current_proc());
2237 }
2238
2239 SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2240 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2241 SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2242 SK_KVA(&kqum->qum_buf[0]));
2243 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2244 SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2245 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2246 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2247 SK_KVA(usd), METADATA_IDX(kqum),
2248 SK_KVA(kqum->qum_user),
2249 kqum->qum_user->qum_buf[0].buf_idx,
2250 SK_KVA(&kqum->qum_user->qum_buf[0]));
2251 }
2252
2253 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2254 }
2255
2256 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2257 SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2258
2259 cleanup:
2260 if (err != 0) {
2261 sidx = start_idx;
2262 while (i-- > 0) {
2263 ksd = KR_KSD(kring, i);
2264 usd = (kernel_only ? NULL : KR_USD(kring, i));
2265 kqum = ksd->sd_qum;
2266
2267 ASSERT(ksd == kqum->qum_ksd);
2268 KSD_RESET(ksd);
2269 if (usd != NULL) {
2270 USD_RESET(usd);
2271 }
2272 /* detach packet from slot */
2273 kqum->qum_ksd = NULL;
2274 pp_free_packet(pp, SK_PTR_ADDR(kqum));
2275
2276 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2277 }
2278 }
2279 return err;
2280 }
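
/*
 * Illustrative sketch (not part of the build): a populated slot is a
 * two-way binding between slot descriptor and packet, which the cleanup
 * path above unwinds in reverse order:
 *
 *   kqum->qum_ksd = ksd;                    // packet -> slot
 *   KSD_ATTACH_METADATA(ksd, kqum);         // slot   -> packet
 *   USD_ATTACH_METADATA(usd, midx);         // user view, if not kernel-only
 *
 *   // and on error/teardown:
 *   KSD_RESET(ksd);                         // break slot -> packet
 *   kqum->qum_ksd = NULL;                   // break packet -> slot
 *   pp_free_packet(pp, SK_PTR_ADDR(kqum));  // return to the pool
 */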
2281
2282 static void
2283 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2284 struct kern_channel *ch, boolean_t defunct)
2285 {
2286 #pragma unused(ch)
2287 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2288 uint32_t i, j, n = kring->ckr_num_slots;
2289 struct nexus_adapter *na = KRNA(kring);
2290 struct kern_pbufpool *pp = kring->ckr_pp;
2291 boolean_t upp = FALSE;
2292 obj_idx_t midx;
2293
2294 ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2295 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2296
2297 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2298
2299 if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2300 (kring->ckr_tx != NR_EV)) {
2301 upp = TRUE;
2302 }
2303 for (i = 0, j = 0; i < n; i++) {
2304 struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2305 struct __user_slot_desc *usd;
2306 struct __kern_quantum *qum, *kqum;
2307 boolean_t free_packet = FALSE;
2308 int err;
2309
2310 if (!KSD_VALID_METADATA(ksd)) {
2311 continue;
2312 }
2313
2314 kqum = ksd->sd_qum;
2315 usd = (kernel_only ? NULL : KR_USD(kring, i));
2316 midx = METADATA_IDX(kqum);
2317
2318 /*
2319 * if the packet is internalized it should not be in the
2320 * hash table of packets loaned to user space.
2321 */
2322 if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2323 if ((qum = pp_find_upp(pp, midx)) != NULL) {
2324 panic("internalized packet 0x%llx in htbl",
2325 SK_KVA(qum));
2326 /* NOTREACHED */
2327 __builtin_unreachable();
2328 }
2329 free_packet = TRUE;
2330 } else if (upp) {
2331 /*
2332 * if the packet is not internalized check if it is
2333 * in the list of packets loaned to user-space.
2334 * Remove from the list before freeing.
2335 */
2336 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2337 qum = pp_remove_upp(pp, midx, &err);
2338 if (err != 0) {
2339 SK_ERR("un-allocated packet or buflet %d %p",
2340 midx, SK_KVA(qum));
2341 if (qum != NULL) {
2342 free_packet = TRUE;
2343 }
2344 }
2345 } else {
2346 free_packet = TRUE;
2347 }
2348
2349 /*
2350 * Clear the user and kernel slot descriptors. Note that
2351 * if we are depopulating the slots due to defunct (and not
2352 * due to normal deallocation/teardown), we leave the user
2353 * slot descriptor alone. At that point the process may
2354 * be suspended, and later when it resumes it would just
2355 * pick up the original contents and move forward with
2356 * whatever it was doing.
2357 */
2358 KSD_RESET(ksd);
2359 if (usd != NULL && !defunct) {
2360 USD_RESET(usd);
2361 }
2362
2363 /* detach packet from slot */
2364 kqum->qum_ksd = NULL;
2365
2366 SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2367 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2368 METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2369 SK_KVA(&kqum->qum_buf[0]));
2370 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2371 SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2372 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2373 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2374 SK_KVA(usd), METADATA_IDX(kqum),
2375 SK_KVA(kqum->qum_user),
2376 kqum->qum_user->qum_buf[0].buf_idx,
2377 SK_KVA(&kqum->qum_user->qum_buf[0]));
2378 }
2379
2380 if (free_packet) {
2381 pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2382 }
2383 }
2384
2385 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2386 SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2387 j, n);
2388 }
2389
2390 int
2391 na_rings_mem_setup(struct nexus_adapter *na,
2392 boolean_t alloc_ctx, struct kern_channel *ch)
2393 {
2394 boolean_t kronly;
2395 int err;
2396
2397 SK_LOCK_ASSERT_HELD();
2398 ASSERT(na->na_channels == 0);
2399 /*
2400 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2401 * the backing memory regions for the adapter.
2402 */
2403 kronly = (na->na_flags & NAF_MEM_NO_INIT);
2404 ASSERT(!kronly || NA_KERNEL_ONLY(na));
2405
2406 /*
2407 * Create and initialize the common fields of the krings array.
2408 * using the information that must be already available in the na.
2409 */
2410 if ((err = na_kr_create(na, alloc_ctx)) == 0 && !kronly) {
2411 err = na_kr_setup(na, ch);
2412 if (err != 0) {
2413 na_kr_delete(na);
2414 }
2415 }
2416
2417 return err;
2418 }
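
/*
 * Illustrative sketch (not part of the build): the two-phase setup
 * above means a kernel-only adapter flagged NAF_MEM_NO_INIT ends up
 * with kring bookkeeping but no backing memory, while everyone else
 * gets both or (on failure) neither:
 *
 *   err = na_kr_create(na, alloc_ctx);   // always: krings + locks
 *   if (err == 0 && !kronly)
 *           err = na_kr_setup(na, ch);   // rings, slot descriptors,
 *                                        // optional slot population
 */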
2419
2420 void
2421 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2422 boolean_t defunct)
2423 {
2424 SK_LOCK_ASSERT_HELD();
2425 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2426
2427 /*
2428 * Deletes the kring and ring array of the adapter. They
2429 * must have been created using na_rings_mem_setup().
2430 *
2431 * XXX: [email protected] -- the parameter "ch" should not be
2432 * needed here; however na_kr_depopulate_slots() needs to
2433 * go thru the channel's user packet pool hash, and so for
2434 * now we leave it here.
2435 */
2436 na_kr_teardown_all(na, ch, defunct);
2437 if (!defunct) {
2438 na_kr_delete(na);
2439 }
2440 }
2441
2442 void
2443 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2444 {
2445 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2446
2447 /*
2448 * Depopulate slots on the TX and RX rings of this channel,
2449 * but don't touch other rings owned by other channels if
2450 * this adapter is being shared.
2451 */
2452 na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2453 }
2454
2455 void
2456 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2457 {
2458 enum txrx t;
2459 uint32_t i;
2460
2461 for_rx_tx(t) {
2462 for (i = 0; i < na_get_nrings(na, t); i++) {
2463 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2464 int error;
2465 error = kr_enter(kring, TRUE);
2466 if (drop) {
2467 kring->ckr_flags |= CKRF_DROP;
2468 } else {
2469 kring->ckr_flags &= ~CKRF_DROP;
2470 }
2471
2472 if (error != 0) {
2473 SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2474 "kr_enter failed %d",
2475 na->na_name, SK_KVA(na),
2476 kring->ckr_name, SK_KVA(kring),
2477 error);
2478 } else {
2479 kr_exit(kring);
2480 }
2481 SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2482 "krflags 0x%b", na->na_name, SK_KVA(na),
2483 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2484 CKRF_BITS);
2485 }
2486 }
2487 }
2488
2489 /*
2490 * Set the stopped/enabled status of a ring. When stopping, this also waits
2491 * for all current activity on the ring to terminate. The status change
2492 * is then notified using the adapter's na_notify callback.
2493 */
2494 static void
2495 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2496 uint32_t state)
2497 {
2498 struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2499
2500 /*
2501 * Mark the ring as stopped/enabled, and run through the
2502 * locks to make sure other users get to see it.
2503 */
2504 if (state == KR_READY) {
2505 kr_start(kr);
2506 } else {
2507 kr_stop(kr, state);
2508 }
2509 }
2510
2511
2512 /* stop or enable all the rings of na */
2513 static void
2514 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2515 {
2516 uint32_t i;
2517 enum txrx t;
2518
2519 SK_LOCK_ASSERT_HELD();
2520
2521 if (!NA_IS_ACTIVE(na)) {
2522 return;
2523 }
2524
2525 for_rx_tx(t) {
2526 for (i = 0; i < na_get_nrings(na, t); i++) {
2527 na_set_ring(na, i, t, state);
2528 }
2529 }
2530 }
2531
2532 /*
2533 * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
2534 * to finish and prevents any new one from starting. Call this before turning
2535 * Skywalk mode off, or before removing the hardware rings (e.g., on module
2536 * unload). As a rule of thumb for Linux drivers, this should be placed near
2537 * each napi_disable().
2538 */
2539 void
2540 na_disable_all_rings(struct nexus_adapter *na)
2541 {
2542 na_set_all_rings(na, KR_STOPPED);
2543 }
2544
2545 /*
2546 * Convenience function used in drivers. Re-enables rxsync and txsync on the
2547 * adapter's rings. In Linux drivers, this should be placed near each
2548 * napi_enable().
2549 */
2550 void
2551 na_enable_all_rings(struct nexus_adapter *na)
2552 {
2553 na_set_all_rings(na, KR_READY /* enabled */);
2554 }
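
/*
 * Illustrative sketch (not part of the build): a driver reconfiguring
 * its data path would bracket the work with the two helpers above, much
 * like napi_disable()/napi_enable() placement on Linux:
 *
 *   na_disable_all_rings(na);    // waits out in-flight tx/rxsync calls
 *   // ... reprogram or remove hardware rings ...
 *   na_enable_all_rings(na);     // new syncs may start again
 */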
2555
2556 void
2557 na_lock_all_rings(struct nexus_adapter *na)
2558 {
2559 na_set_all_rings(na, KR_LOCKED);
2560 }
2561
2562 void
2563 na_unlock_all_rings(struct nexus_adapter *na)
2564 {
2565 na_enable_all_rings(na);
2566 }
2567
2568 int
2569 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2570 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2571 {
2572 struct nexus_adapter *na = NULL;
2573 mach_vm_size_t memsize = 0;
2574 int err = 0;
2575 enum txrx t;
2576
2577 ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2578 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2579
2580 SK_LOCK_ASSERT_HELD();
2581
2582 /* find the nexus adapter and return the reference */
2583 err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2584 if (err != 0) {
2585 ASSERT(na == NULL);
2586 goto done;
2587 }
2588
2589 if (NA_KERNEL_ONLY(na)) {
2590 err = EBUSY;
2591 goto done;
2592 }
2593
2594 /* reject if the adapter is defunct or non-permissive */
2595 if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2596 err = ENXIO;
2597 goto done;
2598 }
2599
2600 err = na_bind_channel(na, ch, chr);
2601 if (err != 0) {
2602 goto done;
2603 }
2604
2605 ASSERT(ch->ch_schema != NULL);
2606 ASSERT(na == ch->ch_na);
2607
2608 for_all_rings(t) {
2609 if (na_get_nrings(na, t) == 0) {
2610 ch->ch_si[t] = NULL;
2611 continue;
2612 }
2613 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2614 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2615 }
2616
2617 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2618
2619 if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2620 AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2621 os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_PRIV_MEM, relaxed);
2622 }
2623
2624 err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2625 if (err != 0) {
2626 goto done;
2627 }
2628
2629 os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_ACTIVE, relaxed);
2630 chr->cr_memsize = memsize;
2631 chr->cr_memoffset = ch->ch_schema_offset;
2632
2633 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2634 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2635 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2636 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2637 na->na_flags, NAF_BITS);
2638
2639 done:
2640 if (err != 0) {
2641 if (ch->ch_schema != NULL || na != NULL) {
2642 if (ch->ch_schema != NULL) {
2643 ASSERT(na == ch->ch_na);
2644 /*
2645 * Callee will unmap memory region if needed,
2646 * as well as release reference held on 'na'.
2647 */
2648 na_disconnect(nx, ch);
2649 na = NULL;
2650 }
2651 if (na != NULL) {
2652 (void) na_release_locked(na);
2653 na = NULL;
2654 }
2655 }
2656 }
2657
2658 return err;
2659 }
2660
2661 void
2662 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2663 {
2664 #pragma unused(nx)
2665 enum txrx t;
2666
2667 SK_LOCK_ASSERT_HELD();
2668
2669 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2670 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2671 ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2672 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2673 ch->ch_na->na_flags, NAF_BITS);
2674
2675 /* destroy mapping and release references */
2676 na_unbind_channel(ch);
2677 ASSERT(ch->ch_na == NULL);
2678 ASSERT(ch->ch_schema == NULL);
2679 for_all_rings(t) {
2680 ch->ch_si[t] = NULL;
2681 }
2682 }
2683
2684 void
2685 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2686 struct nexus_adapter *na, boolean_t locked)
2687 {
2688 #pragma unused(nx)
2689 SK_LOCK_ASSERT_HELD();
2690 if (!locked) {
2691 lck_mtx_lock(&ch->ch_lock);
2692 }
2693
2694 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2695
2696 if (!(na->na_flags & NAF_DEFUNCT)) {
2697 /*
2698 * Mark this adapter as defunct to inform nexus-specific
2699 * teardown handler called by na_teardown() below.
2700 */
2701 os_atomic_or(&na->na_flags, NAF_DEFUNCT, relaxed);
2702
2703 /*
2704 * Depopulate slots.
2705 */
2706 na_teardown(na, ch, TRUE);
2707
2708 /*
2709 * And finally destroy any already-defunct memory regions.
2710 * Do this only if the nexus adapter owns the arena, i.e.
2711 * NAF_MEM_LOANED is not set. Otherwise, we'd expect
2712 * that this routine be called again for the real owner.
2713 */
2714 if (!(na->na_flags & NAF_MEM_LOANED)) {
2715 skmem_arena_defunct(na->na_arena);
2716 }
2717 }
2718
2719 SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2720 "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2721 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2722 na->na_name, ch->ch_info->cinfo_nx_port,
2723 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2724 na->na_flags, NAF_BITS);
2725
2726 if (!locked) {
2727 lck_mtx_unlock(&ch->ch_lock);
2728 }
2729 }
2730
2731 /*
2732 * TODO: [email protected] -- merge this into na_connect()
2733 */
2734 int
2735 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2736 struct chreq *chr, struct proc *p)
2737 {
2738 #pragma unused(p)
2739 struct nexus_adapter *na = NULL;
2740 mach_vm_size_t memsize = 0;
2741 int error = 0;
2742 enum txrx t;
2743
2744 ASSERT(chr->cr_mode & CHMODE_KERNEL);
2745 ASSERT(ch->ch_flags & CHANF_KERNEL);
2746 ASSERT(ch->ch_na == NULL);
2747 ASSERT(ch->ch_schema == NULL);
2748
2749 SK_LOCK_ASSERT_HELD();
2750
2751 error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2752 if (error != 0) {
2753 goto done;
2754 }
2755
2756 if (na == NULL) {
2757 error = EINVAL;
2758 goto done;
2759 }
2760
2761 if (na->na_channels > 0) {
2762 error = EBUSY;
2763 goto done;
2764 }
2765
2766 if (na->na_flags & NAF_DEFUNCT) {
2767 error = ENXIO;
2768 goto done;
2769 }
2770
2771 /*
2772 * Special connect requires the nexus adapter to handle its
2773 * own channel binding and unbinding via na_special(); bail
2774 * if this adapter doesn't support it.
2775 */
2776 if (na->na_special == NULL) {
2777 error = ENOTSUP;
2778 goto done;
2779 }
2780
2781 /* upon success, "ch->ch_na" will point to "na" */
2782 error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2783 if (error != 0) {
2784 ASSERT(ch->ch_na == NULL);
2785 goto done;
2786 }
2787
2788 ASSERT(na->na_flags & NAF_SPEC_INIT);
2789 ASSERT(na == ch->ch_na);
2790 /* make sure this is still the case */
2791 ASSERT(ch->ch_schema == NULL);
2792
2793 for_rx_tx(t) {
2794 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2795 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2796 }
2797
2798 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2799 chr->cr_memsize = memsize;
2800
2801 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2802 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2803 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2804 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2805 na->na_flags, NAF_BITS);
2806
2807 done:
2808 if (error != 0) {
2809 if (ch->ch_na != NULL || na != NULL) {
2810 if (ch->ch_na != NULL) {
2811 ASSERT(na == ch->ch_na);
2812 /* callee will release reference on 'na' */
2813 na_disconnect_spec(nx, ch);
2814 na = NULL;
2815 }
2816 if (na != NULL) {
2817 (void) na_release_locked(na);
2818 na = NULL;
2819 }
2820 }
2821 }
2822
2823 return error;
2824 }
2825
2826 /*
2827 * TODO: [email protected] -- merge this into na_disconnect()
2828 */
2829 void
2830 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2831 {
2832 #pragma unused(nx)
2833 struct nexus_adapter *na = ch->ch_na;
2834 enum txrx t;
2835 int error;
2836
2837 SK_LOCK_ASSERT_HELD();
2838 ASSERT(na != NULL);
2839 ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */
2840
2841 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2842 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2843 na->na_name, ch->ch_info->cinfo_nx_port,
2844 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2845 na->na_flags, NAF_BITS);
2846
2847 /* take a reference for this routine */
2848 na_retain_locked(na);
2849
2850 ASSERT(ch->ch_flags & CHANF_KERNEL);
2851 ASSERT(ch->ch_schema == NULL);
2852 ASSERT(na->na_special != NULL);
2853 /* unbind this channel */
2854 error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2855 ASSERT(error == 0);
2856 ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2857
2858 /* now release our reference; this may be the last */
2859 na_release_locked(na);
2860 na = NULL;
2861
2862 ASSERT(ch->ch_na == NULL);
2863 for_rx_tx(t) {
2864 ch->ch_si[t] = NULL;
2865 }
2866 }
2867
2868 void
2869 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2870 {
2871 #pragma unused(nx)
2872 struct nexus_adapter *na = ch->ch_na;
2873
2874 SK_LOCK_ASSERT_HELD();
2875
2876 ASSERT(ch->ch_flags & CHANF_KERNEL);
2877 ASSERT(NA_KERNEL_ONLY(na));
2878 ASSERT(na->na_special != NULL);
2879
2880 na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2881 }
2882
2883 void
2884 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2885 {
2886 #pragma unused(nx)
2887 struct nexus_adapter *na = ch->ch_na;
2888
2889 SK_LOCK_ASSERT_HELD();
2890
2891 ASSERT(ch->ch_flags & CHANF_KERNEL);
2892 ASSERT(NA_KERNEL_ONLY(na));
2893 ASSERT(na->na_special != NULL);
2894
2895 na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2896 }
2897
2898 /*
2899 * MUST BE CALLED UNDER SK_LOCK()
2900 *
2901 * Get a refcounted reference to a nexus adapter attached
2902 * to the interface specified by chr.
2903 * This is always called in the execution of an ioctl().
2904 *
2905 * Return ENXIO if the interface specified by the request does
2906 * not exist, ENOTSUP if Skywalk is not supported by the interface,
2907 * EINVAL if parameters are invalid, ENOMEM if needed resources
2908 * could not be allocated.
2909 * If successful, hold a reference to the nexus adapter.
2910 *
2911 * No reference is kept on the real interface, which may then
2912 * disappear at any time.
2913 */
2914 int
2915 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2916 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
2917 struct nexus_adapter **na, boolean_t create)
2918 {
2919 int error = 0;
2920
2921 _CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2922
2923 *na = NULL; /* default return value */
2924
2925 SK_LOCK_ASSERT_HELD();
2926
2927 /*
2928 * We cascade through all possible types of nexus adapter.
2929 * All nx_*_na_find() functions return an error and an na,
2930 * with the following combinations:
2931 *
2932 * error na
2933 * 0 NULL type doesn't match
2934 * !0 NULL type matches, but na creation/lookup failed
2935 * 0 !NULL type matches and na created/found
2936 * !0 !NULL impossible
2937 */
2938
2939 #if CONFIG_NEXUS_MONITOR
2940 /* try to see if this is a monitor port */
2941 error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
2942 if (error != 0 || *na != NULL) {
2943 return error;
2944 }
2945 #endif /* CONFIG_NEXUS_MONITOR */
2946 #if CONFIG_NEXUS_USER_PIPE
2947 /* try to see if this is a pipe port */
2948 error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2949 if (error != 0 || *na != NULL) {
2950 return error;
2951 }
2952 #endif /* CONFIG_NEXUS_USER_PIPE */
2953 #if CONFIG_NEXUS_KERNEL_PIPE
2954 /* try to see if this is a kernel pipe port */
2955 error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2956 if (error != 0 || *na != NULL) {
2957 return error;
2958 }
2959 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
2960 #if CONFIG_NEXUS_FLOWSWITCH
2961 /* try to see if this is a flowswitch port */
2962 error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2963 if (error != 0 || *na != NULL) {
2964 return error;
2965 }
2966 #endif /* CONFIG_NEXUS_FLOWSWITCH */
2967 #if CONFIG_NEXUS_NETIF
2968 error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2969 if (error != 0 || *na != NULL) {
2970 return error;
2971 }
2972 #endif /* CONFIG_NEXUS_NETIF */
2973
2974 ASSERT(*na == NULL);
2975 return ENXIO;
2976 }
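
/*
 * Illustrative sketch (not part of the build): given the error/na
 * combinations documented above, each nx_*_na_find() probe can be
 * evaluated the same way:
 *
 *   struct nexus_adapter *na = NULL;
 *   int error = nx_upipe_na_find(nx, ch, chr, nxb, p, &na, create);
 *   if (error != 0)
 *           return error;        // type matched, lookup/creation failed
 *   if (na != NULL)
 *           return 0;            // type matched, adapter found/created
 *   // otherwise: not this type; fall through to the next probe
 */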
2977
2978 void
2979 na_retain_locked(struct nexus_adapter *na)
2980 {
2981 SK_LOCK_ASSERT_HELD();
2982
2983 if (na != NULL) {
2984 #if SK_LOG
2985 uint32_t oref = os_atomic_inc_orig(&na->na_refcount, relaxed);
2986 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2987 na->na_name, SK_KVA(na), oref + 1, na->na_channels);
2988 #else /* !SK_LOG */
2989 os_atomic_inc(&na->na_refcount, relaxed);
2990 #endif /* !SK_LOG */
2991 }
2992 }
2993
2994 /* returns 1 iff the nexus_adapter is destroyed */
2995 int
2996 na_release_locked(struct nexus_adapter *na)
2997 {
2998 uint32_t oref;
2999
3000 SK_LOCK_ASSERT_HELD();
3001
3002 ASSERT(na->na_refcount > 0);
3003 oref = os_atomic_dec_orig(&na->na_refcount, relaxed);
3004 if (oref > 1) {
3005 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
3006 na->na_name, SK_KVA(na), oref - 1, na->na_channels);
3007 return 0;
3008 }
3009 ASSERT(na->na_channels == 0);
3010
3011 if (na->na_dtor != NULL) {
3012 na->na_dtor(na);
3013 }
3014
3015 ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
3016 ASSERT(na->na_slot_ctxs == NULL);
3017 ASSERT(na->na_scratch == NULL);
3018
3019 #if CONFIG_NEXUS_USER_PIPE
3020 nx_upipe_na_dealloc(na);
3021 #endif /* CONFIG_NEXUS_USER_PIPE */
3022 if (na->na_arena != NULL) {
3023 skmem_arena_release(na->na_arena);
3024 na->na_arena = NULL;
3025 }
3026
3027 SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
3028 na->na_name, SK_KVA(na));
3029
3030 NA_FREE(na);
3031 return 1;
3032 }
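
/*
 * Illustrative sketch (not part of the build): na_retain_locked() and
 * na_release_locked() form the usual refcount pair, and the final
 * release runs the destructor and frees the adapter, so a caller must
 * not touch 'na' after a release that returns 1:
 *
 *   na_retain_locked(na);
 *   // ... use na under SK_LOCK ...
 *   if (na_release_locked(na) == 1)
 *           na = NULL;           // destructor ran; adapter is gone
 */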
3033
3034 static struct nexus_adapter *
3035 na_pseudo_alloc(zalloc_flags_t how)
3036 {
3037 struct nexus_adapter *na;
3038
3039 na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
3040 if (na) {
3041 na->na_type = NA_PSEUDO;
3042 na->na_free = na_pseudo_free;
3043 }
3044 return na;
3045 }
3046
3047 static void
3048 na_pseudo_free(struct nexus_adapter *na)
3049 {
3050 ASSERT(na->na_refcount == 0);
3051 SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3052 bzero(na, sizeof(*na));
3053 zfree(na_pseudo_zone, na);
3054 }
3055
3056 static int
3057 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3058 uint32_t flags)
3059 {
3060 #pragma unused(kring, p, flags)
3061 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
3062 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3063 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3064 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3065 flags);
3066
3067 return 0;
3068 }
3069
3070 static int
3071 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3072 uint32_t flags)
3073 {
3074 #pragma unused(kring, p, flags)
3075 SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3076 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3077 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3078 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3079 flags);
3080
3081 ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3082
3083 return 0;
3084 }
3085
3086 static int
3087 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3088 {
3089 SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3090 SK_KVA(na), na_activate_mode2str(mode));
3091
3092 switch (mode) {
3093 case NA_ACTIVATE_MODE_ON:
3094 os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
3095 break;
3096
3097 case NA_ACTIVATE_MODE_DEFUNCT:
3098 break;
3099
3100 case NA_ACTIVATE_MODE_OFF:
3101 os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);
3102 break;
3103
3104 default:
3105 VERIFY(0);
3106 /* NOTREACHED */
3107 __builtin_unreachable();
3108 }
3109
3110 return 0;
3111 }
3112
3113 static void
3114 na_pseudo_dtor(struct nexus_adapter *na)
3115 {
3116 #pragma unused(na)
3117 }
3118
3119 static int
3120 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3121 {
3122 return na_rings_mem_setup(na, FALSE, ch);
3123 }
3124
3125 static void
3126 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3127 boolean_t defunct)
3128 {
3129 na_rings_mem_teardown(na, ch, defunct);
3130 }
3131
3132 /*
3133 * Pseudo nexus adapter; typically used as a generic parent adapter.
3134 */
3135 int
3136 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3137 struct nexus_adapter **ret)
3138 {
3139 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3140 struct nexus_adapter *na;
3141 int error;
3142
3143 SK_LOCK_ASSERT_HELD();
3144 *ret = NULL;
3145
3146 na = na_pseudo_alloc(Z_WAITOK);
3147
3148 ASSERT(na->na_type == NA_PSEUDO);
3149 ASSERT(na->na_free == na_pseudo_free);
3150
3151 (void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3152 na->na_name[sizeof(na->na_name) - 1] = '\0';
3153 uuid_generate_random(na->na_uuid);
3154
3155 /*
3156 * Verify upper bounds; for all cases including user pipe nexus,
3157 * the parameters must have already been validated by corresponding
3158 * nxdom_prov_params() function defined by each domain.
3159 */
3160 na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3161 na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3162 na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3163 na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3164 ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3165 ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3166 ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3167 ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3168
3169 na->na_txsync = na_pseudo_txsync;
3170 na->na_rxsync = na_pseudo_rxsync;
3171 na->na_activate = na_pseudo_activate;
3172 na->na_dtor = na_pseudo_dtor;
3173 na->na_krings_create = na_pseudo_krings_create;
3174 na->na_krings_delete = na_pseudo_krings_delete;
3175
3176 *(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3177 NEXUS_STATS_TYPE_INVALID;
3178
3179 /* other fields are set in the common routine */
3180 na_attach_common(na, nx, NX_DOM_PROV(nx));
3181
3182 if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3183 nx, na)) != 0) {
3184 ASSERT(na->na_arena == NULL);
3185 goto err;
3186 }
3187 ASSERT(na->na_arena != NULL);
3188
3189 *(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3190 ASSERT(na->na_flowadv_max == 0 ||
3191 skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3192
3193 #if SK_LOG
3194 uuid_string_t uuidstr;
3195 SK_D("na_name: \"%s\"", na->na_name);
3196 SK_D(" UUID: %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3197 SK_D(" nx: 0x%llx (\"%s\":\"%s\")",
3198 SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3199 NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3200 SK_D(" flags: %b", na->na_flags, NAF_BITS);
3201 SK_D(" flowadv_max: %u", na->na_flowadv_max);
3202 SK_D(" rings: tx %u rx %u",
3203 na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3204 SK_D(" slots: tx %u rx %u",
3205 na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3206 #if CONFIG_NEXUS_USER_PIPE
3207 SK_D(" next_pipe: %u", na->na_next_pipe);
3208 SK_D(" max_pipes: %u", na->na_max_pipes);
3209 #endif /* CONFIG_NEXUS_USER_PIPE */
3210 #endif /* SK_LOG */
3211
3212 *ret = na;
3213 na_retain_locked(na);
3214
3215 return 0;
3216
3217 err:
3218 if (na != NULL) {
3219 if (na->na_arena != NULL) {
3220 skmem_arena_release(na->na_arena);
3221 na->na_arena = NULL;
3222 }
3223 NA_FREE(na);
3224 }
3225 return error;
3226 }
3227
3228 void
3229 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3230 const flowadv_idx_t fe_idx, const uint32_t flowid)
3231 {
3232 struct skmem_arena *ar = na->na_arena;
3233 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3234 struct __flowadv_entry *fae;
3235
3236 ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3237 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3238
3239 AR_LOCK(ar);
3240
3241 /* we must not get here if arena is defunct; this must be valid */
3242 ASSERT(arn->arn_flowadv_obj != NULL);
3243
3244 VERIFY(fe_idx < na->na_flowadv_max);
3245 fae = &arn->arn_flowadv_obj[fe_idx];
3246 uuid_copy(fae->fae_id, fae_id);
3247 fae->fae_flowid = flowid;
3248 fae->fae_flags = FLOWADVF_VALID;
3249
3250 AR_UNLOCK(ar);
3251 }
3252
3253 void
3254 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3255 const flowadv_idx_t fe_idx, const uint32_t flowid)
3256 {
3257 #pragma unused(fae_id)
3258 struct skmem_arena *ar = na->na_arena;
3259 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3260
3261 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3262 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3263
3264 AR_LOCK(ar);
3265
3266 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3267 if (arn->arn_flowadv_obj != NULL) {
3268 struct __flowadv_entry *fae;
3269
3270 VERIFY(fe_idx < na->na_flowadv_max);
3271 fae = &arn->arn_flowadv_obj[fe_idx];
3272 ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3273 uuid_clear(fae->fae_id);
3274 VERIFY(fae->fae_flowid == flowid);
3275 fae->fae_flowid = 0;
3276 fae->fae_flags = 0;
3277 }
3278
3279 AR_UNLOCK(ar);
3280 }
3281
3282 bool
3283 na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3284 const flowadv_token_t flow_token)
3285 {
3286 struct skmem_arena *ar = na->na_arena;
3287 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3288 bool suspend;
3289
3290 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3291 ASSERT(fe_idx < na->na_flowadv_max);
3292 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3293
3294 AR_LOCK(ar);
3295
3296 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3297
3298 if (arn->arn_flowadv_obj != NULL) {
3299 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3300
3301 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3302 /*
3303 * We cannot guarantee that the flow is still around by now,
3304 * so check if that's the case and let the caller know.
3305 */
3306 if ((suspend = (fae->fae_token == flow_token))) {
3307 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3308 fae->fae_flags |= FLOWADVF_SUSPENDED;
3309 }
3310 } else {
3311 suspend = false;
3312 }
3313 if (suspend) {
3314 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3315 "SUSPEND", sk_proc_name_address(current_proc()),
3316 sk_proc_pid(current_proc()), flow_token, fe_idx);
3317 } else {
3318 SK_ERR("%s(%d) flow token 0x%llu fidx %u no longer around",
3319 sk_proc_name_address(current_proc()),
3320 sk_proc_pid(current_proc()), flow_token, fe_idx);
3321 }
3322
3323 AR_UNLOCK(ar);
3324
3325 return suspend;
3326 }
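
/*
 * Illustrative sketch (not part of the build): the flow token acts as a
 * generation check for the advisory slot.  The entry at fe_idx may have
 * been recycled for another flow by the time set/clear runs, so each
 * helper mutates the entry only when the stored token still matches and
 * reports the outcome to the caller:
 *
 *   if (fae->fae_token == flow_token) {
 *           fae->fae_flags |= FLOWADVF_SUSPENDED;    // na_flowadv_set()
 *           // na_flowadv_clear() instead clears FLOWADVF_SUSPENDED
 *   }
 *   // else: flow no longer around; return false/0 to the caller
 */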
3327
3328 int
3329 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3330 const flowadv_token_t flow_token)
3331 {
3332 struct nexus_adapter *na = ch->ch_na;
3333 struct skmem_arena *ar = na->na_arena;
3334 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3335 boolean_t resume;
3336
3337 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3338 ASSERT(fe_idx < na->na_flowadv_max);
3339 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3340
3341 AR_LOCK(ar);
3342
3343 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3344
3345 if (arn->arn_flowadv_obj != NULL) {
3346 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3347
3348 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3349 /*
3350 * We cannot guarantee that the flow is still around by now,
3351 * so check if that's the case and let the caller know.
3352 */
3353 if ((resume = (fae->fae_token == flow_token))) {
3354 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3355 fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3356 }
3357 } else {
3358 resume = FALSE;
3359 }
3360 if (resume) {
3361 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3362 "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
3363 fe_idx);
3364 } else {
3365 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3366 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3367 }
3368
3369 AR_UNLOCK(ar);
3370
3371 return resume;
3372 }
3373
3374 int
3375 na_flowadv_report_ce_event(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3376 const flowadv_token_t flow_token, uint32_t ce_cnt, uint32_t total_pkt_cnt)
3377 {
3378 struct nexus_adapter *na = ch->ch_na;
3379 struct skmem_arena *ar = na->na_arena;
3380 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3381 boolean_t added;
3382
3383 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3384 ASSERT(fe_idx < na->na_flowadv_max);
3385 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3386
3387 AR_LOCK(ar);
3388
3389 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3390
3391 if (arn->arn_flowadv_obj != NULL) {
3392 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3393
3394 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3395 /*
3396 * We cannot guarantee that the flow is still around by now,
3397 * so check if that's the case and let the caller know.
3398 */
3399 if ((added = (fae->fae_token == flow_token))) {
3400 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3401 fae->fae_ce_cnt += ce_cnt;
3402 fae->fae_pkt_cnt += total_pkt_cnt;
3403 }
3404 } else {
3405 added = FALSE;
3406 }
3407 if (added) {
3408 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3409 "fidx %u ce cnt incremented", ch->ch_name,
3410 ch->ch_pid, flow_token, fe_idx);
3411 } else {
3412 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3413 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3414 }
3415
3416 AR_UNLOCK(ar);
3417
3418 return added;
3419 }
3420
3421 void
3422 na_flowadv_event(struct __kern_channel_ring *kring)
3423 {
3424 ASSERT(kring->ckr_tx == NR_TX);
3425
3426 SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
3427 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
3428 KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3429
3430 na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3431 }
3432
3433 static int
3434 na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3435 uint32_t flags)
3436 {
3437 #pragma unused(flags, p)
3438 int n, ret = 0;
3439 slot_idx_t j;
3440 struct __kern_slot_desc *ksd;
3441 struct __user_slot_desc *usd;
3442 struct __kern_quantum *kqum;
3443 struct kern_pbufpool *pp = kring->ckr_pp;
3444 uint32_t nfree = 0;
3445
3446 /* packet pool list is protected by channel lock */
3447 ASSERT(!KR_KERNEL_ONLY(kring));
3448
3449 /* # of new slots */
3450 n = kring->ckr_rhead - kring->ckr_khead;
3451 if (n < 0) {
3452 n += kring->ckr_num_slots;
3453 }
3454
3455 /* nothing to free */
3456 if (__improbable(n == 0)) {
3457 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3458 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3459 "nothing to free");
3460 goto done;
3461 }
3462
3463 j = kring->ckr_khead;
3464 PP_LOCK(pp);
3465 while (n--) {
3466 int err;
3467
3468 ksd = KR_KSD(kring, j);
3469 usd = KR_USD(kring, j);
3470
3471 if (__improbable(!SD_VALID_METADATA(usd))) {
3472 SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3473 ret = EINVAL;
3474 break;
3475 }
3476
3477 kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3478 if (__improbable(err != 0)) {
3479 SK_ERR("un-allocated packet or buflet %d %p",
3480 usd->sd_md_idx, SK_KVA(kqum));
3481 ret = EINVAL;
3482 break;
3483 }
3484
3485 /* detach and free the packet */
3486 kqum->qum_qflags &= ~QUM_F_FINALIZED;
3487 kqum->qum_ksd = NULL;
3488 ASSERT(!KSD_VALID_METADATA(ksd));
3489 USD_DETACH_METADATA(usd);
3490 ASSERT(pp == kqum->qum_pp);
3491 ASSERT(nfree < kring->ckr_num_slots);
3492 kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3493 j = SLOT_NEXT(j, kring->ckr_lim);
3494 }
3495 PP_UNLOCK(pp);
3496
3497 if (__probable(nfree > 0)) {
3498 pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3499 }
3500
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}

#define MAX_BUFLETS 64
static int
alloc_packets(kern_pbufpool_t pp, uint64_t *buf_arr, bool large, uint32_t *ph_cnt)
{
	int err;
	uint32_t need, need_orig, remain, alloced, i;
	uint64_t buflets[MAX_BUFLETS];
	uint64_t *pkts;

	need_orig = *ph_cnt;
	err = kern_pbufpool_alloc_batch_nosleep(pp, large ? 0 : 1, buf_arr, ph_cnt);
	if (!large) {
		return err;
	}
	if (*ph_cnt == 0) {
		SK_ERR("failed to alloc %d packets for alloc ring: err %d",
		    need_orig, err);
		DTRACE_SKYWALK2(alloc__pkts__fail, uint32_t, need_orig, int, err);
		return err;
	}
	need = remain = *ph_cnt;
	alloced = 0;
	pkts = buf_arr;
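
	/*
	 * Large-packet path: the batch above was allocated with zero
	 * buflets per packet, so each packet still needs a (large) buffer
	 * attached.  Attach one buflet per packet in chunks of at most
	 * MAX_BUFLETS; any packets left without a buffer once the buflet
	 * pool runs dry are freed after the loop.
	 */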
	while (remain > 0) {
		uint32_t cnt, cnt_orig;

		cnt = MIN(remain, MAX_BUFLETS);
		cnt_orig = cnt;
		err = pp_alloc_buflet_batch(pp, buflets, &cnt, SKMEM_NOSLEEP, true);
		if (cnt == 0) {
			SK_ERR("failed to alloc %d buflets for alloc ring: "
			    "remain %d, err %d", cnt_orig, remain, err);
			DTRACE_SKYWALK3(alloc__bufs__fail, uint32_t, cnt_orig,
			    uint32_t, remain, int, err);
			break;
		}
		for (i = 0; i < cnt; i++) {
			kern_packet_t ph = (kern_packet_t)pkts[i];
			kern_buflet_t buf = (kern_buflet_t)buflets[i];
			kern_buflet_t pbuf = kern_packet_get_next_buflet(ph, NULL);
			VERIFY(kern_packet_add_buflet(ph, pbuf, buf) == 0);
			buflets[i] = 0;
		}
		DTRACE_SKYWALK3(alloc__bufs, uint32_t, remain, uint32_t, cnt,
		    uint32_t, cnt_orig);
		pkts += cnt;
		alloced += cnt;
		remain -= cnt;
	}
	/* free packets without attached buffers */
	if (remain > 0) {
		DTRACE_SKYWALK1(remaining__pkts, uint32_t, remain);
		ASSERT(remain + alloced == need);
		pp_free_packet_batch(pp, pkts, remain);

		/* pp_free_packet_batch() should clear the pkts array */
		for (i = 0; i < remain; i++) {
			ASSERT(pkts[i] == 0);
		}
	}
	*ph_cnt = alloced;
	if (*ph_cnt == 0) {
		err = ENOMEM;
	} else if (*ph_cnt < need_orig) {
		err = EAGAIN;
	} else {
		err = 0;
	}
	DTRACE_SKYWALK3(alloc__packets, uint32_t, need_orig, uint32_t, *ph_cnt, int, err);
	return err;
}

static int
na_packet_pool_alloc_sync_common(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags, bool large)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, ph_needed, ph_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increase the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;
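
	/*
	 * Worked example of the adaptive working set (illustrative numbers
	 * only): with curr_ws = 64, a forced sync grows it to 64 + 32 = 96
	 * (capped at ckr_lim), while a quiet period of at least
	 * na_upp_ws_hold_time with b >= 16 busy slots shrinks it to
	 * 64 - 16 = 48 (floored at na_upp_alloc_lowat).  The ring is then
	 * topped up with n = curr_ws - b freshly allocated packets.
	 */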

	/* min with # of avail free slots (subtract busy from max) */
	n = ph_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((ph_cnt = ph_needed) == 0) {
		goto done;
	}

	err = alloc_packets(pp, kring->ckr_scratch,
	    PP_HAS_BUFFER_ON_DEMAND(pp) && large, &ph_cnt);
	if (__improbable(ph_cnt == 0)) {
		SK_ERR("kr 0x%llx failed to alloc %u packets (%d)",
		    SK_KVA(kring), ph_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
	} else {
		/*
		 * Add packets to the allocated list of the user packet pool.
		 */
		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
	}

	for (n = 0; n < ph_cnt; n++) {
		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
		kring->ckr_scratch[n] = 0;
		ASSERT(kqum != NULL);

		/* clean up any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/*
		 * Since this packet is freshly allocated and we need to
		 * have the flag set for the attach to succeed, just set
		 * it here rather than calling __packet_finalize().
		 */
		kqum->qum_qflags |= QUM_F_FINALIZED;

		/* attach the packet to the slot */
		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
		/*
		 * Externalize the packet as it is being transferred to
		 * user space.
		 */
		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}

static int
na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	return na_packet_pool_alloc_sync_common(kring, p, flags, false);
}

static int
na_packet_pool_alloc_large_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	return na_packet_pool_alloc_sync_common(kring, p, flags, true);
}

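/*
 * Buflet counterpart of na_packet_pool_free_sync(): reclaims user-posted
 * buflets (rather than whole packets) from the free ring and returns them
 * to the pool one at a time via pp_free_buflet().
 */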
static int
na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct kern_pbufpool *pp = kring->ckr_pp;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
			    SK_KVA(kbft));
			ret = EINVAL;
			break;
		}

		/* detach and free the buflet */
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		pp_free_buflet(pp, kbft);
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}

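/*
 * Buflet counterpart of na_packet_pool_alloc_sync_common(): refills the
 * buflet alloc ring using the same adaptive working-set policy, attaching
 * bare buflets (no packet metadata) to the ring slots.
 */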
static int
na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, bh_needed, bh_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct __kern_buflet_ext *kbe;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increase the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;

	/* min with # of avail free slots (subtract busy from max) */
	n = bh_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((bh_cnt = bh_needed) == 0) {
		goto done;
	}

	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
	    SKMEM_NOSLEEP, false);

	if (bh_cnt == 0) {
		SK_ERR("kr 0x%llx failed to alloc %u buflets (%d)",
		    SK_KVA(kring), bh_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
	}

	for (n = 0; n < bh_cnt; n++) {
		struct __user_buflet *ubft;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
		kbe = (struct __kern_buflet_ext *)kbft;
		kring->ckr_scratch[n] = 0;
		ASSERT(kbft != NULL);

		/*
		 * Add the buflet to the allocated list of the user packet
		 * pool.
		 */
		pp_insert_upp_bft(pp, kbft, pid);

		/*
		 * Externalize the buflet as it is being transferred to
		 * user space.
		 */
		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
		KBUF_EXTERNALIZE(kbft, ubft, pp);

		/* clean up any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/* attach the buflet to the slot */
		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}

/* The caller needs to ensure that the NA stays intact */
void
na_drain(struct nexus_adapter *na, boolean_t purge)
{
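	/*
	 * os_atomic_or_orig() returns the flag word as it was *before* the
	 * OR, so the reap below runs only on the first transition into
	 * NAF_DRAINING for an active adapter; subsequent calls are no-ops
	 * until the flag is cleared again.
	 */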
	/* will be cleared on next channel sync */
	if (!(os_atomic_or_orig(&na->na_flags, NAF_DRAINING, relaxed) &
	    NAF_DRAINING) && NA_IS_ACTIVE(na)) {
		SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
		    na->na_name, (purge ? "purging" : "pruning"),
		    SK_KVA(na), na->na_flags, NAF_BITS);

		/* reap (purge/prune) caches in the arena */
		skmem_arena_reap(na->na_arena, purge);
	}
}