/*
 * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
 * All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/systm.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/monitor/nx_monitor.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/upipe/nx_user_pipe.h>
#include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
#include <kern/thread.h>

static int na_krings_use(struct kern_channel *);
static void na_krings_unuse(struct kern_channel *);
static void na_krings_verify(struct nexus_adapter *);
static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
static void na_set_all_rings(struct nexus_adapter *, uint32_t);
static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
static void na_unset_ringid(struct kern_channel *);
static void na_teardown(struct nexus_adapter *, struct kern_channel *,
    boolean_t);

static int na_kr_create(struct nexus_adapter *, boolean_t);
static void na_kr_delete(struct nexus_adapter *);
static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
    boolean_t);
static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
    boolean_t, struct proc *);
static int na_kr_populate_slots(struct __kern_channel_ring *);
static void na_kr_depopulate_slots(struct __kern_channel_ring *,
    struct kern_channel *, boolean_t defunct);

static int na_schema_alloc(struct kern_channel *);

static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
static void na_pseudo_free(struct nexus_adapter *);
static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
static void na_pseudo_dtor(struct nexus_adapter *);
static int na_pseudo_krings_create(struct nexus_adapter *,
    struct kern_channel *);
static void na_pseudo_krings_delete(struct nexus_adapter *,
    struct kern_channel *, boolean_t);
static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_large_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);

#define NA_KRING_IDLE_TIMEOUT   (NSEC_PER_SEC * 30) /* 30 seconds */

static SKMEM_TYPE_DEFINE(na_pseudo_zone, struct nexus_adapter);

static int __na_inited = 0;

#define NA_NUM_WMM_CLASSES      4
#define NAKR_WMM_SC2RINGID(_s)  PKT_SC2TC(_s)
#define NAKR_SET_SVC_LUT(_n, _s) \
    (_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
#define NAKR_SET_KR_SVC(_n, _s) \
    NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)

#define NA_UPP_ALLOC_LOWAT      8
static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;

#define NA_UPP_REAP_INTERVAL    10 /* seconds */
static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;

#define NA_UPP_WS_HOLD_TIME     2 /* seconds */
static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;

#define NA_UPP_REAP_MIN_PKTS    0
static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;

#define NA_UPP_ALLOC_BUF_LOWAT  64
static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;

#if (DEVELOPMENT || DEBUG)
static uint64_t _na_inject_error = 0;
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
    _SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)

SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
    NA_UPP_WS_HOLD_TIME, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
    NA_UPP_REAP_INTERVAL, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
    NA_UPP_REAP_MIN_PKTS, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
    NA_UPP_ALLOC_LOWAT, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
    NA_UPP_ALLOC_BUF_LOWAT, "");
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
    CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
#else
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
#endif /* !DEVELOPMENT && !DEBUG */

#define SKMEM_TAG_NX_RINGS      "com.apple.skywalk.nexus.rings"
static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);

#define SKMEM_TAG_NX_CONTEXTS   "com.apple.skywalk.nexus.contexts"
static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);

#define SKMEM_TAG_NX_SCRATCH    "com.apple.skywalk.nexus.scratch"
static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);
void
na_init(void)
{
    /*
     * Changing the size of the nexus_mdata structure won't break ABI,
     * but we need to be mindful of memory consumption; thus here
     * we add a compile-time check to make sure the size is within
     * the expected limit and that it's properly aligned. This
     * check may be adjusted in the future as needed.
     */
    _CASSERT(sizeof(struct nexus_mdata) <= 32 &&
        IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
    _CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));

    /* see comments on nexus_meta_type_t */
    _CASSERT(NEXUS_META_TYPE_MAX == 3);
    _CASSERT(NEXUS_META_SUBTYPE_MAX == 3);

    ASSERT(!__na_inited);

    __na_inited = 1;
}

void
na_fini(void)
{
    if (__na_inited) {
        __na_inited = 0;
    }
}
/*
 * Interpret the ringid of a chreq, by translating it into a pair
 * of intervals of ring indices:
 *
 * [txfirst, txlast) and [rxfirst, rxlast)
 */
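/*
 * Illustrative example (hypothetical adapter with 2 TX and 2 RX rings):
 * RING_SET_ALL with CHANNEL_RING_ID_ANY yields tx [0,2) and rx [0,2),
 * i.e. the channel spans every ring; RING_SET_ALL with a specific
 * ring_id of 1 narrows both intervals to [1,2).
 */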
int
na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
    ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
{
    enum txrx t;

    switch (ring_set) {
    case RING_SET_ALL:
        /*
         * Ring pair eligibility: all ring(s).
         */
        if (ring_id != CHANNEL_RING_ID_ANY &&
            ring_id >= na_get_nrings(na, NR_TX) &&
            ring_id >= na_get_nrings(na, NR_RX)) {
            SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
                na->na_name, (int)ring_id, ring_set);
            return EINVAL;
        }
        for_rx_tx(t) {
            if (ring_id == CHANNEL_RING_ID_ANY) {
                first[t] = 0;
                last[t] = na_get_nrings(na, t);
            } else {
                first[t] = ring_id;
                last[t] = ring_id + 1;
            }
        }
        break;

    default:
        SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
        return EINVAL;
    }

    SK_DF(SK_VERB_NA | SK_VERB_RING,
        "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
        na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
        first[NR_RX], last[NR_RX]);

    return 0;
}

/*
 * Set the ring ID. For devices with a single queue, a request
 * for all rings is the same as a single ring.
 */
static int
na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
{
    struct nexus_adapter *na = ch->ch_na;
    int error;
    enum txrx t;
    uint32_t n_alloc_rings;

    if ((error = na_interp_ringid(na, ring_id, ring_set,
        ch->ch_first, ch->ch_last)) != 0) {
        return error;
    }

    n_alloc_rings = na_get_nrings(na, NR_A);
    if (n_alloc_rings != 0) {
        uint32_t n_large_alloc_rings;

        ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
        ch->ch_last[NR_A] = ch->ch_last[NR_F] =
            ch->ch_first[NR_A] + n_alloc_rings;

        n_large_alloc_rings = na_get_nrings(na, NR_LBA);
        ch->ch_first[NR_LBA] = 0;
        ch->ch_last[NR_LBA] = ch->ch_first[NR_LBA] + n_large_alloc_rings;
    } else {
        ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
        ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
        ch->ch_first[NR_LBA] = ch->ch_last[NR_LBA] = 0;
    }
    ch->ch_first[NR_EV] = 0;
    ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);

    /* XXX: should we initialize na_si_users for event ring? */

    /*
     * Optimization: count the users registered for more than
     * one ring, which are the ones sleeping on the global queue.
     * The default na_notify() callback will then avoid signaling
     * the global queue if nobody is using it.
     */
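    /*
     * Sketch: a channel bound to more than one ring in a direction
     * (say tx [0,2)) sleeps on the global queue, so it bumps
     * na_si_users[NR_TX] below; a single-ring channel leaves the
     * counter untouched and is signaled via its per-ring queue.
     */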
    for_rx_tx(t) {
        if (ch_is_multiplex(ch, t)) {
            na->na_si_users[t]++;
            ASSERT(na->na_si_users[t] != 0);
        }
    }
    return 0;
}

static void
na_unset_ringid(struct kern_channel *ch)
{
    struct nexus_adapter *na = ch->ch_na;
    enum txrx t;

    for_rx_tx(t) {
        if (ch_is_multiplex(ch, t)) {
            ASSERT(na->na_si_users[t] != 0);
            na->na_si_users[t]--;
        }
        ch->ch_first[t] = ch->ch_last[t] = 0;
    }
}

/*
 * Check that the rings we want to bind are not exclusively owned by a previous
 * bind. If exclusive ownership has been requested, we also mark the rings.
 */
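/*
 * Illustrative outcomes of the two-round check below (hypothetical):
 * a second bind requesting CHANF_EXCLUSIVE fails with EBUSY if any
 * requested kring already has users; a second shared bind fails only
 * if a kring was already marked CKRF_EXCLUSIVE by an earlier bind.
 */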
/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
na_krings_use(struct kern_channel *ch)
{
    struct nexus_adapter *na = ch->ch_na;
    struct __kern_channel_ring *__single kring;
    boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
    enum txrx t;
    uint32_t i;

    SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
        na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
        ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

    /*
     * First round: check that none of the requested rings
     * is already exclusively owned, and that we aren't asking
     * for exclusive ownership of rings already in use.
     */
    for_all_rings(t) {
        for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
            kring = &NAKR(na, t)[i];
            if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
                (kring->ckr_users && excl)) {
                SK_DF(SK_VERB_NA | SK_VERB_RING,
                    "kr \"%s\" (0x%llx) krflags 0x%b is busy",
                    kring->ckr_name, SK_KVA(kring),
                    kring->ckr_flags, CKRF_BITS);
                return EBUSY;
            }
        }
    }

    /*
     * Second round: increment usage count and possibly
     * mark as exclusive.
     */

    for_all_rings(t) {
        for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
            kring = &NAKR(na, t)[i];
            kring->ckr_users++;
            if (excl) {
                kring->ckr_flags |= CKRF_EXCLUSIVE;
            }
        }
    }

    return 0;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_unuse(struct kern_channel *ch)
{
    struct nexus_adapter *na = ch->ch_na;
    struct __kern_channel_ring *__single kring;
    boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
    enum txrx t;
    uint32_t i;

    SK_DF(SK_VERB_NA | SK_VERB_RING,
        "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
        na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
        ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

    for_all_rings(t) {
        for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
            kring = &NAKR(na, t)[i];
            if (excl) {
                kring->ckr_flags &= ~CKRF_EXCLUSIVE;
            }
            kring->ckr_users--;
        }
    }
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
na_krings_verify(struct nexus_adapter *na)
{
    struct __kern_channel_ring *__single kring;
    enum txrx t;
    uint32_t i;

    for_all_rings(t) {
        for (i = 0; i < na_get_nrings(na, t); i++) {
            kring = &NAKR(na, t)[i];
            /* na_kr_create() validations */
            ASSERT(kring->ckr_num_slots > 0);
            ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
            ASSERT(kring->ckr_pp != NULL);

            if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
                continue;
            }
            /* na_kr_setup() validations */
            if (KR_KERNEL_ONLY(kring)) {
                ASSERT(kring->ckr_ring == NULL);
            } else {
                ASSERT(kring->ckr_ring != NULL);
            }
            ASSERT(kring->ckr_ksds_last ==
                &kring->ckr_ksds[kring->ckr_lim]);
        }
    }
}

int
na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr)
{
    struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
    struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
    uint32_t ch_mode = chr->cr_mode;
    int err = 0;

    SK_LOCK_ASSERT_HELD();
    ASSERT(ch->ch_schema == NULL);
    ASSERT(ch->ch_na == NULL);

    /* ring configuration may have changed, fetch from the card */
    na_update_config(na);
    ch->ch_na = na; /* store the reference */
    err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
    if (err != 0) {
        goto err;
    }

    os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
        CHANF_USER_PACKET_POOL | CHANF_EVENT_RING), relaxed);
    if (ch_mode & CHMODE_EXCLUSIVE) {
        os_atomic_or(&ch->ch_flags, CHANF_EXCLUSIVE, relaxed);
    }
    /*
     * Disallow automatic sync for monitor mode, since TX
     * direction is disabled.
     */
    if (ch_mode & CHMODE_MONITOR) {
        os_atomic_or(&ch->ch_flags, CHANF_RXONLY, relaxed);
    }

    if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
        !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
        SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
            ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
        err = EINVAL;
        goto err;
    }

    if (na->na_arena->ar_flags & ARF_DEFUNCT) {
        err = ENXIO;
        goto err;
    }

    if (ch_mode & CHMODE_USER_PACKET_POOL) {
        ASSERT(na->na_flags & NAF_USER_PKT_POOL);
        ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
        ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
        os_atomic_or(&ch->ch_flags, CHANF_USER_PACKET_POOL, relaxed);
    }

    if (ch_mode & CHMODE_EVENT_RING) {
        ASSERT(na->na_flags & NAF_USER_PKT_POOL);
        ASSERT(na->na_flags & NAF_EVENT_RING);
        ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
        os_atomic_or(&ch->ch_flags, CHANF_EVENT_RING, relaxed);
    }

    /*
     * If this is the first channel of the adapter, create
     * the rings and their in-kernel view, the krings.
     */
    if (na->na_channels == 0) {
        err = na->na_krings_create(na, ch);
        if (err != 0) {
            goto err;
        }

        /*
         * Sanity check; this is already done in na_kr_create(),
         * but we do it here as well to validate na_kr_setup().
         */
        na_krings_verify(na);
        *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
            skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
        *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
            skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
    }

    /*
     * Validate ownership and usability of the krings; take into account
     * whether some previous bind has exclusive ownership on them.
     */
    err = na_krings_use(ch);
    if (err != 0) {
        goto err_del_rings;
    }

    /* for a user-facing channel, create a new channel schema */
    if (!(ch->ch_flags & CHANF_KERNEL)) {
        err = na_schema_alloc(ch);
        if (err != 0) {
            goto err_rel_excl;
        }

        ASSERT(ch->ch_schema != NULL);
        ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
    } else {
        ASSERT(ch->ch_schema == NULL);
        ch->ch_schema_offset = (mach_vm_offset_t)-1;
    }

    /* update our work timestamp */
    na->na_work_ts = net_uptime();

    na->na_channels++;

    /*
     * If user packet pool is desired, initialize the allocated
     * object hash table in the pool, if not already. This also
     * retains a refcnt on the pool which the caller must release.
     */
    ASSERT(ch->ch_pp == NULL);
    if (ch_mode & CHMODE_USER_PACKET_POOL) {
#pragma unused(tx_pp)
        ASSERT(rx_pp == tx_pp);
        err = pp_init_upp(rx_pp, TRUE);
        if (err != 0) {
            goto err_free_schema;
        }
        ch->ch_pp = rx_pp;
    }

    if (!NA_IS_ACTIVE(na)) {
        err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
        if (err != 0) {
            goto err_release_pp;
        }

        SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
            SK_KVA(na));
        SK_D("  na_md_type: %u", na->na_md_type);
        SK_D("  na_md_subtype: %u", na->na_md_subtype);
    }

    SK_D("ch 0x%llx", SK_KVA(ch));
    SK_D("  ch_flags: 0x%b", ch->ch_flags, CHANF_BITS);
    if (ch->ch_schema != NULL) {
        SK_D("  ch_schema: 0x%llx", SK_KVA(ch->ch_schema));
    }
    SK_D("  ch_na: 0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
        ch->ch_na->na_channels);
    SK_D("  ch_tx_rings: [%u,%u)", ch->ch_first[NR_TX],
        ch->ch_last[NR_TX]);
    SK_D("  ch_rx_rings: [%u,%u)", ch->ch_first[NR_RX],
        ch->ch_last[NR_RX]);
    SK_D("  ch_alloc_rings: [%u,%u)", ch->ch_first[NR_A],
        ch->ch_last[NR_A]);
    SK_D("  ch_free_rings: [%u,%u)", ch->ch_first[NR_F],
        ch->ch_last[NR_F]);
    SK_D("  ch_ev_rings: [%u,%u)", ch->ch_first[NR_EV],
        ch->ch_last[NR_EV]);

    return 0;

err_release_pp:
    if (ch_mode & CHMODE_USER_PACKET_POOL) {
        ASSERT(ch->ch_pp != NULL);
        pp_release(rx_pp);
        ch->ch_pp = NULL;
    }
err_free_schema:
    *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
        NEXUS_META_TYPE_INVALID;
    *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
        NEXUS_META_SUBTYPE_INVALID;
    ASSERT(na->na_channels != 0);
    na->na_channels--;
    if (ch->ch_schema != NULL) {
        skmem_cache_free(
            skmem_arena_nexus(na->na_arena)->arn_schema_cache,
            ch->ch_schema);
        ch->ch_schema = NULL;
        ch->ch_schema_offset = (mach_vm_offset_t)-1;
    }
err_rel_excl:
    na_krings_unuse(ch);
err_del_rings:
    if (na->na_channels == 0) {
        na->na_krings_delete(na, ch, FALSE);
    }
err:
    ch->ch_na = NULL;
    ASSERT(err != 0);

    return err;
}

/*
 * Undo everything that was done in na_bind_channel().
 */
/* call with SK_LOCK held */
void
na_unbind_channel(struct kern_channel *ch)
{
    struct nexus_adapter *na = ch->ch_na;

    SK_LOCK_ASSERT_HELD();

    ASSERT(na->na_channels != 0);
    na->na_channels--;

    /* release exclusive use if it was requested at bind time */
    na_krings_unuse(ch);

    if (na->na_channels == 0) {     /* last instance */
        SK_D("%s(%d): deleting last channel instance for %s",
            ch->ch_name, ch->ch_pid, na->na_name);

        /*
         * Free any remaining allocated packets attached to
         * the slots, followed by a teardown of the arena.
         */
        na_teardown(na, ch, FALSE);

        *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
            NEXUS_META_TYPE_INVALID;
        *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
            NEXUS_META_SUBTYPE_INVALID;
    } else {
        SK_D("%s(%d): %s has %u remaining channel instance(s)",
            ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
    }

    /*
     * Free any allocated packets (for the process) attached to the slots;
     * note that na_teardown() could have done this there as well.
     */
    if (ch->ch_pp != NULL) {
        ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
        pp_purge_upp(ch->ch_pp, ch->ch_pid);
        pp_release(ch->ch_pp);
        ch->ch_pp = NULL;
    }

    /* possibly decrement counter of tx_si/rx_si users */
    na_unset_ringid(ch);

    /* reap the caches now (purge if adapter is idle) */
    skmem_arena_reap(na->na_arena, (na->na_channels == 0));

    /* delete the csm */
    if (ch->ch_schema != NULL) {
        skmem_cache_free(
            skmem_arena_nexus(na->na_arena)->arn_schema_cache,
            ch->ch_schema);
        ch->ch_schema = NULL;
        ch->ch_schema_offset = (mach_vm_offset_t)-1;
    }

    /* destroy the memory map */
    skmem_arena_munmap_channel(na->na_arena, ch);

    /* mark the channel as unbound */
    os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE), relaxed);
    ch->ch_na = NULL;

    /* and finally release the nexus adapter; this might free it */
    (void) na_release_locked(na);
}

static void
na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
    SK_LOCK_ASSERT_HELD();
    LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

#if CONFIG_NEXUS_MONITOR
    /*
     * Walk through all the rings and tell any monitor
     * that the port is going to exit Skywalk mode.
     */
    nx_mon_stop(na);
#endif /* CONFIG_NEXUS_MONITOR */

    /*
     * Deactivate the adapter.
     */
    (void) na->na_activate(na,
        (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));

    /*
     * Free any remaining allocated packets for this process.
     */
    if (ch->ch_pp != NULL) {
        ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
        pp_purge_upp(ch->ch_pp, ch->ch_pid);
        if (!defunct) {
            pp_release(ch->ch_pp);
            ch->ch_pp = NULL;
        }
    }

    /*
     * Delete rings and buffers.
     */
    na->na_krings_delete(na, ch, defunct);
}

/* call with SK_LOCK held */
/*
 * Allocate the per-fd structure __user_channel_schema.
 */
static int
na_schema_alloc(struct kern_channel *ch)
{
    struct nexus_adapter *na = ch->ch_na;
    struct skmem_arena *ar = na->na_arena;
    struct skmem_arena_nexus *arn;
    mach_vm_offset_t roff[SKMEM_REGIONS];
    struct __kern_channel_ring *__single kr;
    struct __user_channel_schema *csm;
    struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
    mach_vm_offset_t base;
    uint32_t i, j, k, n[NR_ALL];
    enum txrx t;
    /* -fbounds-safety */
    struct {
        uint32_t tx_rings;
        uint32_t rx_rings;
        uint32_t allocator_ring_pairs;
        uint32_t num_event_rings;
        uint32_t large_buf_alloc_rings;
    } ring_counts;
#define ASSERT_COUNT_TYPES_MATCH(FIELD_NAME)                            \
    _Static_assert(__builtin_types_compatible_p(                        \
        typeof(ring_counts . FIELD_NAME),                               \
        typeof(((struct __user_channel_schema*)0)->csm_ ## FIELD_NAME)), \
        "type for " # FIELD_NAME " doesn't match")

    ASSERT_COUNT_TYPES_MATCH(tx_rings);
    ASSERT_COUNT_TYPES_MATCH(rx_rings);
    ASSERT_COUNT_TYPES_MATCH(allocator_ring_pairs);
    ASSERT_COUNT_TYPES_MATCH(num_event_rings);
    ASSERT_COUNT_TYPES_MATCH(large_buf_alloc_rings);
#undef ASSERT_COUNT_TYPES_MATCH

    /* see comments for struct __user_channel_schema */
    _CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
    _CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
        sizeof(csm->csm_ver));
    _CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
        sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
    _CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
        sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
        sizeof(csm->csm_kern_name));

    SK_LOCK_ASSERT_HELD();

    ASSERT(!(ch->ch_flags & CHANF_KERNEL));
    ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
    arn = skmem_arena_nexus(ar);
    ASSERT(arn != NULL);
    for_all_rings(t) {
        n[t] = 0;
    }

    for_rx_tx(t) {
        ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
        n[t] = ch->ch_last[t] - ch->ch_first[t];
        ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
    }

    /* return total number of tx and rx rings for this channel */
    ring_counts.tx_rings = n[NR_TX];
    ring_counts.rx_rings = n[NR_RX];

    if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
        ring_counts.allocator_ring_pairs = na->na_num_allocator_ring_pairs;
        n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
        ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
        ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
        ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));

        n[NR_LBA] = na->na_num_large_buf_alloc_rings;
        if (n[NR_LBA] != 0) {
            ring_counts.large_buf_alloc_rings = n[NR_LBA];
            ASSERT(n[NR_LBA] == (ch->ch_last[NR_LBA] - ch->ch_first[NR_LBA]));
        }
    }

    if (ch->ch_flags & CHANF_EVENT_RING) {
        n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
        ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
        ring_counts.num_event_rings = n[NR_EV];
    }

    csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
    if (csm == NULL) {
        return ENOMEM;
    }
    skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
    bzero(__unsafe_forge_bidi_indexable(void *, csm, SKMEM_OBJ_SIZE(&csm_oi)),
        SKMEM_OBJ_SIZE(&csm_oi));

    csm->csm_tx_rings = ring_counts.tx_rings;
    csm->csm_rx_rings = ring_counts.rx_rings;
    csm->csm_allocator_ring_pairs = ring_counts.allocator_ring_pairs;
    csm->csm_large_buf_alloc_rings = ring_counts.large_buf_alloc_rings;
    csm->csm_num_event_rings = ring_counts.num_event_rings;

    *(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;

    /* kernel version and executable UUID */
    _CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);

    (void) strlcpy(csm->csm_kern_name, version, sizeof(csm->csm_kern_name));

#if !XNU_TARGET_OS_OSX
    (void) memcpy((void *)csm->csm_kern_uuid, kernelcache_uuid, sizeof(csm->csm_kern_uuid));
#else /* XNU_TARGET_OS_OSX */
    if (kernel_uuid != NULL) {
        (void) memcpy((void *)csm->csm_kern_uuid, kernel_uuid, sizeof(csm->csm_kern_uuid));
    }
#endif /* XNU_TARGET_OS_OSX */

    bzero(&roff, sizeof(roff));
    for (i = 0; i < SKMEM_REGIONS; i++) {
        if (ar->ar_regions[i] == NULL) {
            ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
                i == SKMEM_REGION_SCHEMA ||
                i == SKMEM_REGION_BUF_LARGE ||
                i == SKMEM_REGION_RXBUF_DEF ||
                i == SKMEM_REGION_RXBUF_LARGE ||
                i == SKMEM_REGION_TXBUF_DEF ||
                i == SKMEM_REGION_TXBUF_LARGE ||
                i == SKMEM_REGION_RXKMD ||
                i == SKMEM_REGION_TXKMD ||
                i == SKMEM_REGION_UMD ||
                i == SKMEM_REGION_UBFT ||
                i == SKMEM_REGION_KBFT ||
                i == SKMEM_REGION_RXKBFT ||
                i == SKMEM_REGION_TXKBFT ||
                i == SKMEM_REGION_TXAUSD ||
                i == SKMEM_REGION_RXFUSD ||
                i == SKMEM_REGION_USTATS ||
                i == SKMEM_REGION_KSTATS ||
                i == SKMEM_REGION_INTRINSIC ||
                i == SKMEM_REGION_FLOWADV ||
                i == SKMEM_REGION_NEXUSADV ||
                i == SKMEM_REGION_SYSCTLS ||
                i == SKMEM_REGION_GUARD_TAIL);
            continue;
        }

        /* not for nexus */
        ASSERT(i != SKMEM_REGION_SYSCTLS);

        /*
         * Get region offsets from base of mmap span; the arena
         * doesn't need to be mmap'd at this point, since we
         * simply compute the relative offset.
         */
        roff[i] = skmem_arena_get_region_offset(ar, i);
    }

    /*
     * The schema is made up of the descriptor followed inline by an array
     * of offsets to the tx, rx, allocator and event rings in the mmap span.
     * They contain the offset between the ring and schema, so the
     * information is usable in userspace to reach the ring from
     * the schema.
     */
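    /*
     * Worked example (offsets hypothetical): if the schema object sits
     * at offset 0x1000 within the mmap span (base) and a tx ring object
     * at offset 0x5000, then csm_ring_ofs[0].ring_off is 0x4000;
     * userspace adds that delta to its mapped schema address to locate
     * the ring.
     */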
    base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);

    /* initialize schema with tx ring info */
    for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
        kr = &na->na_tx_rings[j];
        if (KR_KERNEL_ONLY(kr)) {       /* skip kernel-only rings */
            continue;
        }

        ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
        skmem_cache_get_obj_info(arn->arn_ring_cache,
            kr->ckr_ring, &ring_oi, NULL);
        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
            (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

        ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
        skmem_cache_get_obj_info(kr->ckr_ksds_cache,
            kr->ckr_ksds, &ksd_oi, &usd_oi);

        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
            (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
            base;
    }
    /* initialize schema with rx ring info */
    for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
        kr = &na->na_rx_rings[j];
        if (KR_KERNEL_ONLY(kr)) {       /* skip kernel-only rings */
            continue;
        }

        ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
        skmem_cache_get_obj_info(arn->arn_ring_cache,
            kr->ckr_ring, &ring_oi, NULL);
        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
            (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

        ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
        skmem_cache_get_obj_info(kr->ckr_ksds_cache,
            kr->ckr_ksds, &ksd_oi, &usd_oi);

        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
            (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
            base;
    }
    /* initialize schema with allocator ring info */
    for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
        i < n[NR_A]; i++, j++) {
        mach_vm_offset_t usd_roff;

        usd_roff = roff[SKMEM_REGION_TXAUSD];
        kr = &na->na_alloc_rings[j];
        ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
        ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

        skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
            &ring_oi, NULL);
        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
            (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

        skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
            &ksd_oi, &usd_oi);
        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
            (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
    }
    /* initialize schema with free ring info */
    for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
        i < n[NR_F]; i++, j++) {
        mach_vm_offset_t usd_roff;

        usd_roff = roff[SKMEM_REGION_RXFUSD];
        kr = &na->na_free_rings[j];
        ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
        ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);

        skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
            &ring_oi, NULL);
        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
            (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

        skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
            &ksd_oi, &usd_oi);
        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
            (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
    }
    /* initialize schema with event ring info */
    for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
        n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
        ASSERT(csm->csm_num_event_rings != 0);
        kr = &na->na_event_rings[j];
        ASSERT(!KR_KERNEL_ONLY(kr));
        ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
        skmem_cache_get_obj_info(arn->arn_ring_cache,
            kr->ckr_ring, &ring_oi, NULL);
        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
            (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

        ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
        skmem_cache_get_obj_info(kr->ckr_ksds_cache,
            kr->ckr_ksds, &ksd_oi, &usd_oi);

        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
            (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
            base;
    }
    /* initialize schema with large buf alloc ring info */
    for (i = 0, j = ch->ch_first[NR_LBA], k = n[NR_TX] + n[NR_RX] +
        n[NR_A] + n[NR_F] + n[NR_EV]; i < n[NR_LBA]; i++, j++) {
        ASSERT(csm->csm_large_buf_alloc_rings != 0);
        kr = &na->na_large_buf_alloc_rings[j];
        ASSERT(!KR_KERNEL_ONLY(kr));
        ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
        skmem_cache_get_obj_info(arn->arn_ring_cache,
            kr->ckr_ring, &ring_oi, NULL);
        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
            (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;

        ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
        skmem_cache_get_obj_info(kr->ckr_ksds_cache,
            kr->ckr_ksds, &ksd_oi, &usd_oi);

        *(mach_vm_offset_t *)
        (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
            (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
            base;
    }

    *(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
        __ch_umd_redzone_cookie;
    *(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
    *(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
        na->na_md_subtype;

    if (arn->arn_stats_obj != NULL) {
        ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
        ASSERT(roff[SKMEM_REGION_USTATS] != 0);
        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
            roff[SKMEM_REGION_USTATS];
        *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
            na->na_stats_type;
    } else {
        ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
        *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
            NEXUS_STATS_TYPE_INVALID;
    }

    if (arn->arn_flowadv_obj != NULL) {
        ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
        ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
            roff[SKMEM_REGION_FLOWADV];
        *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
            na->na_flowadv_max;
    } else {
        ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
        *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
    }

    if (arn->arn_nexusadv_obj != NULL) {
        struct __kern_nexus_adv_metadata *__single adv_md;

        adv_md = arn->arn_nexusadv_obj;
        ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
        ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
        ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
            roff[SKMEM_REGION_NEXUSADV];
    } else {
        ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
        *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
    }

    ch->ch_schema = csm;
    ch->ch_schema_offset = base;

    return 0;
}

/*
 * Called by all routines that create nexus_adapters.
 * Attach na to the ifp (if any) and provide defaults
 * for optional callbacks. Defaults assume that we
 * are creating a hardware nexus_adapter.
 */
void
na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
    struct kern_nexus_domain_provider *nxdom_prov)
{
    SK_LOCK_ASSERT_HELD();

    ASSERT(nx != NULL);
    ASSERT(nxdom_prov != NULL);
    ASSERT(na->na_krings_create != NULL);
    ASSERT(na->na_krings_delete != NULL);
    if (na->na_type != NA_NETIF_COMPAT_DEV) {
        ASSERT(na_get_nrings(na, NR_TX) != 0);
    }
    if (na->na_type != NA_NETIF_COMPAT_HOST) {
        ASSERT(na_get_nrings(na, NR_RX) != 0);
    }
    ASSERT(na->na_channels == 0);

    if (na->na_notify == NULL) {
        na->na_notify = na_notify;
    }

    na->na_nx = nx;
    na->na_nxdom_prov = nxdom_prov;

    SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
        SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
        SK_KVA(na->na_arena));
}

void
na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
    boolean_t within_kevent, boolean_t selwake, uint32_t hint)
{
    struct nexus_adapter *na = KRNA(kring);
    enum txrx t = kring->ckr_tx;

    SK_DF(SK_VERB_EVENTS,
        "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
        sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
        na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
        hint, CHAN_FILT_HINT_BITS);

    csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
    /*
     * optimization: avoid a wake up on the global
     * queue if nobody has registered for more
     * than one ring
     */
    if (na->na_si_users[t] > 0) {
        csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
    }
}

/* default notify callback */
static int
na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
{
#pragma unused(p)
    SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
        SK_VERB_TX : SK_VERB_RX),
        "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
        "flags 0x%x, kh %u kt %u | h %u t %u",
        sk_proc_name_address(p), sk_proc_pid(p),
        (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
        SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
        kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
        kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);

    na_post_event(kring, (flags & NA_NOTEF_PUSH),
        (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);

    return 0;
}

/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
/* call with SK_LOCK held */
int
na_update_config(struct nexus_adapter *na)
{
    uint32_t txr, txd, rxr, rxd;

    SK_LOCK_ASSERT_HELD();

    txr = txd = rxr = rxd = 0;
    if (na->na_config == NULL ||
        na->na_config(na, &txr, &txd, &rxr, &rxd)) {
        /* take whatever we had at init time */
        txr = na_get_nrings(na, NR_TX);
        txd = na_get_nslots(na, NR_TX);
        rxr = na_get_nrings(na, NR_RX);
        rxd = na_get_nslots(na, NR_RX);
    }

    if (na_get_nrings(na, NR_TX) == txr &&
        na_get_nslots(na, NR_TX) == txd &&
        na_get_nrings(na, NR_RX) == rxr &&
        na_get_nslots(na, NR_RX) == rxd) {
        return 0; /* nothing changed */
    }
    SK_D("stored config %s: txring %u x %u, rxring %u x %u",
        na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
        na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
    SK_D("new config %s: txring %u x %u, rxring %u x %u",
        na->na_name, txr, txd, rxr, rxd);

    if (na->na_channels == 0) {
        SK_D("configuration changed (but fine)");
        na_set_nrings(na, NR_TX, txr);
        na_set_nslots(na, NR_TX, txd);
        na_set_nrings(na, NR_RX, rxr);
        na_set_nslots(na, NR_RX, rxd);
        return 0;
    }
    SK_ERR("configuration changed while active, this is bad...");
    return 1;
}

static void
na_kr_setup_netif_svc_map(struct nexus_adapter *na)
{
    uint32_t i;
    uint32_t num_tx_rings;

    ASSERT(na->na_type == NA_NETIF_DEV);
    num_tx_rings = na_get_nrings(na, NR_TX);

    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
        NAKR_WMM_SC2RINGID(KPKT_SC_BK));
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
        NAKR_WMM_SC2RINGID(KPKT_SC_RD));
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
        NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
        NAKR_WMM_SC2RINGID(KPKT_SC_RV));
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
        NAKR_WMM_SC2RINGID(KPKT_SC_VI));
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
        NAKR_WMM_SC2RINGID(KPKT_SC_CTL));

    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
    _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);

    _CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
    _CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);

    /*
     * We support the following 2 configurations:
     * 1. packets from all 10 service classes map to one ring.
     * 2. a 10:4 mapping between service classes and the rings. These 4
     *    rings map to the 4 WMM access categories.
     */
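    /*
     * Sketch of the 10:4 WMM mapping configured below, as implied by
     * the _CASSERTs above: BK_SYS/BK share one ring, BE/RD/OAM a
     * second, AV/RV/VI a third, and VO/CTL the fourth.
     */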
    if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
        ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
        /* setup the adapter's service class LUT */
        NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
        NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
        NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
        NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
        NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
        NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
        NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
        NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
        NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
        NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
        NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);

        /* initialize the service class for each of the 4 rings */
        NAKR_SET_KR_SVC(na, KPKT_SC_BK);
        NAKR_SET_KR_SVC(na, KPKT_SC_BE);
        NAKR_SET_KR_SVC(na, KPKT_SC_VI);
        NAKR_SET_KR_SVC(na, KPKT_SC_VO);
    } else {
        ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
            NEXUS_QMAP_TYPE_DEFAULT);
        /* 10:1 mapping */
        for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
            na->na_kring_svc_lut[i] = 0;
        }
        for (i = 0; i < num_tx_rings; i++) {
            NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
        }
    }
}

static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");

static lck_grp_t *
na_kr_q_lck_grp(enum txrx t)
{
    switch (t) {
    case NR_TX:
        return &channel_txq_lock_group;
    case NR_RX:
        return &channel_rxq_lock_group;
    case NR_A:
    case NR_F:
    case NR_LBA:
        return &channel_alloc_lock_group;
    case NR_EV:
        return &channel_evq_lock_group;
    default:
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
    }
}

static lck_grp_t *
na_kr_s_lck_grp(enum txrx t)
{
    switch (t) {
    case NR_TX:
        return &channel_txs_lock_group;
    case NR_RX:
        return &channel_rxs_lock_group;
    case NR_A:
    case NR_F:
    case NR_LBA:
        return &channel_alloc_lock_group;
    case NR_EV:
        return &channel_evs_lock_group;
    default:
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
    }
}

static void
kr_init_tbr(struct __kern_channel_ring *r)
{
    r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
    r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
    r->ckr_tbr_last = 0;
}

struct kern_pbufpool *
na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
{
    struct kern_pbufpool *pp = NULL;
    switch (t) {
    case NR_RX:
    case NR_F:
    case NR_EV:
        pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
        break;
    case NR_TX:
    case NR_A:
    case NR_LBA:
        pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
        break;
    default:
        VERIFY(0);
        /* NOTREACHED */
        __builtin_unreachable();
    }

    return pp;
}

/*
 * Create the krings array and initialize the fields common to all adapters.
 * The array layout is this:
 *
 *                                      +----------+
 * na->na_tx_rings ------------------>  |          | \
 *                                      |          |  } na->na_num_tx_rings
 *                                      |          | /
 * na->na_rx_rings ------------------>  +----------+
 *                                      |          | \
 *                                      |          |  } na->na_num_rx_rings
 *                                      |          | /
 * na->na_alloc_rings --------------->  +----------+
 *                                      |          | \
 * na->na_free_rings ---------------->  +----------+  } na->na_num_allocator_ring_pairs
 *                                      |          | /
 * na->na_event_rings --------------->  +----------+
 *                                      |          | \
 *                                      |          |  } na->na_num_event_rings
 *                                      |          | /
 * na->na_large_buf_alloc_rings ----->  +----------+
 *                                      |          | \
 *                                      |          |  } na->na_num_large_buf_alloc_rings
 *                                      |          | /
 * na->na_tail ---------------------->  +----------+
 */
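/*
 * For example (hypothetical counts): with 2 TX, 2 RX, 1 alloc/free
 * pair, 1 event and no large-buf rings, the array holds 7 krings and
 * NAKR(na, NR_F) resolves to index 5 (2 + 2 + 1).
 */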
/* call with SK_LOCK held */
static int
na_kr_create(struct nexus_adapter *na, boolean_t alloc_ctx)
{
    lck_grp_t *q_lck_grp, *s_lck_grp;
    uint32_t i, ndesc;
    struct kern_pbufpool *pp = NULL;
    uint32_t count;
    uint32_t tmp_count;
    struct __kern_channel_ring *__counted_by(count) rings;
    struct __kern_channel_ring *__single kring;
    uint32_t n[NR_ALL];
    int c, tot_slots, err = 0;
    enum txrx t;

    SK_LOCK_ASSERT_HELD();

    n[NR_TX] = na_get_nrings(na, NR_TX);
    n[NR_RX] = na_get_nrings(na, NR_RX);
    n[NR_A] = na_get_nrings(na, NR_A);
    n[NR_F] = na_get_nrings(na, NR_F);
    n[NR_EV] = na_get_nrings(na, NR_EV);
    n[NR_LBA] = na_get_nrings(na, NR_LBA);

    /*
     * -fbounds-safety: rings is __counted_by(count), so rings needs to be
     * assigned first, immediately followed by count's assignment.
     */
    tmp_count = n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV] + n[NR_LBA];
    rings = sk_alloc_type_array(struct __kern_channel_ring, tmp_count,
        Z_WAITOK, skmem_tag_nx_rings);
    count = tmp_count;
    na->na_all_rings = rings;
    na->na_all_rings_cnt = count;

    if (__improbable(rings == NULL)) {
        SK_ERR("Cannot allocate krings");
        err = ENOMEM;
        goto error;
    }
    na->na_tx_rings = rings;
    na->na_tx_rings_cnt = n[NR_TX];

    na->na_rx_rings = rings + n[NR_TX];
    na->na_rx_rings_cnt = n[NR_RX];
    if (n[NR_A] != 0) {
        na->na_alloc_rings = rings + n[NR_TX] + n[NR_RX];
        na->na_free_rings = rings + n[NR_TX] + n[NR_RX] + n[NR_A];
        na->na_alloc_free_rings_cnt = n[NR_A];
    } else {
        na->na_alloc_rings = NULL;
        na->na_free_rings = NULL;
        na->na_alloc_free_rings_cnt = 0;
    }
    if (n[NR_EV] != 0) {
        if (na->na_free_rings != NULL) {
            na->na_event_rings = rings + n[NR_TX] +
                n[NR_RX] + n[NR_A] + n[NR_F];
            na->na_event_rings_cnt = n[NR_EV];
        } else {
            na->na_event_rings = rings + n[NR_TX] + n[NR_RX];
            na->na_event_rings_cnt = n[NR_EV];
        }
    }
    if (n[NR_LBA] != 0) {
        ASSERT(n[NR_A] != 0);
        if (na->na_event_rings != NULL) {
            na->na_large_buf_alloc_rings = rings + n[NR_TX] + n[NR_RX] +
                n[NR_A] + n[NR_F] + n[NR_EV];
            na->na_large_buf_alloc_rings_cnt = n[NR_LBA];
        } else {
            /* alloc/free rings must also be present */
            ASSERT(na->na_free_rings != NULL);
            na->na_large_buf_alloc_rings = rings + n[NR_TX] + n[NR_RX] +
                n[NR_A] + n[NR_F];
            na->na_large_buf_alloc_rings_cnt = n[NR_LBA];
        }
    }

    /* total number of slots for TX/RX adapter rings */
    c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
        (n[NR_RX] * na_get_nslots(na, NR_RX));

    /* for scratch space on alloc and free rings */
    if (n[NR_A] != 0) {
        tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
        tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
        tot_slots += n[NR_LBA] * na_get_nslots(na, NR_LBA);
        c = tot_slots;
    }
    na->na_total_slots = tot_slots;

    /* slot context (optional) for all TX/RX ring slots of this adapter */
    if (alloc_ctx) {
        na->na_slot_ctxs =
            skn_alloc_type_array(slot_ctxs, struct slot_ctx,
            na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
        na->na_slot_ctxs_cnt = na->na_total_slots;
        if (na->na_slot_ctxs == NULL) {
            SK_ERR("Cannot allocate slot contexts");
            err = ENOMEM;
            na->na_slot_ctxs = NULL;
            na->na_slot_ctxs_cnt = 0;
            goto error;
        }
        os_atomic_or(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
    }

    /*
     * packet handle array storage for all TX/RX ring slots of this
     * adapter.
     */
    na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
        na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
    na->na_scratch_cnt = na->na_total_slots;
    if (na->na_scratch == NULL) {
        SK_ERR("Cannot allocate scratch space");
        err = ENOMEM;
        na->na_scratch = NULL;
        na->na_scratch_cnt = 0;
        goto error;
    }
1526
1527 /*
1528 * All fields in krings are 0 except the one initialized below.
1529 * but better be explicit on important kring fields.
1530 */
1531 for_all_rings(t) {
1532 ndesc = na_get_nslots(na, t);
1533 pp = na_kr_get_pp(na, t);
1534 for (i = 0; i < n[t]; i++) {
1535 kring = &NAKR(na, t)[i];
1536 bzero(kring, sizeof(*kring));
1537 kring->ckr_na = na;
1538 kring->ckr_pp = pp;
1539 kring->ckr_max_pkt_len =
1540 (t == NR_LBA ? PP_BUF_SIZE_LARGE(pp) :
1541 PP_BUF_SIZE_DEF(pp)) *
1542 pp->pp_max_frags;
1543 kring->ckr_ring_id = i;
1544 kring->ckr_tx = t;
1545 kr_init_to_mhints(kring, ndesc);
1546 kr_init_tbr(kring);
1547 if (NA_KERNEL_ONLY(na)) {
1548 kring->ckr_flags |= CKRF_KERNEL_ONLY;
1549 }
1550 if (na->na_flags & NAF_HOST_ONLY) {
1551 kring->ckr_flags |= CKRF_HOST;
1552 }
1553 ASSERT((t >= NR_TXRX) || (c > 0));
1554 if ((t < NR_TXRX) &&
1555 (na->na_flags & NAF_SLOT_CONTEXT)) {
1556 ASSERT(na->na_slot_ctxs != NULL);
1557 kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1558 kring->ckr_slot_ctxs =
1559 na->na_slot_ctxs + (tot_slots - c);
1560 kring->ckr_slot_ctxs_cnt = kring->ckr_num_slots;
1561 }
1562 ASSERT(na->na_scratch != NULL);
1563 if (t < NR_TXRXAF || t == NR_LBA) {
1564 kring->ckr_scratch =
1565 na->na_scratch + (tot_slots - c);
1566 kring->ckr_scratch_cnt = kring->ckr_num_slots;
1567 }
1568 if (t < NR_TXRXAF || t == NR_LBA) {
1569 c -= ndesc;
1570 }
1571 switch (t) {
1572 case NR_A:
1573 if (i == 0) {
1574 kring->ckr_na_sync =
1575 na_packet_pool_alloc_sync;
1576 kring->ckr_alloc_ws =
1577 na_upp_alloc_lowat;
1578 } else {
1579 ASSERT(i == 1);
1580 kring->ckr_na_sync =
1581 na_packet_pool_alloc_buf_sync;
1582 kring->ckr_alloc_ws =
1583 na_upp_alloc_buf_lowat;
1584 }
1585 break;
1586 case NR_F:
1587 if (i == 0) {
1588 kring->ckr_na_sync =
1589 na_packet_pool_free_sync;
1590 } else {
1591 ASSERT(i == 1);
1592 kring->ckr_na_sync =
1593 na_packet_pool_free_buf_sync;
1594 }
1595 break;
1596 case NR_TX:
1597 kring->ckr_na_sync = na->na_txsync;
1598 if (na->na_flags & NAF_TX_MITIGATION) {
1599 kring->ckr_flags |= CKRF_MITIGATION;
1600 }
1601 switch (na->na_type) {
1602 #if CONFIG_NEXUS_USER_PIPE
1603 case NA_USER_PIPE:
1604 ASSERT(!(na->na_flags &
1605 NAF_USER_PKT_POOL));
1606 kring->ckr_prologue = kr_txprologue;
1607 kring->ckr_finalize = NULL;
1608 break;
1609 #endif /* CONFIG_NEXUS_USER_PIPE */
1610 #if CONFIG_NEXUS_MONITOR
1611 case NA_MONITOR:
1612 ASSERT(!(na->na_flags &
1613 NAF_USER_PKT_POOL));
1614 kring->ckr_prologue = kr_txprologue;
1615 kring->ckr_finalize = NULL;
1616 break;
1617 #endif /* CONFIG_NEXUS_MONITOR */
1618 default:
1619 if (na->na_flags & NAF_USER_PKT_POOL) {
1620 kring->ckr_prologue =
1621 kr_txprologue_upp;
1622 kring->ckr_finalize =
1623 kr_txfinalize_upp;
1624 } else {
1625 kring->ckr_prologue =
1626 kr_txprologue;
1627 kring->ckr_finalize =
1628 kr_txfinalize;
1629 }
1630 break;
1631 }
1632 break;
1633 case NR_RX:
1634 kring->ckr_na_sync = na->na_rxsync;
1635 if (na->na_flags & NAF_RX_MITIGATION) {
1636 kring->ckr_flags |= CKRF_MITIGATION;
1637 }
1638 switch (na->na_type) {
1639 #if CONFIG_NEXUS_USER_PIPE
1640 case NA_USER_PIPE:
1641 ASSERT(!(na->na_flags &
1642 NAF_USER_PKT_POOL));
1643 kring->ckr_prologue =
1644 kr_rxprologue_nodetach;
1645 kring->ckr_finalize = kr_rxfinalize;
1646 break;
1647 #endif /* CONFIG_NEXUS_USER_PIPE */
1648 #if CONFIG_NEXUS_MONITOR
1649 case NA_MONITOR:
1650 ASSERT(!(na->na_flags &
1651 NAF_USER_PKT_POOL));
1652 kring->ckr_prologue =
1653 kr_rxprologue_nodetach;
1654 kring->ckr_finalize = kr_rxfinalize;
1655 break;
1656 #endif /* CONFIG_NEXUS_MONITOR */
1657 default:
1658 if (na->na_flags & NAF_USER_PKT_POOL) {
1659 kring->ckr_prologue =
1660 kr_rxprologue_upp;
1661 kring->ckr_finalize =
1662 kr_rxfinalize_upp;
1663 } else {
1664 kring->ckr_prologue =
1665 kr_rxprologue;
1666 kring->ckr_finalize =
1667 kr_rxfinalize;
1668 }
1669 break;
1670 }
1671 break;
1672 case NR_EV:
1673 kring->ckr_na_sync = kern_channel_event_sync;
1674 break;
1675 case NR_LBA:
1676 kring->ckr_na_sync = na_packet_pool_alloc_large_sync;
1677 kring->ckr_alloc_ws = na_upp_alloc_lowat;
1678 break;
1679 default:
1680 VERIFY(0);
1681 /* NOTREACHED */
1682 __builtin_unreachable();
1683 }
1684 if (t != NR_EV) {
1685 kring->ckr_na_notify = na->na_notify;
1686 } else {
1687 kring->ckr_na_notify = NULL;
1688 }
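/*
 * Compose a human-readable ring name for logging, of the form
 * "<na_name> <kind><index>", with a trailing "^" marking host rings
 * (the exact kind strings come from sk_ring2str()).
 */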
1689 (void) snprintf(kring->ckr_name,
1690 sizeof(kring->ckr_name) - 1,
1691 "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1692 ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1693 SK_DF(SK_VERB_NA | SK_VERB_RING,
1694 "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1695 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1696 CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1697 kring->ckr_state = KR_READY;
1698 q_lck_grp = na_kr_q_lck_grp(t);
1699 s_lck_grp = na_kr_s_lck_grp(t);
1700 kring->ckr_qlock_group = q_lck_grp;
1701 lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1702 &channel_lock_attr);
1703 kring->ckr_slock_group = s_lck_grp;
1704 lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1705 &channel_lock_attr);
1706 csi_init(&kring->ckr_si,
1707 (kring->ckr_flags & CKRF_MITIGATION),
1708 na->na_ch_mit_ival);
1709 }
1710 csi_init(&na->na_si[t],
1711 (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1712 na->na_ch_mit_ival);
1713 }
1714 ASSERT(c == 0);
1715 na->na_tail = rings + n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] +
1716 n[NR_EV] + n[NR_LBA];
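/*
 * na_tail points one element past the last kring, allowing teardown
 * code to iterate the whole array as [na_all_rings, na_tail); see
 * na_kr_delete() below.
 */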
1717
1718 if (na->na_type == NA_NETIF_DEV) {
1719 na_kr_setup_netif_svc_map(na);
1720 }
1721
1722 /* validate now for cases where we create only krings */
1723 na_krings_verify(na);
1724 return 0;
1725
1726 error:
1727 ASSERT(err != 0);
1728 if (rings != NULL) {
1729 sk_free_type_array_counted_by(struct __kern_channel_ring,
1730 na->na_all_rings_cnt, na->na_all_rings);
1731 na->na_tx_rings = NULL;
1732 na->na_tx_rings_cnt = 0;
1733 na->na_rx_rings = NULL;
1734 na->na_rx_rings_cnt = 0;
1735 na->na_alloc_rings = NULL;
1736 na->na_free_rings = NULL;
1737 na->na_alloc_free_rings_cnt = 0;
1738 na->na_event_rings = NULL;
1739 na->na_event_rings_cnt = 0;
1740 na->na_tail = NULL;
1741 }
1742 if (na->na_slot_ctxs != NULL) {
1743 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1744 skn_free_type_array_counted_by(slot_ctxs, struct slot_ctx,
1745 na->na_slot_ctxs_cnt, na->na_slot_ctxs);
1746 na->na_slot_ctxs = NULL;
1747 na->na_slot_ctxs_cnt = 0;
1748 }
1749 if (na->na_scratch != NULL) {
1750 skn_free_type_array_counted_by(scratch, kern_packet_t, na->na_scratch_cnt,
1751 na->na_scratch);
1752 na->na_scratch = NULL;
1753 na->na_scratch_cnt = 0;
1754 }
1755 return err;
1756 }
1757
1758 /* undo the actions performed by na_kr_create() */
1759 /* call with SK_LOCK held */
1760 static void
1761 na_kr_delete(struct nexus_adapter *na)
1762 {
1763 struct __kern_channel_ring *kring;
1764 enum txrx t;
1765
1766 kring = na->na_all_rings;
1767
1768 ASSERT((kring != NULL) && (na->na_tail != NULL));
1769 SK_LOCK_ASSERT_HELD();
1770
1771 for_all_rings(t) {
1772 csi_destroy(&na->na_si[t]);
1773 }
1774 /* we rely on the krings layout described above */
1775 for (; kring != na->na_tail; kring++) {
1776 lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1777 lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1778 csi_destroy(&kring->ckr_si);
1779 if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1780 kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1781 ASSERT(kring->ckr_slot_ctxs != NULL);
1782 kring->ckr_slot_ctxs = NULL;
1783 kring->ckr_slot_ctxs_cnt = 0;
1784 }
1785 kring->ckr_scratch = NULL;
1786 kring->ckr_scratch_cnt = 0;
1787 }
1788 if (na->na_slot_ctxs != NULL) {
1789 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1790 os_atomic_andnot(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
1791 skn_free_type_array_counted_by(na->na_slot_ctxs,
1792 struct slot_ctx, na->na_slot_ctxs_cnt,
1793 na->na_slot_ctxs);
1794 na->na_slot_ctxs = NULL;
1795 na->na_slot_ctxs_cnt = 0;
1796 }
1797 if (na->na_scratch != NULL) {
1798 skn_free_type_array_counted_by(na->na_scratch,
1799 kern_packet_t, na->na_scratch_cnt,
1800 na->na_scratch);
1801 na->na_scratch = NULL;
1802 na->na_scratch_cnt = 0;
1803 }
1804 ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1805 sk_free_type_array_counted_by(struct __kern_channel_ring,
1806 na->na_all_rings_cnt, na->na_all_rings);
1807 na->na_tx_rings = NULL;
1808 na->na_tx_rings_cnt = 0;
1809 na->na_rx_rings = NULL;
1810 na->na_rx_rings_cnt = 0;
1811 na->na_alloc_rings = NULL;
1812 na->na_free_rings = NULL;
1813 na->na_alloc_free_rings_cnt = 0;
1814 na->na_event_rings = NULL;
1815 na->na_event_rings_cnt = 0;
1816 na->na_tail = NULL;
1817 na->na_all_rings = NULL;
1818 na->na_all_rings_cnt = 0;
1819 }
1820
1821 /*
1822 * -fbounds-safety: If kernel_only, usds is NULL, so marking it
1823 * __counted_by(ndesc) would fail bounds check. We could use __sized_by_or_null
1824 * when it's ready: rdar://75598414
1825 * If usds != NULL, then ksds_cnt == usds_cnt
1826 */
1827 static void
1828 na_kr_slot_desc_init(struct __slot_desc *__counted_by(ksds_cnt)ksds,
1829 boolean_t kernel_only, struct __slot_desc *__counted_by(usds_cnt)usds,
1830 size_t ksds_cnt, size_t usds_cnt)
1831 {
1832 size_t i;
1833
1834 bzero(ksds, ksds_cnt * SLOT_DESC_SZ);
1835 if (usds != NULL) {
1836 ASSERT(!kernel_only);
1837 ASSERT(ksds_cnt == usds_cnt);
1838 bzero(usds, usds_cnt * SLOT_DESC_SZ);
1839 } else {
1840 ASSERT(kernel_only);
1841 ASSERT(usds_cnt == 0);
1842 }
1843
1844 for (i = 0; i < ksds_cnt; i++) {
1845 KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1846 if (!kernel_only) {
1847 USD_INIT(SLOT_DESC_USD(&usds[i]));
1848 }
1849 }
1850 }
1851
1852 /* call with SK_LOCK held */
1853 static int
1854 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1855 {
1856 struct skmem_arena *ar = na->na_arena;
1857 struct skmem_arena_nexus *arn;
1858 mach_vm_offset_t roff[SKMEM_REGIONS];
1859 enum txrx t;
1860 uint32_t i;
1861 struct __slot_desc *ksds;
1862
1863 SK_LOCK_ASSERT_HELD();
1864 ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1865 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1866 arn = skmem_arena_nexus(ar);
1867 ASSERT(arn != NULL);
1868
1869 bzero(&roff, sizeof(roff));
1870 for (i = 0; i < SKMEM_REGIONS; i++) {
1871 if (ar->ar_regions[i] == NULL) {
1872 continue;
1873 }
1874
1875 /* not for nexus */
1876 ASSERT(i != SKMEM_REGION_SYSCTLS);
1877
1878 /*
1879 * Get region offsets from base of mmap span; the arena
1880 * doesn't need to be mmap'd at this point, since we
1881 * simply compute the relative offset.
1882 */
1883 roff[i] = skmem_arena_get_region_offset(ar, i);
1884 }
1885
1886 for_all_rings(t) {
1887 for (i = 0; i < na_get_nrings(na, t); i++) {
1888 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1889 struct __user_channel_ring *__single ring = kring->ckr_ring;
1890 mach_vm_offset_t ring_off, usd_roff;
1891 struct skmem_obj_info oi, oim;
1892 uint32_t ndesc;
1893
1894 if (ring != NULL) {
1895 SK_DF(SK_VERB_NA | SK_VERB_RING,
1896 "kr 0x%llx (\"%s\") is already "
1897 "initialized", SK_KVA(kring),
1898 kring->ckr_name);
1899 continue; /* already created by somebody else */
1900 }
1901
1902 if (!KR_KERNEL_ONLY(kring) &&
1903 (ring = skmem_cache_alloc(arn->arn_ring_cache,
1904 SKMEM_NOSLEEP)) == NULL) {
1905 SK_ERR("Cannot allocate %s_ring for kr "
1906 "0x%llx (\"%s\")", sk_ring2str(t),
1907 SK_KVA(kring), kring->ckr_name);
1908 goto cleanup;
1909 }
1910 kring->ckr_flags |= CKRF_MEM_RING_INITED;
1911 kring->ckr_ring = ring;
1912 ndesc = kring->ckr_num_slots;
1913
1914 if (ring == NULL) {
1915 goto skip_user_ring_setup;
1916 }
1917
1918 *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1919
1920 /* offset of current ring in mmap span */
1921 skmem_cache_get_obj_info(arn->arn_ring_cache,
1922 ring, &oi, NULL);
1923 ring_off = (roff[SKMEM_REGION_RING] +
1924 SKMEM_OBJ_ROFF(&oi));
1925
1926 /*
1927 * ring_{buf,md,sd}_ofs offsets are relative to the
1928 * current ring, and not to the base of mmap span.
1929 */
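/*
 * Hypothetical example: if the default-buffer region sits at offset
 * 0x10000 in the mmap span and this ring object at offset 0x4000,
 * then ring_def_buf_base = 0x10000 - 0x4000 = 0xc000, i.e. user
 * space locates the buffers at (ring address + 0xc000).
 */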
1930 *(mach_vm_offset_t *)(uintptr_t)
1931 &ring->ring_def_buf_base =
1932 (roff[SKMEM_REGION_BUF_DEF] - ring_off);
1933 *(mach_vm_offset_t *)(uintptr_t)
1934 &ring->ring_large_buf_base =
1935 (roff[SKMEM_REGION_BUF_LARGE] - ring_off);
1936 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1937 (roff[SKMEM_REGION_UMD] - ring_off);
1938 _CASSERT(sizeof(uint16_t) ==
1939 sizeof(ring->ring_bft_size));
1940 if (roff[SKMEM_REGION_UBFT] != 0) {
1941 ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1942 NULL);
1943 *(mach_vm_offset_t *)(uintptr_t)
1944 &ring->ring_bft_base =
1945 (roff[SKMEM_REGION_UBFT] - ring_off);
1946 *(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1947 (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1948 skr_c_obj_size;
1949 ASSERT(ring->ring_bft_size ==
1950 ar->ar_regions[SKMEM_REGION_KBFT]->
1951 skr_c_obj_size);
1952 } else {
1953 *(mach_vm_offset_t *)(uintptr_t)
1954 &ring->ring_bft_base = 0;
1955 *(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1956 }
1957
1958 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
1959 usd_roff = roff[SKMEM_REGION_TXAUSD];
1960 } else {
1961 ASSERT(t == NR_RX || t == NR_F);
1962 usd_roff = roff[SKMEM_REGION_RXFUSD];
1963 }
1964 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1965 (usd_roff - ring_off);
1966
1967 /* copy values from kring */
1968 ring->ring_head = kring->ckr_rhead;
1969 *(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1970 kring->ckr_khead;
1971 *(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1972 kring->ckr_rtail;
1973
1974 _CASSERT(sizeof(uint32_t) ==
1975 sizeof(ring->ring_def_buf_size));
1976 _CASSERT(sizeof(uint32_t) ==
1977 sizeof(ring->ring_large_buf_size));
1978 _CASSERT(sizeof(uint16_t) ==
1979 sizeof(ring->ring_md_size));
1980 *(uint32_t *)(uintptr_t)&ring->ring_def_buf_size =
1981 ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size;
1982 if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) {
1983 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size =
1984 ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size;
1985 } else {
1986 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0;
1987 }
1988 if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1989 *(uint16_t *)(uintptr_t)&ring->ring_md_size =
1990 (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1991 skr_c_obj_size;
1992 ASSERT(ring->ring_md_size ==
1993 ar->ar_regions[SKMEM_REGION_KMD]->
1994 skr_c_obj_size);
1995 } else {
1996 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1997 ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1998 ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1999 }
2000
2001 /* ring info */
2002 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
2003 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
2004 *(uint16_t *)(uintptr_t)&ring->ring_id =
2005 (uint16_t)kring->ckr_ring_id;
2006 *(uint16_t *)(uintptr_t)&ring->ring_kind =
2007 (uint16_t)kring->ckr_tx;
2008
2009 SK_DF(SK_VERB_NA | SK_VERB_RING,
2010 "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
2011 sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
2012 kring->ckr_name);
2013 SK_DF(SK_VERB_NA | SK_VERB_RING,
2014 " num_slots: %u", ring->ring_num_slots);
2015 SK_DF(SK_VERB_NA | SK_VERB_RING,
2016 " def_buf_base: 0x%llx",
2017 (uint64_t)ring->ring_def_buf_base);
2018 SK_DF(SK_VERB_NA | SK_VERB_RING,
2019 " large_buf_base: 0x%llx",
2020 (uint64_t)ring->ring_large_buf_base);
2021 SK_DF(SK_VERB_NA | SK_VERB_RING,
2022 " md_base: 0x%llx",
2023 (uint64_t)ring->ring_md_base);
2024 SK_DF(SK_VERB_NA | SK_VERB_RING,
2025 " sd_base: 0x%llx",
2026 (uint64_t)ring->ring_sd_base);
2027 SK_DF(SK_VERB_NA | SK_VERB_RING,
2028 " h, t: %u, %u, %u", ring->ring_head,
2029 ring->ring_tail);
2030 SK_DF(SK_VERB_NA | SK_VERB_RING,
2031 " md_size: %d",
2032 (uint64_t)ring->ring_md_size);
2033
2034 /* make sure they're in synch */
2035 _CASSERT(NR_RX == CR_KIND_RX);
2036 _CASSERT(NR_TX == CR_KIND_TX);
2037 _CASSERT(NR_A == CR_KIND_ALLOC);
2038 _CASSERT(NR_F == CR_KIND_FREE);
2039 _CASSERT(NR_EV == CR_KIND_EVENT);
2040 _CASSERT(NR_LBA == CR_KIND_LARGE_BUF_ALLOC);
2041
2042 skip_user_ring_setup:
2043 /*
2044 * This flag tells na_kr_teardown_all() that it should
2045 * go thru the checks to free up the slot maps.
2046 */
2047 kring->ckr_flags |= CKRF_MEM_SD_INITED;
2048 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
2049 kring->ckr_ksds_cache = arn->arn_txaksd_cache;
2050 } else {
2051 ASSERT(t == NR_RX || t == NR_F);
2052 kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
2053 }
2054
2055 ksds = skmem_cache_alloc(kring->ckr_ksds_cache,
2056 SKMEM_NOSLEEP);
2057 if (ksds == NULL) {
2058 SK_ERR("Cannot allocate %s_ksds for kr "
2059 "0x%llx (\"%s\")", sk_ring2str(t),
2060 SK_KVA(kring), kring->ckr_name);
2061 goto cleanup;
2062 }
2063 kring->ckr_ksds = ksds;
2064 kring->ckr_ksds_cnt = kring->ckr_num_slots;
2065 if (!KR_KERNEL_ONLY(kring)) {
2066 skmem_cache_get_obj_info(kring->ckr_ksds_cache,
2067 kring->ckr_ksds, &oi, &oim);
2068 kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
2069 kring->ckr_usds_cnt = kring->ckr_num_slots;
2070 }
2071 na_kr_slot_desc_init(kring->ckr_ksds,
2072 KR_KERNEL_ONLY(kring), kring->ckr_usds,
2073 kring->ckr_ksds_cnt, kring->ckr_usds_cnt);
2074
2075 /* cache last slot descriptor address */
2076 ASSERT(kring->ckr_lim == (ndesc - 1));
2077 kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
2078
2079 if ((t < NR_TXRX) &&
2080 !(na->na_flags & NAF_USER_PKT_POOL) &&
2081 na_kr_populate_slots(kring) != 0) {
2082 SK_ERR("Cannot allocate buffers for kr "
2083 "0x%llx (\"%s\")", SK_KVA(kring),
2084 kring->ckr_name);
2085 goto cleanup;
2086 }
2087 }
2088 }
2089
2090 return 0;
2091
2092 cleanup:
2093 na_kr_teardown_all(na, ch, FALSE);
2094
2095 return ENOMEM;
2096 }
2097
2098 static void
2099 na_kr_teardown_common(struct nexus_adapter *na,
2100 struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
2101 boolean_t defunct)
2102 {
2103 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
2104 struct __user_channel_ring *ckr_ring;
2105 boolean_t sd_idle, sd_inited;
2106
2107 ASSERT(arn != NULL);
2108 kr_enter(kring, TRUE);
2109 /*
2110 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
2111 * to make sure that the freeing needs to happen (else just
2112 * nullify the values).
2113 * If this adapter owns the memory for the slot descriptors,
2114 * check if the region is marked as busy (sd_idle is false)
2115 * and leave the kring's slot descriptor fields alone if so,
2116 * at defunct time. At final teardown time, sd_idle must be
2117 * true else we assert; this indicates a missing call to
2118 * skmem_arena_nexus_sd_set_noidle().
2119 */
2120 sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
2121 if (sd_inited) {
2122 /* callee will do KR_KSD(), so check */
2123 if (((t < NR_TXRX) || (t == NR_EV)) &&
2124 (kring->ckr_ksds != NULL)) {
2125 na_kr_depopulate_slots(kring, ch, defunct);
2126 }
2127 /* leave CKRF_MEM_SD_INITED flag alone until idle */
2128 sd_idle = skmem_arena_nexus_sd_idle(arn);
2129 VERIFY(sd_idle || defunct);
2130 } else {
2131 sd_idle = TRUE;
2132 }
2133
2134 if (sd_idle) {
2135 kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2136 if (kring->ckr_ksds != NULL) {
2137 if (sd_inited) {
2138 skmem_cache_free(kring->ckr_ksds_cache,
2139 kring->ckr_ksds);
2140 }
2141 kring->ckr_ksds = NULL;
2142 kring->ckr_ksds_cnt = 0;
2143 kring->ckr_ksds_last = NULL;
2144 kring->ckr_usds = NULL;
2145 kring->ckr_usds_cnt = 0;
2146 }
2147 ASSERT(kring->ckr_ksds_last == NULL);
2148 ASSERT(kring->ckr_usds == NULL);
2149 }
2150
2151 if ((ckr_ring = kring->ckr_ring) != NULL) {
2152 kring->ckr_ring = NULL;
2153 }
2154
2155 if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2156 ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2157 if (ckr_ring != NULL) {
2158 skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2159 }
2160 kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2161 }
2162
2163 if (defunct) {
2164 /* if defunct, drop everything; see KR_DROP() */
2165 kring->ckr_flags |= CKRF_DEFUNCT;
2166 }
2167 kr_exit(kring);
2168 }
2169
2170 /*
2171 * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2172 */
2173 static void
2174 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2175 boolean_t defunct)
2176 {
2177 enum txrx t;
2178
2179 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2180
2181 /* skip if this adapter has no allocated rings */
2182 if (na->na_tx_rings == NULL) {
2183 return;
2184 }
2185
2186 for_all_rings(t) {
2187 for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2188 na_kr_teardown_common(na, &NAKR(na, t)[i],
2189 t, ch, defunct);
2190 }
2191 }
2192 }
2193
2194 /*
2195 * Teardown only {tx,rx} rings assigned to the channel.
2196 */
2197 static void
2198 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2199 boolean_t defunct, struct proc *p)
2200 {
2201 enum txrx t;
2202
2203 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2204
2205 for_rx_tx(t) {
2206 ring_id_t qfirst = ch->ch_first[t];
2207 ring_id_t qlast = ch->ch_last[t];
2208 uint32_t i;
2209
2210 for (i = qfirst; i < qlast; i++) {
2211 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2212 na_kr_teardown_common(na, kring, t, ch, defunct);
2213
2214 /*
2215 * Issue a notify to wake up anyone sleeping in kqueue
2216 * so that they notice the newly defuncted channels and
2217 * return an error
2218 */
2219 kring->ckr_na_notify(kring, p, 0);
2220 }
2221 }
2222 }
2223
2224 static int
2225 na_kr_populate_slots(struct __kern_channel_ring *kring)
2226 {
2227 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2228 struct nexus_adapter *na = KRNA(kring);
2229 kern_pbufpool_t pp = kring->ckr_pp;
2230 uint32_t nslots = kring->ckr_num_slots;
2231 uint32_t start_idx, i;
2232 uint32_t sidx = 0; /* slot counter */
2233 struct __kern_slot_desc *ksd;
2234 struct __user_slot_desc *usd;
2235 struct __kern_quantum *kqum;
2236 nexus_type_t nexus_type;
2237 int err = 0;
2238
2239 ASSERT(kring->ckr_tx < NR_TXRX);
2240 ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2241 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2242 ASSERT(pp != NULL);
2243
2244 /*
2245 * xxx_ppool: remove this special case
2246 */
2247 nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2248
2249 switch (nexus_type) {
2250 case NEXUS_TYPE_FLOW_SWITCH:
2251 case NEXUS_TYPE_KERNEL_PIPE:
2252 /*
2253 * xxx_ppool: This is temporary code until we come up with a
2254 * scheme for user space to alloc & attach packets to tx ring.
2255 */
2256 if (kernel_only || kring->ckr_tx == NR_RX) {
2257 return 0;
2258 }
2259 break;
2260
2261 case NEXUS_TYPE_NET_IF:
2262 if (((na->na_type == NA_NETIF_DEV) ||
2263 (na->na_type == NA_NETIF_HOST)) &&
2264 (kernel_only || (kring->ckr_tx == NR_RX))) {
2265 return 0;
2266 }
2267
2268 ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2269 (na->na_type == NA_NETIF_COMPAT_HOST) ||
2270 (na->na_type == NA_NETIF_DEV) ||
2271 (na->na_type == NA_NETIF_VP));
2272
2273 if (!kernel_only) {
2274 if (kring->ckr_tx == NR_RX) {
2275 return 0;
2276 } else {
2277 break;
2278 }
2279 }
2280
2281 ASSERT(kernel_only);
2282
2283 if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2284 (na->na_type == NA_NETIF_COMPAT_HOST)) {
2285 return 0;
2286 }
2287 VERIFY(0);
2288 /* NOTREACHED */
2289 __builtin_unreachable();
2290
2291 case NEXUS_TYPE_USER_PIPE:
2292 case NEXUS_TYPE_MONITOR:
2293 break;
2294
2295 default:
2296 VERIFY(0);
2297 /* NOTREACHED */
2298 __builtin_unreachable();
2299 }
2300
2301 /* Fill the ring with packets */
2302 sidx = start_idx = 0;
2303 for (i = 0; i < nslots; i++) {
2304 kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2305 SKMEM_NOSLEEP));
2306 if (kqum == NULL) {
2307 err = ENOMEM;
2308 SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2309 "after %u of %u, err %d", SK_KVA(na->na_arena),
2310 na->na_arena->ar_name, i, nslots, err);
2311 goto cleanup;
2312 }
2313 ksd = KR_KSD(kring, i);
2314 usd = (kernel_only ? NULL : KR_USD(kring, i));
2315
2316 /* attach packet to slot */
2317 kqum->qum_ksd = ksd;
2318 ASSERT(!KSD_VALID_METADATA(ksd));
2319 KSD_ATTACH_METADATA(ksd, kqum);
2320 if (usd != NULL) {
2321 USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2322 kr_externalize_metadata(kring, pp->pp_max_frags,
2323 kqum, current_proc());
2324 }
2325
2326 SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2327 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2328 SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2329 SK_KVA(&kqum->qum_buf[0]));
2330 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2331 SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2332 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2333 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2334 SK_KVA(usd), METADATA_IDX(kqum),
2335 SK_KVA(kqum->qum_user),
2336 kqum->qum_user->qum_buf[0].buf_idx,
2337 SK_KVA(&kqum->qum_user->qum_buf[0]));
2338 }
2339
2340 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2341 }
2342
2343 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2344 SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2345
2346 cleanup:
2347 if (err != 0) {
2348 sidx = start_idx;
2349 while (i-- > 0) {
2350 ksd = KR_KSD(kring, i);
2351 usd = (kernel_only ? NULL : KR_USD(kring, i));
2352 kqum = ksd->sd_qum;
2353
2354 ASSERT(ksd == kqum->qum_ksd);
2355 KSD_RESET(ksd);
2356 if (usd != NULL) {
2357 USD_RESET(usd);
2358 }
2359 /* detach packet from slot */
2360 kqum->qum_ksd = NULL;
2361 pp_free_packet(pp, SK_PTR_ADDR(kqum));
2362
2363 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2364 }
2365 }
2366 return err;
2367 }
2368
2369 static void
2370 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2371 struct kern_channel *ch, boolean_t defunct)
2372 {
2373 #pragma unused(ch)
2374 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2375 uint32_t i, j, n = kring->ckr_num_slots;
2376 struct nexus_adapter *na = KRNA(kring);
2377 struct kern_pbufpool *pp = kring->ckr_pp;
2378 boolean_t upp = FALSE;
2379 obj_idx_t midx;
2380
2381 ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2382 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2383
2384 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2385
2386 if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2387 (kring->ckr_tx != NR_EV)) {
2388 upp = TRUE;
2389 }
2390 for (i = 0, j = 0; i < n; i++) {
2391 struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2392 struct __user_slot_desc *usd;
2393 struct __kern_quantum *qum, *kqum;
2394 boolean_t free_packet = FALSE;
2395 int err;
2396
2397 if (!KSD_VALID_METADATA(ksd)) {
2398 continue;
2399 }
2400
2401 kqum = ksd->sd_qum;
2402 usd = (kernel_only ? NULL : KR_USD(kring, i));
2403 midx = METADATA_IDX(kqum);
2404
2405 /*
2406 * if the packet is internalized it should not be in the
2407 * hash table of packets loaned to user space.
2408 */
2409 if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2410 if ((qum = pp_find_upp(pp, midx)) != NULL) {
2411 panic("internalized packet 0x%llx in htbl",
2412 SK_KVA(qum));
2413 /* NOTREACHED */
2414 __builtin_unreachable();
2415 }
2416 free_packet = TRUE;
2417 } else if (upp) {
2418 /*
2419 * if the packet is not internalized check if it is
2420 * in the list of packets loaned to user-space.
2421 * Remove from the list before freeing.
2422 */
2423 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2424 qum = pp_remove_upp(pp, midx, &err);
2425 if (err != 0) {
2426 SK_ERR("un-allocated packet or buflet %d %p",
2427 midx, SK_KVA(qum));
2428 if (qum != NULL) {
2429 free_packet = TRUE;
2430 }
2431 }
2432 } else {
2433 free_packet = TRUE;
2434 }
2435
2436 /*
2437 * Clear the user and kernel slot descriptors. Note that
2438 * if we are depopulating the slots due to defunct (and not
2439 * due to normal deallocation/teardown), we leave the user
2440 * slot descriptor alone. At that point the process may
2441 * be suspended, and later when it resumes it would just
2442 * pick up the original contents and move forward with
2443 * whatever it was doing.
2444 */
2445 KSD_RESET(ksd);
2446 if (usd != NULL && !defunct) {
2447 USD_RESET(usd);
2448 }
2449
2450 /* detach packet from slot */
2451 kqum->qum_ksd = NULL;
2452
2453 SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2454 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2455 METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2456 SK_KVA(&kqum->qum_buf[0]));
2457 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2458 SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2459 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2460 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2461 SK_KVA(usd), METADATA_IDX(kqum),
2462 SK_KVA(kqum->qum_user),
2463 kqum->qum_user->qum_buf[0].buf_idx,
2464 SK_KVA(&kqum->qum_user->qum_buf[0]));
2465 }
2466
2467 if (free_packet) {
2468 pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2469 }
2470 }
2471
2472 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2473 SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2474 j, n);
2475 }
2476
2477 int
2478 na_rings_mem_setup(struct nexus_adapter *na,
2479 boolean_t alloc_ctx, struct kern_channel *ch)
2480 {
2481 boolean_t kronly;
2482 int err;
2483
2484 SK_LOCK_ASSERT_HELD();
2485 ASSERT(na->na_channels == 0);
2486 /*
2487 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2488 * the backing memory regions for the adapter.
2489 */
2490 kronly = (na->na_flags & NAF_MEM_NO_INIT);
2491 ASSERT(!kronly || NA_KERNEL_ONLY(na));
2492
2493 /*
2494 * Create and initialize the common fields of the krings array,
2495 * using the information that must be already available in the na.
2496 */
2497 if ((err = na_kr_create(na, alloc_ctx)) == 0 && !kronly) {
2498 err = na_kr_setup(na, ch);
2499 if (err != 0) {
2500 na_kr_delete(na);
2501 }
2502 }
2503
2504 return err;
2505 }
2506
2507 void
2508 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2509 boolean_t defunct)
2510 {
2511 SK_LOCK_ASSERT_HELD();
2512 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2513
2514 /*
2515 * Deletes the kring and ring array of the adapter. They
2516 * must have been created using na_rings_mem_setup().
2517 *
2518 * XXX: [email protected] -- the parameter "ch" should not be
2519 * needed here; however na_kr_depopulate_slots() needs to
2520 * go thru the channel's user packet pool hash, and so for
2521 * now we leave it here.
2522 */
2523 na_kr_teardown_all(na, ch, defunct);
2524 if (!defunct) {
2525 na_kr_delete(na);
2526 }
2527 }
2528
2529 void
2530 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2531 {
2532 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2533
2534 /*
2535 * Depopulate slots on the TX and RX rings of this channel,
2536 * but don't touch other rings owned by other channels if
2537 * this adapter is being shared.
2538 */
2539 na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2540 }
2541
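/*
 * Toggle CKRF_DROP on every TX/RX ring of the adapter; see KR_DROP().
 * The flag is flipped under kr_enter() when possible so that any sync
 * in progress drains first; if kr_enter() fails, the flag is still
 * updated and the failure is merely logged.
 */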
2542 void
2543 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2544 {
2545 enum txrx t;
2546 uint32_t i;
2547
2548 for_rx_tx(t) {
2549 for (i = 0; i < na_get_nrings(na, t); i++) {
2550 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2551 int error;
2552 error = kr_enter(kring, TRUE);
2553 if (drop) {
2554 kring->ckr_flags |= CKRF_DROP;
2555 } else {
2556 kring->ckr_flags &= ~CKRF_DROP;
2557 }
2558
2559 if (error != 0) {
2560 SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2561 "kr_enter failed %d",
2562 na->na_name, SK_KVA(na),
2563 kring->ckr_name, SK_KVA(kring),
2564 error);
2565 } else {
2566 kr_exit(kring);
2567 }
2568 SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2569 "krflags 0x%b", na->na_name, SK_KVA(na),
2570 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2571 CKRF_BITS);
2572 }
2573 }
2574 }
2575
2576 /*
2577 * Set the stopped/enabled status of the ring. When stopping, this also
2578 * waits for all current activity on the ring to terminate. The status
2579 * change is then notified using the na_notify callback of the na.
2580 */
2581 static void
2582 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2583 uint32_t state)
2584 {
2585 struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2586
2587 /*
2588 * Mark the ring as stopped/enabled, and run through the
2589 * locks to make sure other users get to see it.
2590 */
2591 if (state == KR_READY) {
2592 kr_start(kr);
2593 } else {
2594 kr_stop(kr, state);
2595 }
2596 }
2597
2598
2599 /* stop or enable all the rings of na */
2600 static void
2601 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2602 {
2603 uint32_t i;
2604 enum txrx t;
2605
2606 SK_LOCK_ASSERT_HELD();
2607
2608 if (!NA_IS_ACTIVE(na)) {
2609 return;
2610 }
2611
2612 for_rx_tx(t) {
2613 for (i = 0; i < na_get_nrings(na, t); i++) {
2614 na_set_ring(na, i, t, state);
2615 }
2616 }
2617 }
2618
2619 /*
2620 * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
2621 * to finish and prevents any new one from starting. Call this before turning
2622 * Skywalk mode off, or before removing the hardware rings (e.g., on module
2623 * unload). As a rule of thumb for Linux drivers, this should be placed near
2624 * each napi_disable().
2625 */
2626 void
2627 na_disable_all_rings(struct nexus_adapter *na)
2628 {
2629 na_set_all_rings(na, KR_STOPPED);
2630 }
2631
2632 /*
2633 * Convenience function used in drivers. Re-enables rxsync and txsync on the
2634 * adapter's rings. In Linux drivers, this should be placed near each
2635 * napi_enable().
2636 */
2637 void
2638 na_enable_all_rings(struct nexus_adapter *na)
2639 {
2640 na_set_all_rings(na, KR_READY /* enabled */);
2641 }
2642
2643 void
2644 na_lock_all_rings(struct nexus_adapter *na)
2645 {
2646 na_set_all_rings(na, KR_LOCKED);
2647 }
2648
2649 void
2650 na_unlock_all_rings(struct nexus_adapter *na)
2651 {
2652 na_enable_all_rings(na);
2653 }
2654
2655 int
2656 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2657 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2658 {
2659 struct nexus_adapter *__single na = NULL;
2660 mach_vm_size_t memsize = 0;
2661 int err = 0;
2662 enum txrx t;
2663
2664 ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2665 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2666
2667 SK_LOCK_ASSERT_HELD();
2668
2669 /* find the nexus adapter and return the reference */
2670 err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2671 if (err != 0) {
2672 ASSERT(na == NULL);
2673 goto done;
2674 }
2675
2676 if (NA_KERNEL_ONLY(na)) {
2677 err = EBUSY;
2678 goto done;
2679 }
2680
2681 /* reject if the adapter is defunct or non-permissive */
2682 if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2683 err = ENXIO;
2684 goto done;
2685 }
2686
2687 err = na_bind_channel(na, ch, chr);
2688 if (err != 0) {
2689 goto done;
2690 }
2691
2692 ASSERT(ch->ch_schema != NULL);
2693 ASSERT(na == ch->ch_na);
2694
2695 for_all_rings(t) {
2696 if (na_get_nrings(na, t) == 0) {
2697 ch->ch_si[t] = NULL;
2698 continue;
2699 }
2700 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2701 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2702 }
2703
2704 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2705
2706 if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2707 AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2708 os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_PRIV_MEM, relaxed);
2709 }
2710
2711 err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2712 if (err != 0) {
2713 goto done;
2714 }
2715
2716 os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_ACTIVE, relaxed);
2717 chr->cr_memsize = memsize;
2718 chr->cr_memoffset = ch->ch_schema_offset;
2719
2720 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2721 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2722 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2723 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2724 na->na_flags, NAF_BITS);
2725
2726 done:
2727 if (err != 0) {
2728 if (ch->ch_schema != NULL || na != NULL) {
2729 if (ch->ch_schema != NULL) {
2730 ASSERT(na == ch->ch_na);
2731 /*
2732 * Callee will unmap memory region if needed,
2733 * as well as release reference held on 'na'.
2734 */
2735 na_disconnect(nx, ch);
2736 na = NULL;
2737 }
2738 if (na != NULL) {
2739 (void) na_release_locked(na);
2740 na = NULL;
2741 }
2742 }
2743 }
2744
2745 return err;
2746 }
2747
2748 void
2749 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2750 {
2751 #pragma unused(nx)
2752 enum txrx t;
2753
2754 SK_LOCK_ASSERT_HELD();
2755
2756 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2757 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2758 ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2759 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2760 ch->ch_na->na_flags, NAF_BITS);
2761
2762 /* destroy mapping and release references */
2763 na_unbind_channel(ch);
2764 ASSERT(ch->ch_na == NULL);
2765 ASSERT(ch->ch_schema == NULL);
2766 for_all_rings(t) {
2767 ch->ch_si[t] = NULL;
2768 }
2769 }
2770
2771 void
2772 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2773 struct nexus_adapter *na, boolean_t locked)
2774 {
2775 #pragma unused(nx)
2776 SK_LOCK_ASSERT_HELD();
2777 if (!locked) {
2778 lck_mtx_lock(&ch->ch_lock);
2779 }
2780
2781 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2782
2783 if (!(na->na_flags & NAF_DEFUNCT)) {
2784 /*
2785 * Mark this adapter as defunct to inform nexus-specific
2786 * teardown handler called by na_teardown() below.
2787 */
2788 os_atomic_or(&na->na_flags, NAF_DEFUNCT, relaxed);
2789
2790 /*
2791 * Depopulate slots.
2792 */
2793 na_teardown(na, ch, TRUE);
2794
2795 /*
2796 * And finally destroy any already-defunct memory regions.
2797 * Do this only if the nexus adapter owns the arena, i.e.
2798 * NAF_MEM_LOANED is not set. Otherwise, we'd expect
2799 * that this routine be called again for the real owner.
2800 */
2801 if (!(na->na_flags & NAF_MEM_LOANED)) {
2802 skmem_arena_defunct(na->na_arena);
2803 }
2804 }
2805
2806 SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2807 "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2808 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2809 na->na_name, ch->ch_info->cinfo_nx_port,
2810 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2811 na->na_flags, NAF_BITS);
2812
2813 if (!locked) {
2814 lck_mtx_unlock(&ch->ch_lock);
2815 }
2816 }
2817
2818 /*
2819 * TODO: [email protected] -- merge this into na_connect()
2820 */
2821 int
2822 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2823 struct chreq *chr, struct proc *p)
2824 {
2825 #pragma unused(p)
2826 struct nexus_adapter *__single na = NULL;
2827 mach_vm_size_t memsize = 0;
2828 int error = 0;
2829 enum txrx t;
2830
2831 ASSERT(chr->cr_mode & CHMODE_KERNEL);
2832 ASSERT(ch->ch_flags & CHANF_KERNEL);
2833 ASSERT(ch->ch_na == NULL);
2834 ASSERT(ch->ch_schema == NULL);
2835
2836 SK_LOCK_ASSERT_HELD();
2837
2838 error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2839 if (error != 0) {
2840 goto done;
2841 }
2842
2843 if (na == NULL) {
2844 error = EINVAL;
2845 goto done;
2846 }
2847
2848 if (na->na_channels > 0) {
2849 error = EBUSY;
2850 goto done;
2851 }
2852
2853 if (na->na_flags & NAF_DEFUNCT) {
2854 error = ENXIO;
2855 goto done;
2856 }
2857
2858 /*
2859 * Special connect requires the nexus adapter to handle its
2860 * own channel binding and unbinding via na_special(); bail
2861 * if this adapter doesn't support it.
2862 */
2863 if (na->na_special == NULL) {
2864 error = ENOTSUP;
2865 goto done;
2866 }
2867
2868 /* upon success, "ch->ch_na" will point to "na" */
2869 error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2870 if (error != 0) {
2871 ASSERT(ch->ch_na == NULL);
2872 goto done;
2873 }
2874
2875 ASSERT(na->na_flags & NAF_SPEC_INIT);
2876 ASSERT(na == ch->ch_na);
2877 /* make sure this is still the case */
2878 ASSERT(ch->ch_schema == NULL);
2879
2880 for_rx_tx(t) {
2881 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2882 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2883 }
2884
2885 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2886 chr->cr_memsize = memsize;
2887
2888 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2889 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2890 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2891 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2892 na->na_flags, NAF_BITS);
2893
2894 done:
2895 if (error != 0) {
2896 if (ch->ch_na != NULL || na != NULL) {
2897 if (ch->ch_na != NULL) {
2898 ASSERT(na == ch->ch_na);
2899 /* callee will release reference on 'na' */
2900 na_disconnect_spec(nx, ch);
2901 na = NULL;
2902 }
2903 if (na != NULL) {
2904 (void) na_release_locked(na);
2905 na = NULL;
2906 }
2907 }
2908 }
2909
2910 return error;
2911 }
2912
2913 /*
2914 * TODO: [email protected] -- merge this into na_disconnect()
2915 */
2916 void
2917 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2918 {
2919 #pragma unused(nx)
2920 struct nexus_adapter *na = ch->ch_na;
2921 enum txrx t;
2922 int error;
2923
2924 SK_LOCK_ASSERT_HELD();
2925 ASSERT(na != NULL);
2926 ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */
2927
2928 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2929 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2930 na->na_name, ch->ch_info->cinfo_nx_port,
2931 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2932 na->na_flags, NAF_BITS);
2933
2934 /* take a reference for this routine */
2935 na_retain_locked(na);
2936
2937 ASSERT(ch->ch_flags & CHANF_KERNEL);
2938 ASSERT(ch->ch_schema == NULL);
2939 ASSERT(na->na_special != NULL);
2940 /* unbind this channel */
2941 error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2942 ASSERT(error == 0);
2943 ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2944
2945 /* now release our reference; this may be the last */
2946 na_release_locked(na);
2947 na = NULL;
2948
2949 ASSERT(ch->ch_na == NULL);
2950 for_rx_tx(t) {
2951 ch->ch_si[t] = NULL;
2952 }
2953 }
2954
2955 void
2956 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2957 {
2958 #pragma unused(nx)
2959 struct nexus_adapter *na = ch->ch_na;
2960
2961 SK_LOCK_ASSERT_HELD();
2962
2963 ASSERT(ch->ch_flags & CHANF_KERNEL);
2964 ASSERT(NA_KERNEL_ONLY(na));
2965 ASSERT(na->na_special != NULL);
2966
2967 na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2968 }
2969
2970 void
2971 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2972 {
2973 #pragma unused(nx)
2974 struct nexus_adapter *na = ch->ch_na;
2975
2976 SK_LOCK_ASSERT_HELD();
2977
2978 ASSERT(ch->ch_flags & CHANF_KERNEL);
2979 ASSERT(NA_KERNEL_ONLY(na));
2980 ASSERT(na->na_special != NULL);
2981
2982 na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2983 }
2984
2985 /*
2986 * MUST BE CALLED UNDER SK_LOCK()
2987 *
2988 * Get a refcounted reference to a nexus adapter attached
2989 * to the interface specified by chr.
2990 * This is always called in the execution of an ioctl().
2991 *
2992 * Return ENXIO if the interface specified by the request does
2993 * not exist, ENOTSUP if Skywalk is not supported by the interface,
2994 * EINVAL if parameters are invalid, ENOMEM if needed resources
2995 * could not be allocated.
2996 * If successful, hold a reference to the nexus adapter.
2997 *
2998 * No reference is kept on the real interface, which may then
2999 * disappear at any time.
3000 */
3001 int
3002 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
3003 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
3004 struct nexus_adapter **na, boolean_t create)
3005 {
3006 int error = 0;
3007
3008 _CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
3009
3010 *na = NULL; /* default return value */
3011
3012 SK_LOCK_ASSERT_HELD();
3013
3014 /*
3015 * We cascade through all possible types of nexus adapter.
3016 * All nx_*_na_find() functions return an error and an na,
3017 * with the following combinations:
3018 *
3019 * error na
3020 * 0 NULL type doesn't match
3021 * !0 NULL type matches, but na creation/lookup failed
3022 * 0 !NULL type matches and na created/found
3023 * !0 !NULL impossible
3024 */
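/*
 * Illustrative caller sketch, mirroring na_connect() below (local
 * variable names are hypothetical); on success a reference is held
 * on *na which the caller must drop via na_release_locked():
 *
 *	struct nexus_adapter *na = NULL;
 *	int err = na_find(ch, nx, chr, NULL, NULL, p, &na, TRUE);
 *	if (err == 0) {
 *		... use na ...
 *		(void) na_release_locked(na);
 *	}
 */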
3025
3026 #if CONFIG_NEXUS_MONITOR
3027 /* try to see if this is a monitor port */
3028 error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
3029 if (error != 0 || *na != NULL) {
3030 return error;
3031 }
3032 #endif /* CONFIG_NEXUS_MONITOR */
3033 #if CONFIG_NEXUS_USER_PIPE
3034 /* try to see if this is a pipe port */
3035 error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
3036 if (error != 0 || *na != NULL) {
3037 return error;
3038 }
3039 #endif /* CONFIG_NEXUS_USER_PIPE */
3040 #if CONFIG_NEXUS_KERNEL_PIPE
3041 /* try to see if this is a kernel pipe port */
3042 error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
3043 if (error != 0 || *na != NULL) {
3044 return error;
3045 }
3046 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
3047 #if CONFIG_NEXUS_FLOWSWITCH
3048 /* try to see if this is a flowswitch port */
3049 error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
3050 if (error != 0 || *na != NULL) {
3051 return error;
3052 }
3053 #endif /* CONFIG_NEXUS_FLOWSWITCH */
3054 #if CONFIG_NEXUS_NETIF
3055 error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
3056 if (error != 0 || *na != NULL) {
3057 return error;
3058 }
3059 #endif /* CONFIG_NEXUS_NETIF */
3060
3061 ASSERT(*na == NULL);
3062 return ENXIO;
3063 }
3064
3065 void
3066 na_retain_locked(struct nexus_adapter *na)
3067 {
3068 SK_LOCK_ASSERT_HELD();
3069
3070 if (na != NULL) {
3071 #if SK_LOG
3072 uint32_t oref = os_atomic_inc_orig(&na->na_refcount, relaxed);
3073 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
3074 na->na_name, SK_KVA(na), oref + 1, na->na_channels);
3075 #else /* !SK_LOG */
3076 os_atomic_inc(&na->na_refcount, relaxed);
3077 #endif /* !SK_LOG */
3078 }
3079 }
3080
3081 /* returns 1 iff the nexus_adapter is destroyed */
3082 int
3083 na_release_locked(struct nexus_adapter *na)
3084 {
3085 uint32_t oref;
3086
3087 SK_LOCK_ASSERT_HELD();
3088
3089 ASSERT(na->na_refcount > 0);
3090 oref = os_atomic_dec_orig(&na->na_refcount, relaxed);
3091 if (oref > 1) {
3092 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
3093 na->na_name, SK_KVA(na), oref - 1, na->na_channels);
3094 return 0;
3095 }
3096 ASSERT(na->na_channels == 0);
3097
3098 if (na->na_dtor != NULL) {
3099 na->na_dtor(na);
3100 }
3101
3102 ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
3103 ASSERT(na->na_slot_ctxs == NULL);
3104 ASSERT(na->na_scratch == NULL);
3105
3106 #if CONFIG_NEXUS_USER_PIPE
3107 nx_upipe_na_dealloc(na);
3108 #endif /* CONFIG_NEXUS_USER_PIPE */
3109 if (na->na_arena != NULL) {
3110 skmem_arena_release(na->na_arena);
3111 na->na_arena = NULL;
3112 }
3113
3114 SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
3115 na->na_name, SK_KVA(na));
3116
3117 NA_FREE(na);
3118 return 1;
3119 }
3120
3121 static struct nexus_adapter *
3122 na_pseudo_alloc(zalloc_flags_t how)
3123 {
3124 struct nexus_adapter *na;
3125
3126 na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
3127 if (na) {
3128 na->na_type = NA_PSEUDO;
3129 na->na_free = na_pseudo_free;
3130 }
3131 return na;
3132 }
3133
3134 static void
3135 na_pseudo_free(struct nexus_adapter *na)
3136 {
3137 ASSERT(na->na_refcount == 0);
3138 SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3139 bzero(na, sizeof(*na));
3140 zfree(na_pseudo_zone, na);
3141 }
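/*
 * The pseudo adapter serves only as a generic parent (see
 * na_pseudo_create() below), so its tx/rx sync callbacks perform no
 * data-path work beyond debug logging and sanity checks.
 */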
3142
3143 static int
3144 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3145 uint32_t flags)
3146 {
3147 #pragma unused(kring, p, flags)
3148 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
3149 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3150 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3151 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3152 flags);
3153
3154 return 0;
3155 }
3156
3157 static int
3158 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3159 uint32_t flags)
3160 {
3161 #pragma unused(kring, p, flags)
3162 SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3163 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
3164 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3165 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3166 flags);
3167
3168 ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3169
3170 return 0;
3171 }
3172
3173 static int
3174 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3175 {
3176 SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3177 SK_KVA(na), na_activate_mode2str(mode));
3178
3179 switch (mode) {
3180 case NA_ACTIVATE_MODE_ON:
3181 os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
3182 break;
3183
3184 case NA_ACTIVATE_MODE_DEFUNCT:
3185 break;
3186
3187 case NA_ACTIVATE_MODE_OFF:
3188 os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);
3189 break;
3190
3191 default:
3192 VERIFY(0);
3193 /* NOTREACHED */
3194 __builtin_unreachable();
3195 }
3196
3197 return 0;
3198 }
3199
3200 static void
3201 na_pseudo_dtor(struct nexus_adapter *na)
3202 {
3203 #pragma unused(na)
3204 }
3205
3206 static int
3207 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3208 {
3209 return na_rings_mem_setup(na, FALSE, ch);
3210 }
3211
3212 static void
3213 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3214 boolean_t defunct)
3215 {
3216 na_rings_mem_teardown(na, ch, defunct);
3217 }
3218
3219 /*
3220 * Pseudo nexus adapter; typically used as a generic parent adapter.
3221 */
3222 int
3223 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3224 struct nexus_adapter **ret)
3225 {
3226 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3227 struct nexus_adapter *na;
3228 int error;
3229
3230 SK_LOCK_ASSERT_HELD();
3231 *ret = NULL;
3232
3233 na = na_pseudo_alloc(Z_WAITOK);
3234
3235 ASSERT(na->na_type == NA_PSEUDO);
3236 ASSERT(na->na_free == na_pseudo_free);
3237
3238 (void) strbufcpy(na->na_name, chr->cr_name);
3239 uuid_generate_random(na->na_uuid);
3240
3241 /*
3242 * Verify upper bounds; for all cases including user pipe nexus,
3243 * the parameters must have already been validated by corresponding
3244 * nxdom_prov_params() function defined by each domain.
3245 */
3246 na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3247 na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3248 na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3249 na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3250 ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3251 ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3252 ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3253 ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3254
3255 na->na_txsync = na_pseudo_txsync;
3256 na->na_rxsync = na_pseudo_rxsync;
3257 na->na_activate = na_pseudo_activate;
3258 na->na_dtor = na_pseudo_dtor;
3259 na->na_krings_create = na_pseudo_krings_create;
3260 na->na_krings_delete = na_pseudo_krings_delete;
3261
3262 *(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3263 NEXUS_STATS_TYPE_INVALID;
3264
3265 /* other fields are set in the common routine */
3266 na_attach_common(na, nx, NX_DOM_PROV(nx));
3267
3268 if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3269 nx, na)) != 0) {
3270 ASSERT(na->na_arena == NULL);
3271 goto err;
3272 }
3273 ASSERT(na->na_arena != NULL);
3274
3275 *(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3276 ASSERT(na->na_flowadv_max == 0 ||
3277 skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3278
3279 #if SK_LOG
3280 uuid_string_t uuidstr;
3281 SK_D("na_name: \"%s\"", na->na_name);
3282 SK_D(" UUID: %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3283 SK_D(" nx: 0x%llx (\"%s\":\"%s\")",
3284 SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3285 NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3286 SK_D(" flags: %b", na->na_flags, NAF_BITS);
3287 SK_D(" flowadv_max: %u", na->na_flowadv_max);
3288 SK_D(" rings: tx %u rx %u",
3289 na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3290 SK_D(" slots: tx %u rx %u",
3291 na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3292 #if CONFIG_NEXUS_USER_PIPE
3293 SK_D(" next_pipe: %u", na->na_next_pipe);
3294 SK_D(" max_pipes: %u", na->na_max_pipes);
3295 #endif /* CONFIG_NEXUS_USER_PIPE */
3296 #endif /* SK_LOG */
3297
3298 *ret = na;
3299 na_retain_locked(na);
3300
3301 return 0;
3302
3303 err:
3304 if (na != NULL) {
3305 if (na->na_arena != NULL) {
3306 skmem_arena_release(na->na_arena);
3307 na->na_arena = NULL;
3308 }
3309 NA_FREE(na);
3310 }
3311 return error;
3312 }
3313
3314 void
3315 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3316 const flowadv_idx_t fe_idx, const uint32_t flowid)
3317 {
3318 struct skmem_arena *ar = na->na_arena;
3319 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3320 struct __flowadv_entry *__single fae;
3321
3322 ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3323 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3324
3325 AR_LOCK(ar);
3326
3327 /* we must not get here if arena is defunct; this must be valid */
3328 ASSERT(arn->arn_flowadv_obj != NULL);
3329
3330 VERIFY(fe_idx < na->na_flowadv_max);
3331 fae = &arn->arn_flowadv_obj[fe_idx];
3332 uuid_copy(fae->fae_id, fae_id);
3333 fae->fae_flowid = flowid;
3334 fae->fae_flags = FLOWADVF_VALID;
3335
3336 AR_UNLOCK(ar);
3337 }
3338
3339 void
3340 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3341 const flowadv_idx_t fe_idx, const uint32_t flowid)
3342 {
3343 #pragma unused(fae_id)
3344 struct skmem_arena *ar = na->na_arena;
3345 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3346
3347 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3348 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3349
3350 AR_LOCK(ar);
3351
3352 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3353 if (arn->arn_flowadv_obj != NULL) {
3354 struct __flowadv_entry *__single fae;
3355
3356 VERIFY(fe_idx < na->na_flowadv_max);
3357 fae = &arn->arn_flowadv_obj[fe_idx];
3358 ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3359 uuid_clear(fae->fae_id);
3360 VERIFY(fae->fae_flowid == flowid);
3361 fae->fae_flowid = 0;
3362 fae->fae_flags = 0;
3363 }
3364
3365 AR_UNLOCK(ar);
3366 }
3367
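/*
 * Flow advisory set/clear/report all follow the same pattern: the
 * caller presents the token it last observed for the flow; the state
 * change is applied only if the entry at fe_idx still carries that
 * token, and the return value says whether it did.  Callers must
 * check the result, since the flow may have been torn down or its
 * slot reused in the meantime.
 */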
3368 bool
3369 na_flowadv_set(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3370 const flowadv_token_t flow_token)
3371 {
3372 struct nexus_adapter *na = ch->ch_na;
3373 struct skmem_arena *ar = na->na_arena;
3374 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3375 uuid_string_t fae_uuid_str;
3376 bool suspend = false;
3377
3378 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3379 ASSERT(fe_idx < na->na_flowadv_max);
3380 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3381
3382 AR_LOCK(ar);
3383
3384 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3385
3386 if (arn->arn_flowadv_obj != NULL) {
3387 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3388
3389 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3390 /*
3391 * We cannot guarantee that the flow is still around by now,
3392 * so check if that's the case and let the caller know.
3393 */
3394 if ((suspend = (fae->fae_token == flow_token))) {
3395 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3396 fae->fae_flags |= FLOWADVF_SUSPENDED;
3397 uuid_unparse(fae->fae_id, fae_uuid_str);
3398 }
3399 } else {
3400 suspend = false;
3401 }
3402 if (suspend) {
3403 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x fidx %u "
3404 "SUSPEND", sk_proc_name_address(current_proc()),
3405 sk_proc_pid(current_proc()), fae_uuid_str, flow_token, fe_idx);
3406 } else {
3407 SK_ERR("%s(%d) flow token 0x%llu fidx %u no longer around",
3408 sk_proc_name_address(current_proc()),
3409 sk_proc_pid(current_proc()), flow_token, fe_idx);
3410 }
3411
3412 AR_UNLOCK(ar);
3413
3414 return suspend;
3415 }
3416
3417 bool
3418 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3419 const flowadv_token_t flow_token)
3420 {
3421 struct nexus_adapter *na = ch->ch_na;
3422 struct skmem_arena *ar = na->na_arena;
3423 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3424 uuid_string_t fae_uuid_str;
3425 boolean_t resume = false;
3426
3427 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3428 ASSERT(fe_idx < na->na_flowadv_max);
3429 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3430
3431 AR_LOCK(ar);
3432
3433 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3434
3435 if (arn->arn_flowadv_obj != NULL) {
3436 struct __flowadv_entry *__single fae = &arn->arn_flowadv_obj[fe_idx];
3437
3438 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3439 /*
3440 * We cannot guarantee that the flow is still around by now,
3441 * so check if that's the case and let the caller know.
3442 */
3443 if ((resume = (fae->fae_token == flow_token))) {
3444 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3445 fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3446 uuid_unparse(fae->fae_id, fae_uuid_str);
3447 }
3448 } else {
3449 resume = FALSE;
3450 }
3451 if (resume) {
3452 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x "
3453 "fidx %u RESUME", ch->ch_name, ch->ch_pid, fae_uuid_str, flow_token,
3454 fe_idx);
3455 } else {
3456 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3457 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3458 }
3459
3460 AR_UNLOCK(ar);
3461
3462 return resume;
3463 }
3464
3465 int
3466 na_flowadv_report_ce_event(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3467 const flowadv_token_t flow_token, uint32_t ce_cnt, uint32_t total_pkt_cnt)
3468 {
3469 struct nexus_adapter *na = ch->ch_na;
3470 struct skmem_arena *ar = na->na_arena;
3471 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3472 uuid_string_t fae_uuid_str;
3473 boolean_t added;
3474
3475 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3476 ASSERT(fe_idx < na->na_flowadv_max);
3477 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3478
3479 AR_LOCK(ar);
3480
3481 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3482
3483 if (arn->arn_flowadv_obj != NULL) {
3484 struct __flowadv_entry *__single fae = &arn->arn_flowadv_obj[fe_idx];
3485
3486 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3487 /*
3488 * We cannot guarantee that the flow is still around by now,
3489 * so check if that's the case and let the caller know.
3490 */
3491 if ((added = (fae->fae_token == flow_token))) {
3492 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3493 fae->fae_ce_cnt += ce_cnt;
3494 fae->fae_pkt_cnt += total_pkt_cnt;
3495 uuid_unparse(fae->fae_id, fae_uuid_str);
3496 }
3497 } else {
3498 added = false;
3499 }
3500 if (added) {
3501 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x "
3502 "fidx %u ce cnt incremented", ch->ch_name,
3503 ch->ch_pid, fae_uuid_str, flow_token, fe_idx);
3504 } else {
3505 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3506 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3507 }
3508
3509 AR_UNLOCK(ar);
3510
3511 return added;
3512 }
3513
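/*
 * na_flowadv_event: post a flow-advisory update event on a TX ring so
 * channel watchers re-read the advisory table
 * (CHAN_FILT_HINT_FLOW_ADV_UPD).
 */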
3514 void
3515 na_flowadv_event(struct __kern_channel_ring *kring)
3516 {
3517 ASSERT(kring->ckr_tx == NR_TX);
3518
3519 SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
3520 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
3521 KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3522
3523 na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3524 }
3525
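/*
 * na_packet_pool_free_sync: process the user packet pool's free ring.
 * Every slot between ckr_khead and ckr_rhead names a packet the user
 * has returned; each is detached from its slot, collected in
 * ckr_scratch, and freed back to the pool in a single batch.
 *
 * Ring arithmetic, with illustrative numbers: for num_slots = 128,
 * khead = 120 and rhead = 8, n = 8 - 120 = -112, which wraps to
 * -112 + 128 = 16 slots to reclaim.
 */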
3526 static int
3527 na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3528 uint32_t flags)
3529 {
3530 #pragma unused(flags, p)
3531 int n, ret = 0;
3532 slot_idx_t j;
3533 struct __kern_slot_desc *ksd;
3534 struct __user_slot_desc *usd;
3535 struct __kern_quantum *kqum;
3536 struct kern_pbufpool *pp = kring->ckr_pp;
3537 uint32_t nfree = 0;
3538
3539 /* packet pool list is protected by channel lock */
3540 ASSERT(!KR_KERNEL_ONLY(kring));
3541
3542 /* # of new slots */
3543 n = kring->ckr_rhead - kring->ckr_khead;
3544 if (n < 0) {
3545 n += kring->ckr_num_slots;
3546 }
3547
3548 /* nothing to free */
3549 if (__improbable(n == 0)) {
3550 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3551 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3552 "nothing to free");
3553 goto done;
3554 }
3555
3556 j = kring->ckr_khead;
3557 PP_LOCK(pp);
3558 while (n--) {
3559 int err;
3560
3561 ksd = KR_KSD(kring, j);
3562 usd = KR_USD(kring, j);
3563
3564 if (__improbable(!SD_VALID_METADATA(usd))) {
3565 SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3566 ret = EINVAL;
3567 break;
3568 }
3569
3570 kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3571 if (__improbable(err != 0)) {
3572 SK_ERR("un-allocated packet or buflet %d %p",
3573 usd->sd_md_idx, SK_KVA(kqum));
3574 ret = EINVAL;
3575 break;
3576 }
3577
3578 /* detach and free the packet */
3579 kqum->qum_qflags &= ~QUM_F_FINALIZED;
3580 kqum->qum_ksd = NULL;
3581 ASSERT(!KSD_VALID_METADATA(ksd));
3582 USD_DETACH_METADATA(usd);
3583 ASSERT(pp == kqum->qum_pp);
3584 ASSERT(nfree < kring->ckr_num_slots);
3585 kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3586 j = SLOT_NEXT(j, kring->ckr_lim);
3587 }
3588 PP_UNLOCK(pp);
3589
3590 if (__probable(nfree > 0)) {
3591 pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3592 }
3593
3594 kring->ckr_khead = j;
3595 kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3596
3597 done:
3598 return ret;
3599 }
3600
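/*
 * alloc_packets: batch-allocate packet handles for the alloc ring.  In
 * the "large" case the packets are allocated bufletless, then a
 * separately allocated large buflet is attached to each, MAX_BUFLETS
 * at a time; packets left without a buffer when buflet allocation
 * falls short are freed back to the pool.  Returns 0 on full success,
 * EAGAIN on a partial allocation (*ph_cnt is updated to what was
 * obtained), or ENOMEM when nothing could be allocated.
 */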
3601 #define MAX_BUFLETS 64
3602 static int
3603 alloc_packets(kern_pbufpool_t pp, uint64_t *__counted_by(*ph_cnt) buf_arr, bool large,
3604 uint32_t *ph_cnt)
3605 {
3606 int err;
3607 uint32_t need, need_orig, remain, alloced, i;
3608 uint64_t buflets[MAX_BUFLETS];
3609 uint64_t *__indexable pkts;
3610
3611 need_orig = *ph_cnt;
3612 err = kern_pbufpool_alloc_batch_nosleep(pp, large ? 0 : 1, buf_arr, ph_cnt);
3613 if (!large) {
3614 return err;
3615 }
3616 if (*ph_cnt == 0) {
3617 SK_ERR("failed to alloc %d packets for alloc ring: err %d",
3618 need_orig, err);
3619 DTRACE_SKYWALK2(alloc__pkts__fail, uint32_t, need_orig, int, err);
3620 return err;
3621 }
3622 need = remain = *ph_cnt;
3623 alloced = 0;
3624 pkts = buf_arr;
3625 while (remain > 0) {
3626 uint32_t cnt, cnt_orig;
3627
3628 cnt = MIN(remain, MAX_BUFLETS);
3629 cnt_orig = cnt;
3630 err = pp_alloc_buflet_batch(pp, buflets, &cnt, SKMEM_NOSLEEP, true);
3631 if (cnt == 0) {
3632 SK_ERR("failed to alloc %d buflets for alloc ring: "
3633 "remain %d, err %d", cnt_orig, remain, err);
3634 DTRACE_SKYWALK3(alloc__bufs__fail, uint32_t, cnt_orig,
3635 uint32_t, remain, int, err);
3636 break;
3637 }
3638 for (i = 0; i < cnt; i++) {
3639 kern_packet_t ph = (kern_packet_t)pkts[i];
3640 kern_buflet_t __single buf = __unsafe_forge_single(
3641 kern_buflet_t, buflets[i]);
3642 kern_buflet_t pbuf = kern_packet_get_next_buflet(ph, NULL);
3643 VERIFY(kern_packet_add_buflet(ph, pbuf, buf) == 0);
3644 buflets[i] = 0;
3645 }
3646 DTRACE_SKYWALK3(alloc__bufs, uint32_t, remain, uint32_t, cnt,
3647 uint32_t, cnt_orig);
3648 pkts += cnt;
3649 alloced += cnt;
3650 remain -= cnt;
3651 }
3652 /* free packets without attached buffers */
3653 if (remain > 0) {
3654 DTRACE_SKYWALK1(remaining__pkts, uint32_t, remain);
3655 ASSERT(remain + alloced == need);
3656 pp_free_packet_batch(pp, pkts, remain);
3657
3658 /* pp_free_packet_batch() should clear the pkts array */
3659 for (i = 0; i < remain; i++) {
3660 ASSERT(pkts[i] == 0);
3661 }
3662 }
3663 *ph_cnt = alloced;
3664 if (*ph_cnt == 0) {
3665 err = ENOMEM;
3666 } else if (*ph_cnt < need_orig) {
3667 err = EAGAIN;
3668 } else {
3669 err = 0;
3670 }
3671 DTRACE_SKYWALK3(alloc__packets, uint32_t, need_orig, uint32_t, *ph_cnt, int, err);
3672 return err;
3673 }
3674
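/*
 * na_packet_pool_alloc_sync_common: refill the user packet pool's
 * alloc ring.  The target working set ckr_alloc_ws adapts to demand: a
 * forced sync grows it by 50% (capped at ckr_lim), while a quiet
 * interval of at least na_upp_ws_hold_time with at least a quarter of
 * the set still unclaimed shrinks it by 25%, never below
 * na_upp_alloc_lowat.  Only the shortfall (working set minus busy
 * slots) is actually allocated.
 *
 * Illustrative numbers: curr_ws = 64 grows to 64 + 32 = 96 on the
 * forced path and shrinks to 64 - 16 = 48 on the decay path; with
 * b = 40 busy slots and curr_ws = 64, only 24 packets are requested.
 */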
3675 static int
3676 na_packet_pool_alloc_sync_common(struct __kern_channel_ring *kring, struct proc *p,
3677 uint32_t flags, bool large)
3678 {
3679 int b, err;
3680 uint32_t n = 0;
3681 slot_idx_t j;
3682 uint64_t now;
3683 uint32_t curr_ws, ph_needed, ph_cnt;
3684 struct __kern_slot_desc *ksd;
3685 struct __user_slot_desc *usd;
3686 struct __kern_quantum *kqum;
3687 kern_pbufpool_t pp = kring->ckr_pp;
3688 pid_t pid = proc_pid(p);
3689
3690 /* packet pool list is protected by channel lock */
3691 ASSERT(!KR_KERNEL_ONLY(kring));
3692 ASSERT(!PP_KERNEL_ONLY(pp));
3693
3694 now = _net_uptime;
3695 if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3696 if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3697 kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3698 }
3699 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3700 "%s: purged curr_ws(%d)", kring->ckr_name,
3701 kring->ckr_alloc_ws);
3702 return 0;
3703 }
3704 /* reclaim the completed slots */
3705 kring->ckr_khead = kring->ckr_rhead;
3706
3707 /* # of busy (unclaimed) slots */
3708 b = kring->ckr_ktail - kring->ckr_khead;
3709 if (b < 0) {
3710 b += kring->ckr_num_slots;
3711 }
3712
3713 curr_ws = kring->ckr_alloc_ws;
3714 if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3715 /* increment the working set by 50% */
3716 curr_ws += (curr_ws >> 1);
3717 curr_ws = MIN(curr_ws, kring->ckr_lim);
3718 } else {
3719 if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3720 (uint32_t)b >= (curr_ws >> 2)) {
3721 /* decrease the working set by 25% */
3722 curr_ws -= (curr_ws >> 2);
3723 }
3724 }
3725 curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
3726 if (curr_ws > (uint32_t)b) {
3727 n = curr_ws - b;
3728 }
3729 kring->ckr_alloc_ws = curr_ws;
3730 kring->ckr_sync_time = now;
3731
3732 /* min with # of avail free slots (subtract busy from max) */
3733 n = ph_needed = MIN(n, kring->ckr_lim - b);
3734 j = kring->ckr_ktail;
3735 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3736 "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3737
3738 if ((ph_cnt = ph_needed) == 0) {
3739 goto done;
3740 }
3741
3742 err = alloc_packets(pp, kring->ckr_scratch,
3743 PP_HAS_BUFFER_ON_DEMAND(pp) && large, &ph_cnt);
3744 if (__improbable(ph_cnt == 0)) {
3745 SK_ERR("kr 0x%llx failed to alloc %u packet s(%d)",
3746 SK_KVA(kring), ph_needed, err);
3747 kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
3748 } else {
3749 /*
3750 * Add packets to the allocated list of user packet pool.
3751 */
3752 pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
3753 }
3754
3755 for (n = 0; n < ph_cnt; n++) {
3756 ksd = KR_KSD(kring, j);
3757 usd = KR_USD(kring, j);
3758
3759 kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
3760 kring->ckr_scratch[n] = 0;
3761 ASSERT(kqum != NULL);
3762
3763 /* cleanup any stale slot mapping */
3764 KSD_RESET(ksd);
3765 ASSERT(usd != NULL);
3766 USD_RESET(usd);
3767
3768 /*
3769 * Since this packet is freshly allocated and we need to
3770 * have the flag set for the attach to succeed, just set
3771 * it here rather than calling __packet_finalize().
3772 */
3773 kqum->qum_qflags |= QUM_F_FINALIZED;
3774
3775 /* Attach packet to slot */
3776 KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
3777 /*
3778 * externalize the packet as it is being transferred to
3779 * user space.
3780 */
3781 kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);
3782
3783 j = SLOT_NEXT(j, kring->ckr_lim);
3784 }
3785 done:
3786 ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3787 kring->ckr_ktail = j;
3788 return 0;
3789 }
3790
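/*
 * Thin wrappers over the common alloc-sync routine: the regular alloc
 * ring attaches normal buffers, while the large variant asks for large
 * buflets when the pool supports buffer-on-demand.
 */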
3791 static int
3792 na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
3793 uint32_t flags)
3794 {
3795 return na_packet_pool_alloc_sync_common(kring, p, flags, false);
3796 }
3797
3798 static int
3799 na_packet_pool_alloc_large_sync(struct __kern_channel_ring *kring, struct proc *p,
3800 uint32_t flags)
3801 {
3802 return na_packet_pool_alloc_sync_common(kring, p, flags, true);
3803 }
3804
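/*
 * na_packet_pool_free_buf_sync: buflet analogue of
 * na_packet_pool_free_sync(); walks the free ring and returns each
 * user-released buflet to the pool via pp_free_buflet().
 */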
3805 static int
3806 na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3807 uint32_t flags)
3808 {
3809 #pragma unused(flags, p)
3810 int n, ret = 0;
3811 slot_idx_t j;
3812 struct __kern_slot_desc *ksd;
3813 struct __user_slot_desc *usd;
3814 struct __kern_buflet *kbft;
3815 struct kern_pbufpool *pp = kring->ckr_pp;
3816
3817 /* packet pool list is protected by channel lock */
3818 ASSERT(!KR_KERNEL_ONLY(kring));
3819
3820 /* # of new slots */
3821 n = kring->ckr_rhead - kring->ckr_khead;
3822 if (n < 0) {
3823 n += kring->ckr_num_slots;
3824 }
3825
3826 /* nothing to free */
3827 if (__improbable(n == 0)) {
3828 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3829 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3830 "nothing to free");
3831 goto done;
3832 }
3833
3834 j = kring->ckr_khead;
3835 while (n--) {
3836 int err;
3837
3838 ksd = KR_KSD(kring, j);
3839 usd = KR_USD(kring, j);
3840
3841 if (__improbable(!SD_VALID_METADATA(usd))) {
3842 SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3843 ret = EINVAL;
3844 break;
3845 }
3846
3847 kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
3848 if (__improbable(err != 0)) {
3849 SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
3850 SK_KVA(kbft));
3851 ret = EINVAL;
3852 break;
3853 }
3854
3855 /* detach and free the packet */
3856 ASSERT(!KSD_VALID_METADATA(ksd));
3857 USD_DETACH_METADATA(usd);
3858 pp_free_buflet(pp, kbft);
3859 j = SLOT_NEXT(j, kring->ckr_lim);
3860 }
3861 kring->ckr_khead = j;
3862 kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3863
3864 done:
3865 return ret;
3866 }
3867
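/*
 * na_packet_pool_alloc_buf_sync: buflet analogue of
 * na_packet_pool_alloc_sync_common(); applies the same adaptive
 * working-set policy (floored at na_upp_alloc_buf_lowat), then
 * inserts, externalizes and attaches freshly allocated buflets to the
 * alloc ring's slots.
 */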
3868 static int
3869 na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3870 uint32_t flags)
3871 {
3872 int b, err;
3873 uint32_t n = 0;
3874 slot_idx_t j;
3875 uint64_t now;
3876 uint32_t curr_ws, bh_needed, bh_cnt;
3877 struct __kern_slot_desc *ksd;
3878 struct __user_slot_desc *usd;
3879 struct __kern_buflet *kbft;
3880 struct __kern_buflet_ext *kbe;
3881 kern_pbufpool_t pp = kring->ckr_pp;
3882 pid_t pid = proc_pid(p);
3883
3884 /* packet pool list is protected by channel lock */
3885 ASSERT(!KR_KERNEL_ONLY(kring));
3886 ASSERT(!PP_KERNEL_ONLY(pp));
3887
3888 now = _net_uptime;
3889 if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3890 if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3891 kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3892 }
3893 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3894 "%s: purged curr_ws(%d)", kring->ckr_name,
3895 kring->ckr_alloc_ws);
3896 return 0;
3897 }
3898 /* reclaim the completed slots */
3899 kring->ckr_khead = kring->ckr_rhead;
3900
3901 /* # of busy (unclaimed) slots */
3902 b = kring->ckr_ktail - kring->ckr_khead;
3903 if (b < 0) {
3904 b += kring->ckr_num_slots;
3905 }
3906
3907 curr_ws = kring->ckr_alloc_ws;
3908 if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3909 /* increment the working set by 50% */
3910 curr_ws += (curr_ws >> 1);
3911 curr_ws = MIN(curr_ws, kring->ckr_lim);
3912 } else {
3913 if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3914 (uint32_t)b >= (curr_ws >> 2)) {
3915 /* decrease the working set by 25% */
3916 curr_ws -= (curr_ws >> 2);
3917 }
3918 }
3919 curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
3920 if (curr_ws > (uint32_t)b) {
3921 n = curr_ws - b;
3922 }
3923 kring->ckr_alloc_ws = curr_ws;
3924 kring->ckr_sync_time = now;
3925
3926 /* min with # of avail free slots (subtract busy from max) */
3927 n = bh_needed = MIN(n, kring->ckr_lim - b);
3928 j = kring->ckr_ktail;
3929 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3930 "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3931
3932 if ((bh_cnt = bh_needed) == 0) {
3933 goto done;
3934 }
3935
3936 err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
3937 SKMEM_NOSLEEP, false);
3938
3939 if (bh_cnt == 0) {
3940 SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
3941 SK_KVA(kring), bh_needed, err);
3942 kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
3943 }
3944
3945 for (n = 0; n < bh_cnt; n++) {
3946 struct __user_buflet *ubft;
3947
3948 ksd = KR_KSD(kring, j);
3949 usd = KR_USD(kring, j);
3950
3951 kbe = __unsafe_forge_single(struct __kern_buflet_ext *,
3952 (kring->ckr_scratch[n]));
3953 kbft = &kbe->kbe_overlay;
3954
3955 kring->ckr_scratch[n] = 0;
3956 ASSERT(kbft != NULL);
3957
3958 /*
3959 * Add buflet to the allocated list of user packet pool.
3960 */
3961 pp_insert_upp_bft(pp, kbft, pid);
3962
3963 /*
3964 * externalize the buflet as it is being transferred to
3965 * user space.
3966 */
3967 ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
3968 KBUF_EXTERNALIZE(kbft, ubft, pp);
3969
3970 /* cleanup any stale slot mapping */
3971 KSD_RESET(ksd);
3972 ASSERT(usd != NULL);
3973 USD_RESET(usd);
3974
3975 /* Attach buflet to slot */
3976 KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);
3977
3978 j = SLOT_NEXT(j, kring->ckr_lim);
3979 }
3980 done:
3981 ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3982 kring->ckr_ktail = j;
3983 return 0;
3984 }
3985
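/*
 * na_drain: reap the adapter arena's caches, purging (releasing
 * everything) or pruning (trimming) as requested.  NAF_DRAINING makes
 * repeated calls no-ops until the next channel sync clears the flag.
 */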
3986 /* The caller needs to ensure that the NA stays intact */
3987 void
3988 na_drain(struct nexus_adapter *na, boolean_t purge)
3989 {
3990 /* will be cleared on next channel sync */
3991 if (!(os_atomic_or_orig(&na->na_flags, NAF_DRAINING, relaxed) &
3992 NAF_DRAINING) && NA_IS_ACTIVE(na)) {
3993 SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
3994 na->na_name, (purge ? "purging" : "pruning"),
3995 SK_KVA(na), na->na_flags, NAF_BITS);
3996
3997 /* reap (purge/prune) caches in the arena */
3998 skmem_arena_reap(na->na_arena, purge);
3999 }
4000 }
4001