1 /*
2 * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31 * All rights reserved.
32 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55 #include <sys/systm.h>
56 #include <skywalk/os_skywalk_private.h>
57 #include <skywalk/nexus/monitor/nx_monitor.h>
58 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
59 #include <skywalk/nexus/netif/nx_netif.h>
60 #include <skywalk/nexus/upipe/nx_user_pipe.h>
61 #include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
62 #include <kern/thread.h>
63
/*
 * Forward declarations: kring use-count accounting, ring-id
 * interpretation and channel bind/teardown helpers.
 */
static int na_krings_use(struct kern_channel *);
static void na_krings_unuse(struct kern_channel *);
static void na_krings_verify(struct nexus_adapter *);
static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
static void na_set_all_rings(struct nexus_adapter *, uint32_t);
static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
static void na_unset_ringid(struct kern_channel *);
static void na_teardown(struct nexus_adapter *, struct kern_channel *,
    boolean_t);

/* kring array creation/deletion and per-slot (de)population */
static int na_kr_create(struct nexus_adapter *, boolean_t);
static void na_kr_delete(struct nexus_adapter *);
static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
    boolean_t);
static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
    boolean_t, struct proc *);
static int na_kr_populate_slots(struct __kern_channel_ring *);
static void na_kr_depopulate_slots(struct __kern_channel_ring *,
    struct kern_channel *, boolean_t defunct);

/* per-fd __user_channel_schema allocation */
static int na_schema_alloc(struct kern_channel *);

/* pseudo nexus adapter: minimal adapter backed by na_pseudo_zone */
static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
static void na_pseudo_free(struct nexus_adapter *);
static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
    uint32_t);
static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
static void na_pseudo_dtor(struct nexus_adapter *);
static int na_pseudo_krings_create(struct nexus_adapter *,
    struct kern_channel *);
static void na_pseudo_krings_delete(struct nexus_adapter *,
    struct kern_channel *, boolean_t);
/* sync callbacks for the user packet pool alloc/free rings */
static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_large_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
110
#define NA_KRING_IDLE_TIMEOUT   (NSEC_PER_SEC * 30) /* 30 seconds */

/* zone backing na_pseudo_alloc()/na_pseudo_free() */
static SKMEM_TYPE_DEFINE(na_pseudo_zone, struct nexus_adapter);

/* set by na_init(), cleared by na_fini() */
static int __na_inited = 0;

/*
 * WMM (Wi-Fi Multimedia) mapping helpers: translate a packet service
 * class into a TX ring index via PKT_SC2TC(), populate the adapter's
 * service-class lookup table, and tag a TX kring with its service class.
 */
#define NA_NUM_WMM_CLASSES      4
#define NAKR_WMM_SC2RINGID(_s)  PKT_SC2TC(_s)
#define NAKR_SET_SVC_LUT(_n, _s)                                        \
	(_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
#define NAKR_SET_KR_SVC(_n, _s)                                         \
	NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)

/* user packet pool: lowat for packets cached in the channel */
#define NA_UPP_ALLOC_LOWAT      8
static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;

/* user packet pool: reaper invocation interval */
#define NA_UPP_REAP_INTERVAL    10 /* seconds */
static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;

/* user packet pool: working-set hold time before reaping */
#define NA_UPP_WS_HOLD_TIME     2 /* seconds */
static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;

/* user packet pool: minimum packet count threshold for reaping */
#define NA_UPP_REAP_MIN_PKTS    0
static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;

/* user packet pool: lowat for buffers cached in the channel */
#define NA_UPP_ALLOC_BUF_LOWAT  64
static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;

#if (DEVELOPMENT || DEBUG)
/* development/debug only: tunables exposed via sysctl + error injection */
static uint64_t _na_inject_error = 0;
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
	_SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)

SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
    NA_UPP_WS_HOLD_TIME, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
    NA_UPP_REAP_INTERVAL, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
    NA_UPP_REAP_MIN_PKTS, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
    NA_UPP_ALLOC_LOWAT, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
    CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
    NA_UPP_ALLOC_BUF_LOWAT, "");
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
    CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
#else
#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
#endif /* !DEVELOPMENT && !DEBUG */

/* allocation tags for ring, context and scratch memory */
#define SKMEM_TAG_NX_RINGS      "com.apple.skywalk.nexus.rings"
static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);

#define SKMEM_TAG_NX_CONTEXTS   "com.apple.skywalk.nexus.contexts"
static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);

#define SKMEM_TAG_NX_SCRATCH    "com.apple.skywalk.nexus.scratch"
static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);
173
/*
 * One-time initialization of the nexus adapter layer, called during
 * Skywalk startup.  Performs compile-time layout validations and marks
 * the module as initialized; must not be called twice without an
 * intervening na_fini().
 */
void
na_init(void)
{
	/*
	 * Changing the size of nexus_mdata structure won't break ABI,
	 * but we need to be mindful of memory consumption; Thus here
	 * we add a compile-time check to make sure the size is within
	 * the expected limit and that it's properly aligned. This
	 * check may be adjusted in future as needed.
	 */
	_CASSERT(sizeof(struct nexus_mdata) <= 32 &&
	    IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
	_CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));

	/* see comments on nexus_meta_type_t */
	_CASSERT(NEXUS_META_TYPE_MAX == 3);
	_CASSERT(NEXUS_META_SUBTYPE_MAX == 3);

	ASSERT(!__na_inited);

	__na_inited = 1;
}
196
197 void
na_fini(void)198 na_fini(void)
199 {
200 if (__na_inited) {
201 __na_inited = 0;
202 }
203 }
204
/*
 * Interpret the ringid of a chreq, by translating it into a pair
 * of intervals of ring indices:
 *
 * [txfirst, txlast) and [rxfirst, rxlast)
 *
 * Returns 0 on success, or EINVAL when the (ring_id, ring_set) tuple
 * does not identify any valid ring on the adapter.
 */
int
na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
    ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
{
	enum txrx t;

	switch (ring_set) {
	case RING_SET_ALL:
		/*
		 * Ring pair eligibility: all ring(s).
		 * A specific ring_id is acceptable as long as it is
		 * within range for at least one of the two directions.
		 */
		if (ring_id != CHANNEL_RING_ID_ANY &&
		    ring_id >= na_get_nrings(na, NR_TX) &&
		    ring_id >= na_get_nrings(na, NR_RX)) {
			SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
			    na->na_name, (int)ring_id, ring_set);
			return EINVAL;
		}
		for_rx_tx(t) {
			if (ring_id == CHANNEL_RING_ID_ANY) {
				/* every ring in this direction */
				first[t] = 0;
				last[t] = na_get_nrings(na, t);
			} else {
				/* just the single requested ring */
				first[t] = ring_id;
				last[t] = ring_id + 1;
			}
		}
		break;

	default:
		SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
		return EINVAL;
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING,
	    "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
	    na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
	    first[NR_RX], last[NR_RX]);

	return 0;
}
252
/*
 * Set the ring ID. For devices with a single queue, a request
 * for all rings is the same as a single ring.
 *
 * Fills in ch_first[]/ch_last[] for every ring category (TX, RX,
 * allocator, free, large-buf allocator, event) and accounts for
 * channels spanning multiple rings in na_si_users.  Returns 0, or
 * EINVAL propagated from na_interp_ringid().
 */
static int
na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
{
	struct nexus_adapter *na = ch->ch_na;
	int error;
	enum txrx t;
	uint32_t n_alloc_rings;

	/* translate (ring_id, ring_set) into the TX/RX index intervals */
	if ((error = na_interp_ringid(na, ring_id, ring_set,
	    ch->ch_first, ch->ch_last)) != 0) {
		return error;
	}

	/*
	 * Allocator/free rings are all-or-nothing: when the adapter has
	 * them, the channel binds the full set starting at index 0.
	 */
	n_alloc_rings = na_get_nrings(na, NR_A);
	if (n_alloc_rings != 0) {
		uint32_t n_large_alloc_rings;

		ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
		ch->ch_last[NR_A] = ch->ch_last[NR_F] =
		    ch->ch_first[NR_A] + n_alloc_rings;

		n_large_alloc_rings = na_get_nrings(na, NR_LBA);
		ch->ch_first[NR_LBA] = 0;
		ch->ch_last[NR_LBA] = ch->ch_first[NR_LBA] + n_large_alloc_rings;
	} else {
		ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
		ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
		ch->ch_first[NR_LBA] = ch->ch_last[NR_LBA] = 0;
	}
	/* event rings: the channel always sees the adapter's full set */
	ch->ch_first[NR_EV] = 0;
	ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);

	/* XXX: should we initialize na_si_users for event ring ? */

	/*
	 * Optimization: count the users registered for more than
	 * one ring, which are the ones sleeping on the global queue.
	 * The default na_notify() callback will then avoid signaling
	 * the global queue if nobody is using it
	 */
	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			na->na_si_users[t]++;
			ASSERT(na->na_si_users[t] != 0);
		}
	}
	return 0;
}
305
/*
 * Undo na_set_ringid(): drop the na_si_users count for directions in
 * which this channel spanned multiple rings, then reset the channel's
 * TX/RX ring intervals.  Note the order matters: ch_is_multiplex()
 * must be evaluated before ch_first/ch_last are cleared.
 */
static void
na_unset_ringid(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	enum txrx t;

	for_rx_tx(t) {
		if (ch_is_multiplex(ch, t)) {
			ASSERT(na->na_si_users[t] != 0);
			na->na_si_users[t]--;
		}
		ch->ch_first[t] = ch->ch_last[t] = 0;
	}
}
320
/*
 * Check that the rings we want to bind are not exclusively owned by a previous
 * bind. If exclusive ownership has been requested, we also mark the rings.
 * Returns 0 on success, or EBUSY when any requested kring is unavailable;
 * on EBUSY no usage counts have been modified (first pass is read-only).
 */
/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static int
na_krings_use(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;
	struct __kern_channel_ring *__single kring;
	boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
	enum txrx t;
	uint32_t i;

	SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
	    na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
	    ch->ch_first[NR_RX], ch->ch_last[NR_RX]);

	/*
	 * First round: check that all the requested rings
	 * are neither already exclusively owned, nor do we
	 * want exclusive ownership when they are already in use
	 */
	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
			    (kring->ckr_users && excl)) {
				SK_DF(SK_VERB_NA | SK_VERB_RING,
				    "kr \"%s\" (0x%llx) krflags 0x%b is busy",
				    kring->ckr_name, SK_KVA(kring),
				    kring->ckr_flags, CKRF_BITS);
				return EBUSY;
			}
		}
	}

	/*
	 * Second round: increment usage count and possibly
	 * mark as exclusive
	 */

	for_all_rings(t) {
		for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
			kring = &NAKR(na, t)[i];
			kring->ckr_users++;
			if (excl) {
				kring->ckr_flags |= CKRF_EXCLUSIVE;
			}
		}
	}

	return 0;
}
376
377 /* Hoisted out of line to reduce kernel stack footprint */
378 SK_NO_INLINE_ATTRIBUTE
379 static void
na_krings_unuse(struct kern_channel * ch)380 na_krings_unuse(struct kern_channel *ch)
381 {
382 struct nexus_adapter *na = ch->ch_na;
383 struct __kern_channel_ring *__single kring;
384 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
385 enum txrx t;
386 uint32_t i;
387
388 SK_DF(SK_VERB_NA | SK_VERB_RING,
389 "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
390 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
391 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
392
393 for_all_rings(t) {
394 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
395 kring = &NAKR(na, t)[i];
396 if (excl) {
397 kring->ckr_flags &= ~CKRF_EXCLUSIVE;
398 }
399 kring->ckr_users--;
400 }
401 }
402 }
403
404 /* Hoisted out of line to reduce kernel stack footprint */
405 SK_NO_INLINE_ATTRIBUTE
406 static void
na_krings_verify(struct nexus_adapter * na)407 na_krings_verify(struct nexus_adapter *na)
408 {
409 struct __kern_channel_ring *__single kring;
410 enum txrx t;
411 uint32_t i;
412
413 for_all_rings(t) {
414 for (i = 0; i < na_get_nrings(na, t); i++) {
415 kring = &NAKR(na, t)[i];
416 /* na_kr_create() validations */
417 ASSERT(kring->ckr_num_slots > 0);
418 ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
419 ASSERT(kring->ckr_pp != NULL);
420
421 if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
422 continue;
423 }
424 /* na_kr_setup() validations */
425 if (KR_KERNEL_ONLY(kring)) {
426 ASSERT(kring->ckr_ring == NULL);
427 } else {
428 ASSERT(kring->ckr_ring != NULL);
429 }
430 ASSERT(kring->ckr_ksds_last ==
431 &kring->ckr_ksds[kring->ckr_lim]);
432 }
433 }
434 }
435
/*
 * Bind a channel to a nexus adapter: interpret the requested ring set,
 * create the krings/rings on first bind, mark kring usage, allocate the
 * per-fd schema for user-facing channels, set up the user packet pool
 * when requested, and activate the adapter on first use.
 *
 * Called with SK_LOCK held.  Returns 0 on success; on failure all
 * partially-completed work is unwound (see the err_* labels, which
 * run in reverse order of setup) and a nonzero errno is returned.
 */
int
na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr)
{
	struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
	struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
	uint32_t ch_mode = chr->cr_mode;
	int err = 0;

	SK_LOCK_ASSERT_HELD();
	ASSERT(ch->ch_schema == NULL);
	ASSERT(ch->ch_na == NULL);

	/* ring configuration may have changed, fetch from the card */
	na_update_config(na);
	ch->ch_na = na; /* store the reference */
	err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
	if (err != 0) {
		goto err;
	}

	/* start from a clean slate of mode flags, then apply cr_mode */
	os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
	    CHANF_USER_PACKET_POOL | CHANF_EVENT_RING), relaxed);
	if (ch_mode & CHMODE_EXCLUSIVE) {
		os_atomic_or(&ch->ch_flags, CHANF_EXCLUSIVE, relaxed);
	}
	/*
	 * Disallow automatic sync for monitor mode, since TX
	 * direction is disabled.
	 */
	if (ch_mode & CHMODE_MONITOR) {
		os_atomic_or(&ch->ch_flags, CHANF_RXONLY, relaxed);
	}

	/* channel and adapter must agree on user packet pool mode */
	if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
	    !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
		SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
		    ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
		err = EINVAL;
		goto err;
	}

	/* refuse to bind to a defunct arena */
	if (na->na_arena->ar_flags & ARF_DEFUNCT) {
		err = ENXIO;
		goto err;
	}

	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
		ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
		os_atomic_or(&ch->ch_flags, CHANF_USER_PACKET_POOL, relaxed);
	}

	/* event rings require user packet pool support on the adapter */
	if (ch_mode & CHMODE_EVENT_RING) {
		ASSERT(na->na_flags & NAF_USER_PKT_POOL);
		ASSERT(na->na_flags & NAF_EVENT_RING);
		ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
		os_atomic_or(&ch->ch_flags, CHANF_EVENT_RING, relaxed);
	}

	/*
	 * If this is the first channel of the adapter, create
	 * the rings and their in-kernel view, the krings.
	 */
	if (na->na_channels == 0) {
		err = na->na_krings_create(na, ch);
		if (err != 0) {
			goto err;
		}

		/*
		 * Sanity check; this is already done in na_kr_create(),
		 * but we do it here as well to validate na_kr_setup().
		 */
		na_krings_verify(na);
		/* cache the metadata type/subtype from the RX pool */
		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
	}

	/*
	 * Validate ownership and usability of the krings; take into account
	 * whether some previous bind has exclusive ownership on them.
	 */
	err = na_krings_use(ch);
	if (err != 0) {
		goto err_del_rings;
	}

	/* for user-facing channel, create a new channel schema */
	if (!(ch->ch_flags & CHANF_KERNEL)) {
		err = na_schema_alloc(ch);
		if (err != 0) {
			goto err_rel_excl;
		}

		ASSERT(ch->ch_schema != NULL);
		ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
	} else {
		/* kernel channels have no user-visible schema */
		ASSERT(ch->ch_schema == NULL);
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* update our work timestamp */
	na->na_work_ts = net_uptime();

	na->na_channels++;

	/*
	 * If user packet pool is desired, initialize the allocated
	 * object hash table in the pool, if not already. This also
	 * retains a refcnt on the pool which the caller must release.
	 */
	ASSERT(ch->ch_pp == NULL);
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
#pragma unused(tx_pp)
		ASSERT(rx_pp == tx_pp);
		err = pp_init_upp(rx_pp, TRUE);
		if (err != 0) {
			goto err_free_schema;
		}
		ch->ch_pp = rx_pp;
	}

	/* activate the adapter on its first channel */
	if (!NA_IS_ACTIVE(na)) {
		err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
		if (err != 0) {
			goto err_release_pp;
		}

		SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
		    SK_KVA(na));
		SK_D(" na_md_type: %u", na->na_md_type);
		SK_D(" na_md_subtype: %u", na->na_md_subtype);
	}

	SK_D("ch 0x%llx", SK_KVA(ch));
	SK_D(" ch_flags: 0x%b", ch->ch_flags, CHANF_BITS);
	if (ch->ch_schema != NULL) {
		SK_D(" ch_schema: 0x%llx", SK_KVA(ch->ch_schema));
	}
	SK_D(" ch_na: 0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
	    ch->ch_na->na_channels);
	SK_D(" ch_tx_rings: [%u,%u)", ch->ch_first[NR_TX],
	    ch->ch_last[NR_TX]);
	SK_D(" ch_rx_rings: [%u,%u)", ch->ch_first[NR_RX],
	    ch->ch_last[NR_RX]);
	SK_D(" ch_alloc_rings: [%u,%u)", ch->ch_first[NR_A],
	    ch->ch_last[NR_A]);
	SK_D(" ch_free_rings: [%u,%u)", ch->ch_first[NR_F],
	    ch->ch_last[NR_F]);
	SK_D(" ch_ev_rings: [%u,%u)", ch->ch_first[NR_EV],
	    ch->ch_last[NR_EV]);

	return 0;

	/* error unwind, in reverse order of the setup above */
err_release_pp:
	if (ch_mode & CHMODE_USER_PACKET_POOL) {
		ASSERT(ch->ch_pp != NULL);
		pp_release(rx_pp);
		ch->ch_pp = NULL;
	}
err_free_schema:
	*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
	    NEXUS_META_TYPE_INVALID;
	*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
	    NEXUS_META_SUBTYPE_INVALID;
	ASSERT(na->na_channels != 0);
	na->na_channels--;
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}
err_rel_excl:
	na_krings_unuse(ch);
err_del_rings:
	/* we created the krings above only if we were the first channel */
	if (na->na_channels == 0) {
		na->na_krings_delete(na, ch, FALSE);
	}
err:
	ch->ch_na = NULL;
	ASSERT(err != 0);

	return err;
}
626
/*
 * Undo everything that was done in na_bind_channel().
 */
/* call with SK_LOCK held */
void
na_unbind_channel(struct kern_channel *ch)
{
	struct nexus_adapter *na = ch->ch_na;

	SK_LOCK_ASSERT_HELD();

	ASSERT(na->na_channels != 0);
	na->na_channels--;

	/* release exclusive use if it was requested at bind time */
	na_krings_unuse(ch);

	if (na->na_channels == 0) { /* last instance */
		SK_D("%s(%d): deleting last channel instance for %s",
		    ch->ch_name, ch->ch_pid, na->na_name);

		/*
		 * Free any remaining allocated packets attached to
		 * the slots, followed by a teardown of the arena.
		 */
		na_teardown(na, ch, FALSE);

		/* invalidate the cached metadata type/subtype */
		*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
		    NEXUS_META_TYPE_INVALID;
		*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
		    NEXUS_META_SUBTYPE_INVALID;
	} else {
		SK_D("%s(%d): %s has %u remaining channel instance(s)",
		    ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
	}

	/*
	 * Free any allocated packets (for the process) attached to the slots;
	 * note that na_teardown() could have done this there as well.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		pp_release(ch->ch_pp);
		ch->ch_pp = NULL;
	}

	/* possibly decrement counter of tx_si/rx_si users */
	na_unset_ringid(ch);

	/* reap the caches now (purge if adapter is idle) */
	skmem_arena_reap(na->na_arena, (na->na_channels == 0));

	/* delete the csm */
	if (ch->ch_schema != NULL) {
		skmem_cache_free(
			skmem_arena_nexus(na->na_arena)->arn_schema_cache,
			ch->ch_schema);
		ch->ch_schema = NULL;
		ch->ch_schema_offset = (mach_vm_offset_t)-1;
	}

	/* destroy the memory map */
	skmem_arena_munmap_channel(na->na_arena, ch);

	/* mark the channel as unbound */
	os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE), relaxed);
	ch->ch_na = NULL;

	/* and finally release the nexus adapter; this might free it */
	(void) na_release_locked(na);
}
699
/*
 * Quiesce an adapter on last unbind or defunct: stop any monitors,
 * deactivate (or defunct) the adapter, purge this process's user
 * packet pool allocations, and delete the rings.  Called with both
 * SK_LOCK and the channel lock held.
 */
static void
na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	SK_LOCK_ASSERT_HELD();
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

#if CONFIG_NEXUS_MONITOR
	/*
	 * Walk through all the rings and tell any monitor
	 * that the port is going to exit Skywalk mode
	 */
	nx_mon_stop(na);
#endif /* CONFIG_NEXUS_MONITOR */

	/*
	 * Deactivate the adapter.
	 */
	(void) na->na_activate(na,
	    (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));

	/*
	 * Free any remaining allocated packets for this process.
	 */
	if (ch->ch_pp != NULL) {
		ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
		pp_purge_upp(ch->ch_pp, ch->ch_pid);
		/*
		 * When defuncting, keep the pool reference; it is
		 * released later in na_unbind_channel().
		 */
		if (!defunct) {
			pp_release(ch->ch_pp);
			ch->ch_pp = NULL;
		}
	}

	/*
	 * Delete rings and buffers.
	 */
	na->na_krings_delete(na, ch, defunct);
}
738
739 /* call with SK_LOCK held */
740 /*
741 * Allocate the per-fd structure __user_channel_schema.
742 */
743 static int
na_schema_alloc(struct kern_channel * ch)744 na_schema_alloc(struct kern_channel *ch)
745 {
746 struct nexus_adapter *na = ch->ch_na;
747 struct skmem_arena *ar = na->na_arena;
748 struct skmem_arena_nexus *arn;
749 mach_vm_offset_t roff[SKMEM_REGIONS];
750 struct __kern_channel_ring *__single kr;
751 struct __user_channel_schema *__single csm;
752 struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
753 mach_vm_offset_t base;
754 uint32_t i, j, k, n[NR_ALL];
755 enum txrx t;
756
757 /* see comments for struct __user_channel_schema */
758 _CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
759 _CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
760 sizeof(csm->csm_ver));
761 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
762 sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
763 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
764 sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
765 sizeof(csm->csm_kern_name));
766
767 SK_LOCK_ASSERT_HELD();
768
769 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
770 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
771 arn = skmem_arena_nexus(ar);
772 ASSERT(arn != NULL);
773 for_all_rings(t) {
774 n[t] = 0;
775 }
776
777 csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
778 if (csm == NULL) {
779 return ENOMEM;
780 }
781
782 skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
783 bzero(__unsafe_forge_bidi_indexable(void *, csm, SKMEM_OBJ_SIZE(&csm_oi)),
784 SKMEM_OBJ_SIZE(&csm_oi));
785
786 *(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;
787
788 /* kernel version and executable UUID */
789 _CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
790
791 (void) strlcpy(csm->csm_kern_name,
792 __unsafe_forge_null_terminated(const char *, version),
793 sizeof(csm->csm_kern_name));
794
795 #if !XNU_TARGET_OS_OSX
796 (void) memcpy((void *)csm->csm_kern_uuid, kernelcache_uuid, sizeof(csm->csm_kern_uuid));
797 #else /* XNU_TARGET_OS_OSX */
798 if (kernel_uuid != NULL) {
799 (void) memcpy((void *)csm->csm_kern_uuid, kernel_uuid, sizeof(csm->csm_kern_uuid));
800 }
801 #endif /* XNU_TARGET_OS_OSX */
802
803 for_rx_tx(t) {
804 ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
805 n[t] = ch->ch_last[t] - ch->ch_first[t];
806 ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
807 }
808
809 /* return total number of tx and rx rings for this channel */
810 *(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
811 *(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];
812
813 if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
814 *(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
815 na->na_num_allocator_ring_pairs;
816 n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
817 ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
818 ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
819 ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
820
821 n[NR_LBA] = na->na_num_large_buf_alloc_rings;
822 if (n[NR_LBA] != 0) {
823 *(uint32_t *)(uintptr_t)&csm->csm_large_buf_alloc_rings = n[NR_LBA];
824 ASSERT(n[NR_LBA] == (ch->ch_last[NR_LBA] - ch->ch_first[NR_LBA]));
825 }
826 }
827
828 if (ch->ch_flags & CHANF_EVENT_RING) {
829 n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
830 ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
831 *(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
832 }
833
834 bzero(&roff, sizeof(roff));
835 for (i = 0; i < SKMEM_REGIONS; i++) {
836 if (ar->ar_regions[i] == NULL) {
837 ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
838 i == SKMEM_REGION_SCHEMA ||
839 i == SKMEM_REGION_BUF_LARGE ||
840 i == SKMEM_REGION_RXBUF_DEF ||
841 i == SKMEM_REGION_RXBUF_LARGE ||
842 i == SKMEM_REGION_TXBUF_DEF ||
843 i == SKMEM_REGION_TXBUF_LARGE ||
844 i == SKMEM_REGION_RXKMD ||
845 i == SKMEM_REGION_TXKMD ||
846 i == SKMEM_REGION_UMD ||
847 i == SKMEM_REGION_UBFT ||
848 i == SKMEM_REGION_KBFT ||
849 i == SKMEM_REGION_RXKBFT ||
850 i == SKMEM_REGION_TXKBFT ||
851 i == SKMEM_REGION_TXAUSD ||
852 i == SKMEM_REGION_RXFUSD ||
853 i == SKMEM_REGION_USTATS ||
854 i == SKMEM_REGION_KSTATS ||
855 i == SKMEM_REGION_INTRINSIC ||
856 i == SKMEM_REGION_FLOWADV ||
857 i == SKMEM_REGION_NEXUSADV ||
858 i == SKMEM_REGION_SYSCTLS ||
859 i == SKMEM_REGION_GUARD_TAIL);
860 continue;
861 }
862
863 /* not for nexus */
864 ASSERT(i != SKMEM_REGION_SYSCTLS);
865
866 /*
867 * Get region offsets from base of mmap span; the arena
868 * doesn't need to be mmap'd at this point, since we
869 * simply compute the relative offset.
870 */
871 roff[i] = skmem_arena_get_region_offset(ar, i);
872 }
873
874 /*
875 * The schema is made up of the descriptor followed inline by an array
876 * of offsets to the tx, rx, allocator and event rings in the mmap span.
877 * They contain the offset between the ring and schema, so the
878 * information is usable in userspace to reach the ring from
879 * the schema.
880 */
881 base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
882
883 /* initialize schema with tx ring info */
884 for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
885 kr = &na->na_tx_rings[j];
886 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
887 continue;
888 }
889
890 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
891 skmem_cache_get_obj_info(arn->arn_ring_cache,
892 kr->ckr_ring, &ring_oi, NULL);
893 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
894 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
895
896 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
897 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
898 kr->ckr_ksds, &ksd_oi, &usd_oi);
899
900 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
901 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
902 base;
903 }
904 /* initialize schema with rx ring info */
905 for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
906 kr = &na->na_rx_rings[j];
907 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
908 continue;
909 }
910
911 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
912 skmem_cache_get_obj_info(arn->arn_ring_cache,
913 kr->ckr_ring, &ring_oi, NULL);
914 *(mach_vm_offset_t *)
915 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
916 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
917
918 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
919 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
920 kr->ckr_ksds, &ksd_oi, &usd_oi);
921
922 *(mach_vm_offset_t *)
923 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
924 (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
925 base;
926 }
927 /* initialize schema with allocator ring info */
928 for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
929 i < n[NR_A]; i++, j++) {
930 mach_vm_offset_t usd_roff;
931
932 usd_roff = roff[SKMEM_REGION_TXAUSD];
933 kr = &na->na_alloc_rings[j];
934 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
935 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
936
937 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
938 &ring_oi, NULL);
939 *(mach_vm_offset_t *)
940 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
941 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
942
943 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
944 &ksd_oi, &usd_oi);
945 *(mach_vm_offset_t *)
946 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
947 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
948 }
949 /* initialize schema with free ring info */
950 for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
951 i < n[NR_F]; i++, j++) {
952 mach_vm_offset_t usd_roff;
953
954 usd_roff = roff[SKMEM_REGION_RXFUSD];
955 kr = &na->na_free_rings[j];
956 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
957 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
958
959 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
960 &ring_oi, NULL);
961 *(mach_vm_offset_t *)
962 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
963 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
964
965 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
966 &ksd_oi, &usd_oi);
967 *(mach_vm_offset_t *)
968 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
969 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
970 }
971 /* initialize schema with event ring info */
972 for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
973 n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
974 ASSERT(csm->csm_num_event_rings != 0);
975 kr = &na->na_event_rings[j];
976 ASSERT(!KR_KERNEL_ONLY(kr));
977 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
978 skmem_cache_get_obj_info(arn->arn_ring_cache,
979 kr->ckr_ring, &ring_oi, NULL);
980 *(mach_vm_offset_t *)
981 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
982 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
983
984 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
985 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
986 kr->ckr_ksds, &ksd_oi, &usd_oi);
987
988 *(mach_vm_offset_t *)
989 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
990 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
991 base;
992 }
993 /* initialize schema with large buf alloc ring info */
994 for (i = 0, j = ch->ch_first[NR_LBA], k = n[NR_TX] + n[NR_RX] +
995 n[NR_A] + n[NR_F] + n[NR_EV]; i < n[NR_LBA]; i++, j++) {
996 ASSERT(csm->csm_large_buf_alloc_rings != 0);
997 kr = &na->na_large_buf_alloc_rings[j];
998 ASSERT(!KR_KERNEL_ONLY(kr));
999 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
1000 skmem_cache_get_obj_info(arn->arn_ring_cache,
1001 kr->ckr_ring, &ring_oi, NULL);
1002 *(mach_vm_offset_t *)
1003 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
1004 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
1005
1006 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
1007 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
1008 kr->ckr_ksds, &ksd_oi, &usd_oi);
1009
1010 *(mach_vm_offset_t *)
1011 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
1012 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
1013 base;
1014 }
1015
1016 *(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
1017 __ch_umd_redzone_cookie;
1018 *(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
1019 *(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
1020 na->na_md_subtype;
1021
1022 if (arn->arn_stats_obj != NULL) {
1023 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
1024 ASSERT(roff[SKMEM_REGION_USTATS] != 0);
1025 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
1026 roff[SKMEM_REGION_USTATS];
1027 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1028 na->na_stats_type;
1029 } else {
1030 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1031 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
1032 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1033 NEXUS_STATS_TYPE_INVALID;
1034 }
1035
1036 if (arn->arn_flowadv_obj != NULL) {
1037 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
1038 ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
1039 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
1040 roff[SKMEM_REGION_FLOWADV];
1041 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
1042 na->na_flowadv_max;
1043 } else {
1044 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1045 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
1046 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
1047 }
1048
1049 if (arn->arn_nexusadv_obj != NULL) {
1050 struct __kern_nexus_adv_metadata *__single adv_md;
1051
1052 adv_md = arn->arn_nexusadv_obj;
1053 ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
1054 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
1055 ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
1056 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
1057 roff[SKMEM_REGION_NEXUSADV];
1058 } else {
1059 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1060 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
1061 }
1062
1063 ch->ch_schema = csm;
1064 ch->ch_schema_offset = base;
1065
1066 return 0;
1067 }
1068
1069 /*
1070 * Called by all routines that create nexus_adapters.
1071 * Attach na to the ifp (if any) and provide defaults
1072 * for optional callbacks. Defaults assume that we
1073 * are creating an hardware nexus_adapter.
1074 */
1075 void
na_attach_common(struct nexus_adapter * na,struct kern_nexus * nx,struct kern_nexus_domain_provider * nxdom_prov)1076 na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
1077 struct kern_nexus_domain_provider *nxdom_prov)
1078 {
1079 SK_LOCK_ASSERT_HELD();
1080
1081 ASSERT(nx != NULL);
1082 ASSERT(nxdom_prov != NULL);
1083 ASSERT(na->na_krings_create != NULL);
1084 ASSERT(na->na_krings_delete != NULL);
1085 if (na->na_type != NA_NETIF_COMPAT_DEV) {
1086 ASSERT(na_get_nrings(na, NR_TX) != 0);
1087 }
1088 if (na->na_type != NA_NETIF_COMPAT_HOST) {
1089 ASSERT(na_get_nrings(na, NR_RX) != 0);
1090 }
1091 ASSERT(na->na_channels == 0);
1092
1093 if (na->na_notify == NULL) {
1094 na->na_notify = na_notify;
1095 }
1096
1097 na->na_nx = nx;
1098 na->na_nxdom_prov = nxdom_prov;
1099
1100 SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
1101 SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
1102 SK_KVA(na->na_arena));
1103 }
1104
1105 void
na_post_event(struct __kern_channel_ring * kring,boolean_t nodelay,boolean_t within_kevent,boolean_t selwake,uint32_t hint)1106 na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
1107 boolean_t within_kevent, boolean_t selwake, uint32_t hint)
1108 {
1109 struct nexus_adapter *na = KRNA(kring);
1110 enum txrx t = kring->ckr_tx;
1111
1112 SK_DF(SK_VERB_EVENTS,
1113 "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
1114 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1115 na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
1116 hint, CHAN_FILT_HINT_BITS);
1117
1118 csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
1119 /*
1120 * optimization: avoid a wake up on the global
1121 * queue if nobody has registered for more
1122 * than one ring
1123 */
1124 if (na->na_si_users[t] > 0) {
1125 csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
1126 }
1127 }
1128
/*
 * Default notify callback, installed by na_attach_common() when the
 * adapter did not supply its own.  Logs the ring state and posts an
 * event on the ring, translating NA_NOTEF_* flags into the
 * nodelay/within-kevent arguments of na_post_event().
 */
static int
na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
{
#pragma unused(p)
	SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
	    SK_VERB_TX : SK_VERB_RX),
	    "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
	    "flags 0x%x, kh %u kt %u | h %u t %u",
	    sk_proc_name_address(p), sk_proc_pid(p),
	    (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
	    SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
	    kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
	    kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);

	/* selwake is always TRUE for the default callback; hint is unused */
	na_post_event(kring, (flags & NA_NOTEF_PUSH),
	    (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);

	return 0;
}
1149
1150 /*
1151 * Fetch configuration from the device, to cope with dynamic
1152 * reconfigurations after loading the module.
1153 */
1154 /* call with SK_LOCK held */
1155 int
na_update_config(struct nexus_adapter * na)1156 na_update_config(struct nexus_adapter *na)
1157 {
1158 uint32_t txr, txd, rxr, rxd;
1159
1160 SK_LOCK_ASSERT_HELD();
1161
1162 txr = txd = rxr = rxd = 0;
1163 if (na->na_config == NULL ||
1164 na->na_config(na, &txr, &txd, &rxr, &rxd)) {
1165 /* take whatever we had at init time */
1166 txr = na_get_nrings(na, NR_TX);
1167 txd = na_get_nslots(na, NR_TX);
1168 rxr = na_get_nrings(na, NR_RX);
1169 rxd = na_get_nslots(na, NR_RX);
1170 }
1171
1172 if (na_get_nrings(na, NR_TX) == txr &&
1173 na_get_nslots(na, NR_TX) == txd &&
1174 na_get_nrings(na, NR_RX) == rxr &&
1175 na_get_nslots(na, NR_RX) == rxd) {
1176 return 0; /* nothing changed */
1177 }
1178 SK_D("stored config %s: txring %u x %u, rxring %u x %u",
1179 na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
1180 na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
1181 SK_D("new config %s: txring %u x %u, rxring %u x %u",
1182 na->na_name, txr, txd, rxr, rxd);
1183
1184 if (na->na_channels == 0) {
1185 SK_D("configuration changed (but fine)");
1186 na_set_nrings(na, NR_TX, txr);
1187 na_set_nslots(na, NR_TX, txd);
1188 na_set_nrings(na, NR_RX, rxr);
1189 na_set_nslots(na, NR_RX, rxd);
1190 return 0;
1191 }
1192 SK_ERR("configuration changed while active, this is bad...");
1193 return 1;
1194 }
1195
/*
 * Populate the netif device adapter's service-class -> TX-ring mapping.
 * Depending on the provider's queue-mapping type, either all service
 * classes funnel into ring 0, or the 10 service classes are folded onto
 * the 4 WMM access-category rings.
 */
static void
na_kr_setup_netif_svc_map(struct nexus_adapter *na)
{
	uint32_t i;
	uint32_t num_tx_rings;

	ASSERT(na->na_type == NA_NETIF_DEV);
	num_tx_rings = na_get_nrings(na, NR_TX);

	/*
	 * Compile-time sanity: service classes expected to share a WMM
	 * access category must map to the same ring id.
	 */
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_BK));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RD));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_RV));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_VI));
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
	    NAKR_WMM_SC2RINGID(KPKT_SC_CTL));

	/* every WMM ring id must fit within the WMM class count */
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
	_CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);

	/* every service-class index must be a valid LUT index */
	_CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
	_CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);

	/*
	 * we support the following 2 configurations:
	 * 1. packets from all 10 service class map to one ring.
	 * 2. a 10:4 mapping between service classes and the rings. These 4
	 *    rings map to the 4 WMM access categories.
	 */
	if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
		ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
		/* setup the adapter's service class LUT */
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
		NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
		NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
		NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
		NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
		NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
		NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);

		/* Initialize the service class for each of the 4 ring */
		NAKR_SET_KR_SVC(na, KPKT_SC_BK);
		NAKR_SET_KR_SVC(na, KPKT_SC_BE);
		NAKR_SET_KR_SVC(na, KPKT_SC_VI);
		NAKR_SET_KR_SVC(na, KPKT_SC_VO);
	} else {
		ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
		    NEXUS_QMAP_TYPE_DEFAULT);
		/* 10: 1 mapping — every class lands on ring 0 */
		for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
			na->na_kring_svc_lut[i] = 0;
		}
		for (i = 0; i < num_tx_rings; i++) {
			NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
		}
	}
}
1273
/*
 * Lock groups for the per-ring mutex/spinlock pairs, keyed by ring type.
 * Alloc, free and large-buf-alloc rings share the single "alloc" group
 * (see na_kr_q_lck_grp() / na_kr_s_lck_grp() below).
 */
static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1281
1282 static lck_grp_t *
na_kr_q_lck_grp(enum txrx t)1283 na_kr_q_lck_grp(enum txrx t)
1284 {
1285 switch (t) {
1286 case NR_TX:
1287 return &channel_txq_lock_group;
1288 case NR_RX:
1289 return &channel_rxq_lock_group;
1290 case NR_A:
1291 case NR_F:
1292 case NR_LBA:
1293 return &channel_alloc_lock_group;
1294 case NR_EV:
1295 return &channel_evq_lock_group;
1296 default:
1297 VERIFY(0);
1298 /* NOTREACHED */
1299 __builtin_unreachable();
1300 }
1301 }
1302
1303 static lck_grp_t *
na_kr_s_lck_grp(enum txrx t)1304 na_kr_s_lck_grp(enum txrx t)
1305 {
1306 switch (t) {
1307 case NR_TX:
1308 return &channel_txs_lock_group;
1309 case NR_RX:
1310 return &channel_rxs_lock_group;
1311 case NR_A:
1312 case NR_F:
1313 case NR_LBA:
1314 return &channel_alloc_lock_group;
1315 case NR_EV:
1316 return &channel_evs_lock_group;
1317 default:
1318 VERIFY(0);
1319 /* NOTREACHED */
1320 __builtin_unreachable();
1321 }
1322 }
1323
1324 static void
kr_init_tbr(struct __kern_channel_ring * r)1325 kr_init_tbr(struct __kern_channel_ring *r)
1326 {
1327 r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
1328 r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
1329 r->ckr_tbr_last = 0;
1330 }
1331
1332 struct kern_pbufpool *
na_kr_get_pp(struct nexus_adapter * na,enum txrx t)1333 na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
1334 {
1335 struct kern_pbufpool *pp = NULL;
1336 switch (t) {
1337 case NR_RX:
1338 case NR_F:
1339 case NR_EV:
1340 pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1341 break;
1342 case NR_TX:
1343 case NR_A:
1344 case NR_LBA:
1345 pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1346 break;
1347 default:
1348 VERIFY(0);
1349 /* NOTREACHED */
1350 __builtin_unreachable();
1351 }
1352
1353 return pp;
1354 }
1355
1356 /*
1357 * Create the krings array and initialize the fields common to all adapters.
1358 * The array layout is this:
1359 *
1360 * +----------+
1361 * na->na_tx_rings -----> | | \
1362 * | | } na->na_num_tx_rings
1363 * | | /
1364 * na->na_rx_rings ----> +----------+
1365 * | | \
1366 * | | } na->na_num_rx_rings
1367 * | | /
1368 * na->na_alloc_rings -> +----------+
1369 * | | \
1370 * na->na_free_rings --> +----------+ } na->na_num_allocator_ring_pairs
1371 * | | /
1372 * na->na_event_rings -> +----------+
1373 * | | \
1374 * | | } na->na_num_event_rings
1375 * | | /
1376 * na->na_large_buf_alloc_rings -> +----------+
1377 * | | \
1378 * | | } na->na_num_large_buf_alloc_rings
1379 * | | /
1380 * na->na_tail -----> +----------+
1381 */
1382 /* call with SK_LOCK held */
/*
 * Allocate and initialize all kring descriptors for the adapter, laid
 * out as one contiguous array (see diagram above).  When alloc_ctx is
 * TRUE, also allocate per-slot context storage for the TX/RX rings.
 * Returns 0 on success or ENOMEM, undoing partial allocations.
 */
static int
na_kr_create(struct nexus_adapter *na, boolean_t alloc_ctx)
{
	lck_grp_t *q_lck_grp, *s_lck_grp;
	uint32_t i, ndesc;
	struct kern_pbufpool *pp = NULL;
	uint32_t count;
	uint32_t tmp_count;
	struct __kern_channel_ring *__counted_by(count) rings;
	struct __kern_channel_ring *__single kring;
	uint32_t n[NR_ALL];
	int c, tot_slots, err = 0;
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	/* snapshot the per-type ring counts */
	n[NR_TX] = na_get_nrings(na, NR_TX);
	n[NR_RX] = na_get_nrings(na, NR_RX);
	n[NR_A] = na_get_nrings(na, NR_A);
	n[NR_F] = na_get_nrings(na, NR_F);
	n[NR_EV] = na_get_nrings(na, NR_EV);
	n[NR_LBA] = na_get_nrings(na, NR_LBA);

	/*
	 * -fbounds-safety: rings is __counted_by(count), so rings needs to be
	 * assigned first, immediately followed by count's assignment.
	 */
	tmp_count = n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV] + n[NR_LBA];
	rings = sk_alloc_type_array(struct __kern_channel_ring, tmp_count,
	    Z_WAITOK, skmem_tag_nx_rings);
	count = tmp_count;
	na->na_all_rings = rings;
	na->na_all_rings_cnt = count;

	if (__improbable(rings == NULL)) {
		SK_ERR("Cannot allocate krings");
		err = ENOMEM;
		goto error;
	}
	/* carve the per-type views out of the single allocation */
	na->na_tx_rings = rings;
	na->na_tx_rings_cnt = n[NR_TX];

	na->na_rx_rings = rings + n[NR_TX];
	na->na_rx_rings_cnt = n[NR_RX];
	if (n[NR_A] != 0) {
		na->na_alloc_rings = rings + n[NR_TX] + n[NR_RX];
		na->na_free_rings = rings + n[NR_TX] + n[NR_RX] + n[NR_A];
		na->na_alloc_free_rings_cnt = n[NR_A];
	} else {
		na->na_alloc_rings = NULL;
		na->na_free_rings = NULL;
		na->na_alloc_free_rings_cnt = 0;
	}
	if (n[NR_EV] != 0) {
		/* event rings sit after alloc/free rings, if those exist */
		if (na->na_free_rings != NULL) {
			na->na_event_rings = rings + n[NR_TX] +
			    n[NR_RX] + n[NR_A] + n[NR_F];
			na->na_event_rings_cnt = n[NR_EV];
		} else {
			na->na_event_rings = rings + n[NR_TX] + n[NR_RX];
			na->na_event_rings_cnt = n[NR_EV];
		}
	}
	if (n[NR_LBA] != 0) {
		/* large-buf alloc rings require regular alloc rings */
		ASSERT(n[NR_A] != 0);
		if (na->na_event_rings != NULL) {
			na->na_large_buf_alloc_rings = rings + n[NR_TX] + n[NR_RX] +
			    n[NR_A] + n[NR_F] + n[NR_EV];
			na->na_large_buf_alloc_rings_cnt = n[NR_LBA];
		} else {
			/* alloc/free rings must also be present */
			ASSERT(na->na_free_rings != NULL);
			na->na_large_buf_alloc_rings = rings + n[NR_TX] + n[NR_RX] +
			    n[NR_A] + n[NR_F];
			na->na_large_buf_alloc_rings_cnt = n[NR_LBA];
		}
	}

	/* total number of slots for TX/RX adapter rings */
	c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
	    (n[NR_RX] * na_get_nslots(na, NR_RX));

	/* for scratch space on alloc and free rings */
	if (n[NR_A] != 0) {
		tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
		tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
		tot_slots += n[NR_LBA] * na_get_nslots(na, NR_LBA);
		c = tot_slots;
	}
	na->na_total_slots = tot_slots;

	/* slot context (optional) for all TX/RX ring slots of this adapter */
	if (alloc_ctx) {
		na->na_slot_ctxs =
		    skn_alloc_type_array(slot_ctxs, struct slot_ctx,
		    na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
		na->na_slot_ctxs_cnt = na->na_total_slots;
		if (na->na_slot_ctxs == NULL) {
			SK_ERR("Cannot allocate slot contexts");
			err = ENOMEM;
			na->na_slot_ctxs = NULL;
			na->na_slot_ctxs_cnt = 0;
			goto error;
		}
		os_atomic_or(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
	}

	/*
	 * packet handle array storage for all TX/RX ring slots of this
	 * adapter.
	 */
	na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
	    na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
	na->na_scratch_cnt = na->na_total_slots;
	if (na->na_scratch == NULL) {
		/*
		 * NOTE(review): message appears copy-pasted from the slot
		 * context failure above; this is the scratch array failing.
		 */
		SK_ERR("Cannot allocate slot contexts");
		err = ENOMEM;
		na->na_scratch = NULL;
		na->na_scratch_cnt = 0;
		goto error;
	}

	/*
	 * All fields in krings are 0 except the one initialized below.
	 * but better be explicit on important kring fields.
	 *
	 * The countdown 'c' hands out disjoint chunks of the slot-context
	 * and scratch arrays to the rings that consume them.
	 */
	for_all_rings(t) {
		ndesc = na_get_nslots(na, t);
		pp = na_kr_get_pp(na, t);
		for (i = 0; i < n[t]; i++) {
			kring = &NAKR(na, t)[i];
			bzero(kring, sizeof(*kring));
			kring->ckr_na = na;
			kring->ckr_pp = pp;
			/* large-buf rings use the pool's large buffer size */
			kring->ckr_max_pkt_len =
			    (t == NR_LBA ? PP_BUF_SIZE_LARGE(pp) :
			    PP_BUF_SIZE_DEF(pp)) *
			    pp->pp_max_frags;
			kring->ckr_ring_id = i;
			kring->ckr_tx = t;
			kr_init_to_mhints(kring, ndesc);
			kr_init_tbr(kring);
			if (NA_KERNEL_ONLY(na)) {
				kring->ckr_flags |= CKRF_KERNEL_ONLY;
			}
			if (na->na_flags & NAF_HOST_ONLY) {
				kring->ckr_flags |= CKRF_HOST;
			}
			ASSERT((t >= NR_TXRX) || (c > 0));
			/* TX/RX rings get a slice of the slot-context array */
			if ((t < NR_TXRX) &&
			    (na->na_flags & NAF_SLOT_CONTEXT)) {
				ASSERT(na->na_slot_ctxs != NULL);
				kring->ckr_flags |= CKRF_SLOT_CONTEXT;
				kring->ckr_slot_ctxs =
				    na->na_slot_ctxs + (tot_slots - c);
				kring->ckr_slot_ctxs_cnt = kring->ckr_num_slots;
			}
			ASSERT(na->na_scratch != NULL);
			/* tx/rx/alloc/free and large-buf rings get scratch */
			if (t < NR_TXRXAF || t == NR_LBA) {
				kring->ckr_scratch =
				    na->na_scratch + (tot_slots - c);
				kring->ckr_scratch_cnt = kring->ckr_num_slots;
			}
			if (t < NR_TXRXAF || t == NR_LBA) {
				c -= ndesc;
			}
			/* per-type sync callbacks and watermarks */
			switch (t) {
			case NR_A:
				if (i == 0) {
					kring->ckr_na_sync =
					    na_packet_pool_alloc_sync;
					kring->ckr_alloc_ws =
					    na_upp_alloc_lowat;
				} else {
					/* second alloc ring handles buffers */
					ASSERT(i == 1);
					kring->ckr_na_sync =
					    na_packet_pool_alloc_buf_sync;
					kring->ckr_alloc_ws =
					    na_upp_alloc_buf_lowat;
				}
				break;
			case NR_F:
				if (i == 0) {
					kring->ckr_na_sync =
					    na_packet_pool_free_sync;
				} else {
					ASSERT(i == 1);
					kring->ckr_na_sync =
					    na_packet_pool_free_buf_sync;
				}
				break;
			case NR_TX:
				kring->ckr_na_sync = na->na_txsync;
				if (na->na_flags & NAF_TX_MITIGATION) {
					kring->ckr_flags |= CKRF_MITIGATION;
				}
				switch (na->na_type) {
#if CONFIG_NEXUS_USER_PIPE
				case NA_USER_PIPE:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue = kr_txprologue;
					kring->ckr_finalize = NULL;
					break;
#endif /* CONFIG_NEXUS_USER_PIPE */
#if CONFIG_NEXUS_MONITOR
				case NA_MONITOR:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue = kr_txprologue;
					kring->ckr_finalize = NULL;
					break;
#endif /* CONFIG_NEXUS_MONITOR */
				default:
					/* user packet pool needs the _upp
					 * prologue/finalize variants */
					if (na->na_flags & NAF_USER_PKT_POOL) {
						kring->ckr_prologue =
						    kr_txprologue_upp;
						kring->ckr_finalize =
						    kr_txfinalize_upp;
					} else {
						kring->ckr_prologue =
						    kr_txprologue;
						kring->ckr_finalize =
						    kr_txfinalize;
					}
					break;
				}
				break;
			case NR_RX:
				kring->ckr_na_sync = na->na_rxsync;
				if (na->na_flags & NAF_RX_MITIGATION) {
					kring->ckr_flags |= CKRF_MITIGATION;
				}
				switch (na->na_type) {
#if CONFIG_NEXUS_USER_PIPE
				case NA_USER_PIPE:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue =
					    kr_rxprologue_nodetach;
					kring->ckr_finalize = kr_rxfinalize;
					break;
#endif /* CONFIG_NEXUS_USER_PIPE */
#if CONFIG_NEXUS_MONITOR
				case NA_MONITOR:
					ASSERT(!(na->na_flags &
					    NAF_USER_PKT_POOL));
					kring->ckr_prologue =
					    kr_rxprologue_nodetach;
					kring->ckr_finalize = kr_rxfinalize;
					break;
#endif /* CONFIG_NEXUS_MONITOR */
				default:
					if (na->na_flags & NAF_USER_PKT_POOL) {
						kring->ckr_prologue =
						    kr_rxprologue_upp;
						kring->ckr_finalize =
						    kr_rxfinalize_upp;
					} else {
						kring->ckr_prologue =
						    kr_rxprologue;
						kring->ckr_finalize =
						    kr_rxfinalize;
					}
					break;
				}
				break;
			case NR_EV:
				kring->ckr_na_sync = kern_channel_event_sync;
				break;
			case NR_LBA:
				kring->ckr_na_sync = na_packet_pool_alloc_large_sync;
				kring->ckr_alloc_ws = na_upp_alloc_lowat;
				break;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			/* event rings have no notify callback */
			if (t != NR_EV) {
				kring->ckr_na_notify = na->na_notify;
			} else {
				kring->ckr_na_notify = NULL;
			}
			(void) snprintf(kring->ckr_name,
			    sizeof(kring->ckr_name) - 1,
			    "%s %s%u%s", na->na_name, sk_ring2str(t), i,
			    ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
			SK_DF(SK_VERB_NA | SK_VERB_RING,
			    "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
			    CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
			kring->ckr_state = KR_READY;
			q_lck_grp = na_kr_q_lck_grp(t);
			s_lck_grp = na_kr_s_lck_grp(t);
			kring->ckr_qlock_group = q_lck_grp;
			lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
			    &channel_lock_attr);
			kring->ckr_slock_group = s_lck_grp;
			lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
			    &channel_lock_attr);
			csi_init(&kring->ckr_si,
			    (kring->ckr_flags & CKRF_MITIGATION),
			    na->na_ch_mit_ival);
		}
		/* per-direction (adapter-wide) select info */
		csi_init(&na->na_si[t],
		    (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
		    na->na_ch_mit_ival);
	}
	/* all slot-context/scratch chunks must be fully handed out */
	ASSERT(c == 0);
	na->na_tail = rings + n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] +
	    n[NR_EV] + n[NR_LBA];

	if (na->na_type == NA_NETIF_DEV) {
		na_kr_setup_netif_svc_map(na);
	}

	/* validate now for cases where we create only krings */
	na_krings_verify(na);
	return 0;

error:
	ASSERT(err != 0);
	/*
	 * NOTE(review): unlike na_kr_delete(), this path leaves
	 * na_all_rings/na_all_rings_cnt set after freeing and does not
	 * clear NAF_SLOT_CONTEXT — presumably callers discard the
	 * adapter on failure; verify against callers.
	 */
	if (rings != NULL) {
		sk_free_type_array_counted_by(struct __kern_channel_ring,
		    na->na_all_rings_cnt, na->na_all_rings);
		na->na_tx_rings = NULL;
		na->na_tx_rings_cnt = 0;
		na->na_rx_rings = NULL;
		na->na_rx_rings_cnt = 0;
		na->na_alloc_rings = NULL;
		na->na_free_rings = NULL;
		na->na_alloc_free_rings_cnt = 0;
		na->na_event_rings = NULL;
		na->na_event_rings_cnt = 0;
		na->na_tail = NULL;
	}
	if (na->na_slot_ctxs != NULL) {
		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
		skn_free_type_array_counted_by(slot_ctxs, struct slot_ctx,
		    na->na_slot_ctxs_cnt, na->na_slot_ctxs);
		na->na_slot_ctxs = NULL;
		na->na_slot_ctxs_cnt = 0;
	}
	if (na->na_scratch != NULL) {
		skn_free_type_array_counted_by(scratch, kern_packet_t, na->na_scratch_cnt,
		    na->na_scratch);
		na->na_scratch = NULL;
		na->na_scratch_cnt = 0;
	}
	return err;
}
1735
/* undo the actions performed by na_kr_create() */
/* call with SK_LOCK held */
static void
na_kr_delete(struct nexus_adapter *na)
{
	struct __kern_channel_ring *kring;
	enum txrx t;

	kring = na->na_all_rings;

	ASSERT((kring != NULL) && (na->na_tail != NULL));
	SK_LOCK_ASSERT_HELD();

	/* tear down the per-direction (adapter-wide) select info first */
	for_all_rings(t) {
		csi_destroy(&na->na_si[t]);
	}
	/* we rely on the krings layout described above */
	for (; kring != na->na_tail; kring++) {
		/* destroy the locks and select info created in na_kr_create() */
		lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
		lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
		csi_destroy(&kring->ckr_si);
		/* detach the ring's view into the shared slot-context array */
		if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
			kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
			ASSERT(kring->ckr_slot_ctxs != NULL);
			kring->ckr_slot_ctxs = NULL;
			kring->ckr_slot_ctxs_cnt = 0;
		}
		kring->ckr_scratch = NULL;
		kring->ckr_scratch_cnt = 0;
	}
	/* now that no ring references them, free the backing arrays */
	if (na->na_slot_ctxs != NULL) {
		ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
		os_atomic_andnot(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
		skn_free_type_array_counted_by(na->na_slot_ctxs,
		    struct slot_ctx, na->na_slot_ctxs_cnt,
		    na->na_slot_ctxs);
		na->na_slot_ctxs = NULL;
		na->na_slot_ctxs_cnt = 0;
	}
	if (na->na_scratch != NULL) {
		skn_free_type_array_counted_by(na->na_scratch,
		    kern_packet_t, na->na_scratch_cnt,
		    na->na_scratch);
		na->na_scratch = NULL;
		na->na_scratch_cnt = 0;
	}
	ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
	/* release the single backing allocation and reset every view */
	sk_free_type_array_counted_by(struct __kern_channel_ring,
	    na->na_all_rings_cnt, na->na_all_rings);
	na->na_tx_rings = NULL;
	na->na_tx_rings_cnt = 0;
	na->na_rx_rings = NULL;
	na->na_rx_rings_cnt = 0;
	na->na_alloc_rings = NULL;
	na->na_free_rings = NULL;
	na->na_alloc_free_rings_cnt = 0;
	na->na_event_rings = NULL;
	na->na_event_rings_cnt = 0;
	na->na_tail = NULL;
	na->na_all_rings = NULL;
	na->na_all_rings_cnt = 0;
}
1798
1799 /*
1800 * -fbounds-safety: If kernel_only, usds is NULL, so marking it
1801 * __counted_by(ndesc) would fail bounds check. We could use __sized_by_or_null
1802 * when it's ready: rdar://75598414
1803 * If usds != NULL, then ksds_cnt == usds_cnt
1804 */
1805 static void
na_kr_slot_desc_init(struct __slot_desc * __counted_by (ksds_cnt)ksds,boolean_t kernel_only,struct __slot_desc * __counted_by (usds_cnt)usds,size_t ksds_cnt,size_t usds_cnt)1806 na_kr_slot_desc_init(struct __slot_desc *__counted_by(ksds_cnt)ksds,
1807 boolean_t kernel_only, struct __slot_desc *__counted_by(usds_cnt)usds,
1808 size_t ksds_cnt, size_t usds_cnt)
1809 {
1810 size_t i;
1811
1812 bzero(ksds, ksds_cnt * SLOT_DESC_SZ);
1813 if (usds != NULL) {
1814 ASSERT(!kernel_only);
1815 ASSERT(ksds_cnt == usds_cnt);
1816 bzero(usds, usds_cnt * SLOT_DESC_SZ);
1817 } else {
1818 ASSERT(kernel_only);
1819 ASSERT(usds_cnt == 0);
1820 }
1821
1822 for (i = 0; i < ksds_cnt; i++) {
1823 KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1824 if (!kernel_only) {
1825 USD_INIT(SLOT_DESC_USD(&usds[i]));
1826 }
1827 }
1828 }
1829
1830 /* call with SK_LOCK held */
1831 static int
na_kr_setup(struct nexus_adapter * na,struct kern_channel * ch)1832 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1833 {
1834 struct skmem_arena *ar = na->na_arena;
1835 struct skmem_arena_nexus *arn;
1836 mach_vm_offset_t roff[SKMEM_REGIONS];
1837 enum txrx t;
1838 uint32_t i;
1839 struct __slot_desc *ksds;
1840
1841 SK_LOCK_ASSERT_HELD();
1842 ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1843 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1844 arn = skmem_arena_nexus(ar);
1845 ASSERT(arn != NULL);
1846
1847 bzero(&roff, sizeof(roff));
1848 for (i = 0; i < SKMEM_REGIONS; i++) {
1849 if (ar->ar_regions[i] == NULL) {
1850 continue;
1851 }
1852
1853 /* not for nexus */
1854 ASSERT(i != SKMEM_REGION_SYSCTLS);
1855
1856 /*
1857 * Get region offsets from base of mmap span; the arena
1858 * doesn't need to be mmap'd at this point, since we
1859 * simply compute the relative offset.
1860 */
1861 roff[i] = skmem_arena_get_region_offset(ar, i);
1862 }
1863
1864 for_all_rings(t) {
1865 for (i = 0; i < na_get_nrings(na, t); i++) {
1866 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1867 struct __user_channel_ring *__single ring = kring->ckr_ring;
1868 mach_vm_offset_t ring_off, usd_roff;
1869 struct skmem_obj_info oi, oim;
1870 uint32_t ndesc;
1871
1872 if (ring != NULL) {
1873 SK_DF(SK_VERB_NA | SK_VERB_RING,
1874 "kr 0x%llx (\"%s\") is already "
1875 "initialized", SK_KVA(kring),
1876 kring->ckr_name);
1877 continue; /* already created by somebody else */
1878 }
1879
1880 if (!KR_KERNEL_ONLY(kring) &&
1881 (ring = skmem_cache_alloc(arn->arn_ring_cache,
1882 SKMEM_NOSLEEP)) == NULL) {
1883 SK_ERR("Cannot allocate %s_ring for kr "
1884 "0x%llx (\"%s\")", sk_ring2str(t),
1885 SK_KVA(kring), kring->ckr_name);
1886 goto cleanup;
1887 }
1888 kring->ckr_flags |= CKRF_MEM_RING_INITED;
1889 kring->ckr_ring = ring;
1890 ndesc = kring->ckr_num_slots;
1891
1892 if (ring == NULL) {
1893 goto skip_user_ring_setup;
1894 }
1895
1896 *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1897
1898 /* offset of current ring in mmap span */
1899 skmem_cache_get_obj_info(arn->arn_ring_cache,
1900 ring, &oi, NULL);
1901 ring_off = (roff[SKMEM_REGION_RING] +
1902 SKMEM_OBJ_ROFF(&oi));
1903
1904 /*
1905 * ring_{buf,md,sd}_ofs offsets are relative to the
1906 * current ring, and not to the base of mmap span.
1907 */
1908 *(mach_vm_offset_t *)(uintptr_t)
1909 &ring->ring_def_buf_base =
1910 (roff[SKMEM_REGION_BUF_DEF] - ring_off);
1911 *(mach_vm_offset_t *)(uintptr_t)
1912 &ring->ring_large_buf_base =
1913 (roff[SKMEM_REGION_BUF_LARGE] - ring_off);
1914 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1915 (roff[SKMEM_REGION_UMD] - ring_off);
1916 _CASSERT(sizeof(uint16_t) ==
1917 sizeof(ring->ring_bft_size));
1918 if (roff[SKMEM_REGION_UBFT] != 0) {
1919 ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1920 NULL);
1921 *(mach_vm_offset_t *)(uintptr_t)
1922 &ring->ring_bft_base =
1923 (roff[SKMEM_REGION_UBFT] - ring_off);
1924 *(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1925 (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1926 skr_c_obj_size;
1927 ASSERT(ring->ring_bft_size ==
1928 ar->ar_regions[SKMEM_REGION_KBFT]->
1929 skr_c_obj_size);
1930 } else {
1931 *(mach_vm_offset_t *)(uintptr_t)
1932 &ring->ring_bft_base = 0;
1933 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1934 }
1935
1936 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
1937 usd_roff = roff[SKMEM_REGION_TXAUSD];
1938 } else {
1939 ASSERT(t == NR_RX || t == NR_F);
1940 usd_roff = roff[SKMEM_REGION_RXFUSD];
1941 }
1942 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1943 (usd_roff - ring_off);
1944
1945 /* copy values from kring */
1946 ring->ring_head = kring->ckr_rhead;
1947 *(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1948 kring->ckr_khead;
1949 *(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1950 kring->ckr_rtail;
1951
1952 _CASSERT(sizeof(uint32_t) ==
1953 sizeof(ring->ring_def_buf_size));
1954 _CASSERT(sizeof(uint32_t) ==
1955 sizeof(ring->ring_large_buf_size));
1956 _CASSERT(sizeof(uint16_t) ==
1957 sizeof(ring->ring_md_size));
1958 *(uint32_t *)(uintptr_t)&ring->ring_def_buf_size =
1959 ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size;
1960 if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) {
1961 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size =
1962 ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size;
1963 } else {
1964 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0;
1965 }
1966 if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1967 *(uint16_t *)(uintptr_t)&ring->ring_md_size =
1968 (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1969 skr_c_obj_size;
1970 ASSERT(ring->ring_md_size ==
1971 ar->ar_regions[SKMEM_REGION_KMD]->
1972 skr_c_obj_size);
1973 } else {
1974 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1975 ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1976 ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1977 }
1978
1979 /* ring info */
1980 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1981 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1982 *(uint16_t *)(uintptr_t)&ring->ring_id =
1983 (uint16_t)kring->ckr_ring_id;
1984 *(uint16_t *)(uintptr_t)&ring->ring_kind =
1985 (uint16_t)kring->ckr_tx;
1986
1987 SK_DF(SK_VERB_NA | SK_VERB_RING,
1988 "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1989 sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1990 kring->ckr_name);
1991 SK_DF(SK_VERB_NA | SK_VERB_RING,
1992 " num_slots: %u", ring->ring_num_slots);
1993 SK_DF(SK_VERB_NA | SK_VERB_RING,
1994 " def_buf_base: 0x%llx",
1995 (uint64_t)ring->ring_def_buf_base);
1996 SK_DF(SK_VERB_NA | SK_VERB_RING,
1997 " large_buf_base: 0x%llx",
1998 (uint64_t)ring->ring_large_buf_base);
1999 SK_DF(SK_VERB_NA | SK_VERB_RING,
2000 " md_base: 0x%llx",
2001 (uint64_t)ring->ring_md_base);
2002 SK_DF(SK_VERB_NA | SK_VERB_RING,
2003 " sd_base: 0x%llx",
2004 (uint64_t)ring->ring_sd_base);
2005 SK_DF(SK_VERB_NA | SK_VERB_RING,
2006 " h, t: %u, %u, %u", ring->ring_head,
2007 ring->ring_tail);
2008 SK_DF(SK_VERB_NA | SK_VERB_RING,
2009 " md_size: %d",
2010 (uint64_t)ring->ring_md_size);
2011
2012 /* make sure they're in synch */
2013 _CASSERT(NR_RX == CR_KIND_RX);
2014 _CASSERT(NR_TX == CR_KIND_TX);
2015 _CASSERT(NR_A == CR_KIND_ALLOC);
2016 _CASSERT(NR_F == CR_KIND_FREE);
2017 _CASSERT(NR_EV == CR_KIND_EVENT);
2018 _CASSERT(NR_LBA == CR_KIND_LARGE_BUF_ALLOC);
2019
2020 skip_user_ring_setup:
2021 /*
2022 * This flag tells na_kr_teardown_all() that it should
2023 * go thru the checks to free up the slot maps.
2024 */
2025 kring->ckr_flags |= CKRF_MEM_SD_INITED;
2026 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
2027 kring->ckr_ksds_cache = arn->arn_txaksd_cache;
2028 } else {
2029 ASSERT(t == NR_RX || t == NR_F);
2030 kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
2031 }
2032
2033 ksds = skmem_cache_alloc(kring->ckr_ksds_cache,
2034 SKMEM_NOSLEEP);
2035 if (ksds == NULL) {
2036 SK_ERR("Cannot allocate %s_ksds for kr "
2037 "0x%llx (\"%s\")", sk_ring2str(t),
2038 SK_KVA(kring), kring->ckr_name);
2039 goto cleanup;
2040 }
2041 kring->ckr_ksds = ksds;
2042 kring->ckr_ksds_cnt = kring->ckr_num_slots;
2043 if (!KR_KERNEL_ONLY(kring)) {
2044 skmem_cache_get_obj_info(kring->ckr_ksds_cache,
2045 kring->ckr_ksds, &oi, &oim);
2046 kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
2047 kring->ckr_usds_cnt = kring->ckr_num_slots;
2048 }
2049 na_kr_slot_desc_init(kring->ckr_ksds,
2050 KR_KERNEL_ONLY(kring), kring->ckr_usds,
2051 kring->ckr_ksds_cnt, kring->ckr_usds_cnt);
2052
2053 /* cache last slot descriptor address */
2054 ASSERT(kring->ckr_lim == (ndesc - 1));
2055 kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
2056
2057 if ((t < NR_TXRX) &&
2058 !(na->na_flags & NAF_USER_PKT_POOL) &&
2059 na_kr_populate_slots(kring) != 0) {
2060 SK_ERR("Cannot allocate buffers for kr "
2061 "0x%llx (\"%s\")", SK_KVA(kring),
2062 kring->ckr_name);
2063 goto cleanup;
2064 }
2065 }
2066 }
2067
2068 return 0;
2069
2070 cleanup:
2071 na_kr_teardown_all(na, ch, FALSE);
2072
2073 return ENOMEM;
2074 }
2075
/*
 * Release the memory backing a single kring: depopulate its slots,
 * free the slot descriptors (only once the SD region is idle) and
 * free the user-facing ring object.  Used both at final teardown and
 * at defunct time; see comments below for the defunct-specific rules.
 */
static void
na_kr_teardown_common(struct nexus_adapter *na,
    struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
    boolean_t defunct)
{
	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
	struct __user_channel_ring *ckr_ring;
	boolean_t sd_idle, sd_inited;

	ASSERT(arn != NULL);
	/* exclusive access; waits for any pending activity on the kring */
	kr_enter(kring, TRUE);
	/*
	 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
	 * to make sure that the freeing needs to happen (else just
	 * nullify the values).
	 * If this adapter owns the memory for the slot descriptors,
	 * check if the region is marked as busy (sd_idle is false)
	 * and leave the kring's slot descriptor fields alone if so,
	 * at defunct time. At final teardown time, sd_idle must be
	 * true else we assert; this indicates a missing call to
	 * skmem_arena_nexus_sd_set_noidle().
	 */
	sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
	if (sd_inited) {
		/* callee will do KR_KSD(), so check */
		if (((t < NR_TXRX) || (t == NR_EV)) &&
		    (kring->ckr_ksds != NULL)) {
			na_kr_depopulate_slots(kring, ch, defunct);
		}
		/* leave CKRF_MEM_SD_INITED flag alone until idle */
		sd_idle = skmem_arena_nexus_sd_idle(arn);
		VERIFY(sd_idle || defunct);
	} else {
		sd_idle = TRUE;
	}

	/* free slot descriptor state only when the region is idle */
	if (sd_idle) {
		kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
		if (kring->ckr_ksds != NULL) {
			/* only free if we were the ones who allocated */
			if (sd_inited) {
				skmem_cache_free(kring->ckr_ksds_cache,
				    kring->ckr_ksds);
			}
			kring->ckr_ksds = NULL;
			kring->ckr_ksds_cnt = 0;
			kring->ckr_ksds_last = NULL;
			kring->ckr_usds = NULL;
			kring->ckr_usds_cnt = 0;
		}
		ASSERT(kring->ckr_ksds_last == NULL);
		ASSERT(kring->ckr_usds == NULL);
	}

	/* detach the user-facing ring object before freeing it below */
	if ((ckr_ring = kring->ckr_ring) != NULL) {
		kring->ckr_ring = NULL;
	}

	if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
		/* kernel-only krings never allocated a user ring */
		ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
		if (ckr_ring != NULL) {
			skmem_cache_free(arn->arn_ring_cache, ckr_ring);
		}
		kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
	}

	if (defunct) {
		/* if defunct, drop everything; see KR_DROP() */
		kring->ckr_flags |= CKRF_DEFUNCT;
	}
	kr_exit(kring);
}
2147
2148 /*
2149 * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2150 */
2151 static void
na_kr_teardown_all(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)2152 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2153 boolean_t defunct)
2154 {
2155 enum txrx t;
2156
2157 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2158
2159 /* skip if this adapter has no allocated rings */
2160 if (na->na_tx_rings == NULL) {
2161 return;
2162 }
2163
2164 for_all_rings(t) {
2165 for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2166 na_kr_teardown_common(na, &NAKR(na, t)[i],
2167 t, ch, defunct);
2168 }
2169 }
2170 }
2171
2172 /*
2173 * Teardown only {tx,rx} rings assigned to the channel.
2174 */
2175 static void
na_kr_teardown_txrx(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct,struct proc * p)2176 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2177 boolean_t defunct, struct proc *p)
2178 {
2179 enum txrx t;
2180
2181 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2182
2183 for_rx_tx(t) {
2184 ring_id_t qfirst = ch->ch_first[t];
2185 ring_id_t qlast = ch->ch_last[t];
2186 uint32_t i;
2187
2188 for (i = qfirst; i < qlast; i++) {
2189 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2190 na_kr_teardown_common(na, kring, t, ch, defunct);
2191
2192 /*
2193 * Issue a notify to wake up anyone sleeping in kqueue
2194 * so that they notice the newly defuncted channels and
2195 * return an error
2196 */
2197 kring->ckr_na_notify(kring, p, 0);
2198 }
2199 }
2200 }
2201
/*
 * Attach a freshly-allocated packet from the kring's pbufpool to every
 * slot of the ring.  Only called for adapters that do NOT use the user
 * packet pool model (NAF_USER_PKT_POOL clear).  Returns 0 on success or
 * ENOMEM; on failure every slot populated so far is unwound before
 * returning.
 */
static int
na_kr_populate_slots(struct __kern_channel_ring *kring)
{
	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
	struct nexus_adapter *na = KRNA(kring);
	kern_pbufpool_t pp = kring->ckr_pp;
	uint32_t nslots = kring->ckr_num_slots;
	uint32_t start_idx, i;
	uint32_t sidx = 0; /* slot counter */
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	nexus_type_t nexus_type;
	int err = 0;

	ASSERT(kring->ckr_tx < NR_TXRX);
	ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
	ASSERT(pp != NULL);

	/*
	 * xxx_ppool: remove this special case
	 */
	nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;

	/*
	 * Per-nexus-type filter: decide whether this particular kring
	 * gets pre-populated at all; RX rings and kernel-only cases
	 * mostly skip population (return 0 below).
	 */
	switch (nexus_type) {
	case NEXUS_TYPE_FLOW_SWITCH:
	case NEXUS_TYPE_KERNEL_PIPE:
		/*
		 * xxx_ppool: This is temporary code until we come up with a
		 * scheme for user space to alloc & attach packets to tx ring.
		 */
		if (kernel_only || kring->ckr_tx == NR_RX) {
			return 0;
		}
		break;

	case NEXUS_TYPE_NET_IF:
		if (((na->na_type == NA_NETIF_DEV) ||
		    (na->na_type == NA_NETIF_HOST)) &&
		    (kernel_only || (kring->ckr_tx == NR_RX))) {
			return 0;
		}

		ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
		    (na->na_type == NA_NETIF_COMPAT_HOST) ||
		    (na->na_type == NA_NETIF_DEV) ||
		    (na->na_type == NA_NETIF_VP));

		if (!kernel_only) {
			if (kring->ckr_tx == NR_RX) {
				return 0;
			} else {
				break;
			}
		}

		ASSERT(kernel_only);

		/* compat adapters carry no pre-attached packets */
		if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
		    (na->na_type == NA_NETIF_COMPAT_HOST)) {
			return 0;
		}
		/* any other kernel-only netif combination is a bug */
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();

	case NEXUS_TYPE_USER_PIPE:
	case NEXUS_TYPE_MONITOR:
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* Fill the ring with packets */
	sidx = start_idx = 0;
	for (i = 0; i < nslots; i++) {
		kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
		    SKMEM_NOSLEEP));
		if (kqum == NULL) {
			err = ENOMEM;
			SK_ERR("ar 0x%llx (\"%s\") no more buffers "
			    "after %u of %u, err %d", SK_KVA(na->na_arena),
			    na->na_arena->ar_name, i, nslots, err);
			goto cleanup;
		}
		ksd = KR_KSD(kring, i);
		usd = (kernel_only ? NULL : KR_USD(kring, i));

		/* attach packet to slot */
		kqum->qum_ksd = ksd;
		ASSERT(!KSD_VALID_METADATA(ksd));
		KSD_ATTACH_METADATA(ksd, kqum);
		if (usd != NULL) {
			/* expose the metadata to user space as well */
			USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
			kr_externalize_metadata(kring, pp->pp_max_frags,
			    kqum, current_proc());
		}

		SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
		    SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
		    SK_KVA(&kqum->qum_buf[0]));
		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
			SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
			    "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
			    SK_KVA(usd), METADATA_IDX(kqum),
			    SK_KVA(kqum->qum_user),
			    kqum->qum_user->qum_buf[0].buf_idx,
			    SK_KVA(&kqum->qum_user->qum_buf[0]));
		}

		/*
		 * NOTE(review): sidx advances in lockstep with i and is
		 * otherwise unused in this loop — looks vestigial; confirm.
		 */
		sidx = SLOT_NEXT(sidx, kring->ckr_lim);
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
	    SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);

cleanup:
	if (err != 0) {
		/* unwind: detach and free every packet attached so far */
		sidx = start_idx;
		while (i-- > 0) {
			ksd = KR_KSD(kring, i);
			usd = (kernel_only ? NULL : KR_USD(kring, i));
			kqum = ksd->sd_qum;

			ASSERT(ksd == kqum->qum_ksd);
			KSD_RESET(ksd);
			if (usd != NULL) {
				USD_RESET(usd);
			}
			/* detach packet from slot */
			kqum->qum_ksd = NULL;
			pp_free_packet(pp, SK_PTR_ADDR(kqum));

			sidx = SLOT_NEXT(sidx, kring->ckr_lim);
		}
	}
	return err;
}
2346
/*
 * Detach (and normally free) the packet attached to every slot of the
 * kring.  When the adapter uses the user packet pool (upp), packets
 * loaned to user space must first be removed from the pool's loaned
 * hash table before being freed.  When depopulating due to defunct,
 * the user slot descriptors are deliberately left untouched (see the
 * comment inside the loop).
 */
static void
na_kr_depopulate_slots(struct __kern_channel_ring *kring,
    struct kern_channel *ch, boolean_t defunct)
{
#pragma unused(ch)
	const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
	uint32_t i, j, n = kring->ckr_num_slots;
	struct nexus_adapter *na = KRNA(kring);
	struct kern_pbufpool *pp = kring->ckr_pp;
	boolean_t upp = FALSE;
	obj_idx_t midx;

	ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	/* user packet pool bookkeeping applies to TX/RX, not event rings */
	if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
	    (kring->ckr_tx != NR_EV)) {
		upp = TRUE;
	}
	/* i walks the slots; j counts packets actually freed */
	for (i = 0, j = 0; i < n; i++) {
		struct __kern_slot_desc *ksd = KR_KSD(kring, i);
		struct __user_slot_desc *usd;
		struct __kern_quantum *qum, *kqum;
		boolean_t free_packet = FALSE;
		int err;

		if (!KSD_VALID_METADATA(ksd)) {
			/* slot has no packet attached */
			continue;
		}

		kqum = ksd->sd_qum;
		usd = (kernel_only ? NULL : KR_USD(kring, i));
		midx = METADATA_IDX(kqum);

		/*
		 * if the packet is internalized it should not be in the
		 * hash table of packets loaned to user space.
		 */
		if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
			if ((qum = pp_find_upp(pp, midx)) != NULL) {
				panic("internalized packet 0x%llx in htbl",
				    SK_KVA(qum));
				/* NOTREACHED */
				__builtin_unreachable();
			}
			free_packet = TRUE;
		} else if (upp) {
			/*
			 * if the packet is not internalized check if it is
			 * in the list of packets loaned to user-space.
			 * Remove from the list before freeing.
			 */
			ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
			qum = pp_remove_upp(pp, midx, &err);
			if (err != 0) {
				SK_ERR("un-allocated packet or buflet %d %p",
				    midx, SK_KVA(qum));
				if (qum != NULL) {
					free_packet = TRUE;
				}
			}
			/*
			 * NOTE(review): on successful removal (err == 0)
			 * the packet is not freed here — presumably its
			 * ownership stays with user space; confirm against
			 * pp_remove_upp() semantics.
			 */
		} else {
			free_packet = TRUE;
		}

		/*
		 * Clear the user and kernel slot descriptors. Note that
		 * if we are depopulating the slots due to defunct (and not
		 * due to normal deallocation/teardown), we leave the user
		 * slot descriptor alone. At that point the process may
		 * be suspended, and later when it resumes it would just
		 * pick up the original contents and move forward with
		 * whatever it was doing.
		 */
		KSD_RESET(ksd);
		if (usd != NULL && !defunct) {
			USD_RESET(usd);
		}

		/* detach packet from slot */
		kqum->qum_ksd = NULL;

		SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
		    " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
		    METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
		    SK_KVA(&kqum->qum_buf[0]));
		if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
			SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
			    "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
			    (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
			    SK_KVA(usd), METADATA_IDX(kqum),
			    SK_KVA(kqum->qum_user),
			    kqum->qum_user->qum_buf[0].buf_idx,
			    SK_KVA(&kqum->qum_user->qum_buf[0]));
		}

		if (free_packet) {
			pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
		}
	}

	SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
	    SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
	    j, n);
}
2454
2455 int
na_rings_mem_setup(struct nexus_adapter * na,boolean_t alloc_ctx,struct kern_channel * ch)2456 na_rings_mem_setup(struct nexus_adapter *na,
2457 boolean_t alloc_ctx, struct kern_channel *ch)
2458 {
2459 boolean_t kronly;
2460 int err;
2461
2462 SK_LOCK_ASSERT_HELD();
2463 ASSERT(na->na_channels == 0);
2464 /*
2465 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2466 * the backing memory regions for the adapter.
2467 */
2468 kronly = (na->na_flags & NAF_MEM_NO_INIT);
2469 ASSERT(!kronly || NA_KERNEL_ONLY(na));
2470
2471 /*
2472 * Create and initialize the common fields of the krings array.
2473 * using the information that must be already available in the na.
2474 */
2475 if ((err = na_kr_create(na, alloc_ctx)) == 0 && !kronly) {
2476 err = na_kr_setup(na, ch);
2477 if (err != 0) {
2478 na_kr_delete(na);
2479 }
2480 }
2481
2482 return err;
2483 }
2484
2485 void
na_rings_mem_teardown(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)2486 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2487 boolean_t defunct)
2488 {
2489 SK_LOCK_ASSERT_HELD();
2490 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2491
2492 /*
2493 * Deletes the kring and ring array of the adapter. They
2494 * must have been created using na_rings_mem_setup().
2495 *
2496 * XXX: [email protected] -- the parameter "ch" should not be
2497 * needed here; however na_kr_depopulate_slots() needs to
2498 * go thru the channel's user packet pool hash, and so for
2499 * now we leave it here.
2500 */
2501 na_kr_teardown_all(na, ch, defunct);
2502 if (!defunct) {
2503 na_kr_delete(na);
2504 }
2505 }
2506
2507 void
na_ch_rings_defunct(struct kern_channel * ch,struct proc * p)2508 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2509 {
2510 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2511
2512 /*
2513 * Depopulate slots on the TX and RX rings of this channel,
2514 * but don't touch other rings owned by other channels if
2515 * this adapter is being shared.
2516 */
2517 na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2518 }
2519
/*
 * Set or clear CKRF_DROP on all TX/RX rings of the adapter; while set,
 * the ring drops traffic (see KR_DROP()).  Note the flag is updated
 * even when kr_enter() fails — i.e. without exclusive access in that
 * case — and kr_exit() is only called when kr_enter() succeeded.
 */
void
na_kr_drop(struct nexus_adapter *na, boolean_t drop)
{
	enum txrx t;
	uint32_t i;

	for_rx_tx(t) {
		for (i = 0; i < na_get_nrings(na, t); i++) {
			struct __kern_channel_ring *kring = &NAKR(na, t)[i];
			int error;
			/* best effort: update the flag regardless of error */
			error = kr_enter(kring, TRUE);
			if (drop) {
				kring->ckr_flags |= CKRF_DROP;
			} else {
				kring->ckr_flags &= ~CKRF_DROP;
			}

			if (error != 0) {
				SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
				    "kr_enter failed %d",
				    na->na_name, SK_KVA(na),
				    kring->ckr_name, SK_KVA(kring),
				    error);
			} else {
				kr_exit(kring);
			}
			SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
			    "krflags 0x%b", na->na_name, SK_KVA(na),
			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
			    CKRF_BITS);
		}
	}
}
2553
2554 /*
2555 * Set the stopped/enabled status of ring. When stopping, they also wait
2556 * for all current activity on the ring to terminate. The status change
2557 * is then notified using the na na_notify callback.
2558 */
2559 static void
na_set_ring(struct nexus_adapter * na,uint32_t ring_id,enum txrx t,uint32_t state)2560 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2561 uint32_t state)
2562 {
2563 struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2564
2565 /*
2566 * Mark the ring as stopped/enabled, and run through the
2567 * locks to make sure other users get to see it.
2568 */
2569 if (state == KR_READY) {
2570 kr_start(kr);
2571 } else {
2572 kr_stop(kr, state);
2573 }
2574 }
2575
2576
2577 /* stop or enable all the rings of na */
2578 static void
na_set_all_rings(struct nexus_adapter * na,uint32_t state)2579 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2580 {
2581 uint32_t i;
2582 enum txrx t;
2583
2584 SK_LOCK_ASSERT_HELD();
2585
2586 if (!NA_IS_ACTIVE(na)) {
2587 return;
2588 }
2589
2590 for_rx_tx(t) {
2591 for (i = 0; i < na_get_nrings(na, t); i++) {
2592 na_set_ring(na, i, t, state);
2593 }
2594 }
2595 }
2596
2597 /*
2598 * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
2599 * to finish and prevents any new one from starting. Call this before turning
2600 * Skywalk mode off, or before removing the harware rings (e.g., on module
2601 * onload). As a rule of thumb for linux drivers, this should be placed near
2602 * each napi_disable().
2603 */
/* Stop all rings of the adapter; see na_set_all_rings()/kr_stop(). */
void
na_disable_all_rings(struct nexus_adapter *na)
{
	na_set_all_rings(na, KR_STOPPED);
}
2609
2610 /*
2611 * Convenience function used in drivers. Re-enables rxsync and txsync on the
2612 * adapter's rings In linux drivers, this should be placed near each
2613 * napi_enable().
2614 */
/* Re-enable all rings of the adapter; see na_set_all_rings()/kr_start(). */
void
na_enable_all_rings(struct nexus_adapter *na)
{
	na_set_all_rings(na, KR_READY /* enabled */);
}
2620
/* Transition all rings of the adapter into the KR_LOCKED state. */
void
na_lock_all_rings(struct nexus_adapter *na)
{
	na_set_all_rings(na, KR_LOCKED);
}
2626
/* Counterpart of na_lock_all_rings(); returns rings to KR_READY. */
void
na_unlock_all_rings(struct nexus_adapter *na)
{
	na_enable_all_rings(na);
}
2632
/*
 * Connect a user channel 'ch' to the nexus adapter selected by 'chr':
 * find/create the adapter, bind the channel to it, wire up the wait
 * channels, and mmap the arena into process 'p'.  Returns 0 on success
 * or an errno; on failure all partial state is unwound via the 'done'
 * label.  Not for kernel-mode channels (see na_connect_spec()).
 */
int
na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
{
	struct nexus_adapter *__single na = NULL;
	mach_vm_size_t memsize = 0;
	int err = 0;
	enum txrx t;

	ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
	ASSERT(!(ch->ch_flags & CHANF_KERNEL));

	SK_LOCK_ASSERT_HELD();

	/* find the nexus adapter and return the reference */
	err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
	if (err != 0) {
		ASSERT(na == NULL);
		goto done;
	}

	/* kernel-only adapters are not connectable from user space */
	if (NA_KERNEL_ONLY(na)) {
		err = EBUSY;
		goto done;
	}

	/* reject if the adapter is defunct or non-permissive */
	if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
		err = ENXIO;
		goto done;
	}

	err = na_bind_channel(na, ch, chr);
	if (err != 0) {
		goto done;
	}

	ASSERT(ch->ch_schema != NULL);
	ASSERT(na == ch->ch_na);

	/* hook up the per-ring-type wait channels (selinfo) */
	for_all_rings(t) {
		if (na_get_nrings(na, t) == 0) {
			ch->ch_si[t] = NULL;
			continue;
		}
		ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
		    &NAKR(na, t)[ch->ch_first[t]].ckr_si;
	}

	skmem_arena_get_stats(na->na_arena, &memsize, NULL);

	/* flag private memory unless the packet pool is external */
	if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
	    AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
		os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_PRIV_MEM, relaxed);
	}

	/* map the arena into the process address space */
	err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
	if (err != 0) {
		goto done;
	}

	os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_ACTIVE, relaxed);
	chr->cr_memsize = memsize;
	chr->cr_memoffset = ch->ch_schema_offset;

	SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
	    "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
	    na->na_flags, NAF_BITS);

done:
	if (err != 0) {
		if (ch->ch_schema != NULL || na != NULL) {
			if (ch->ch_schema != NULL) {
				ASSERT(na == ch->ch_na);
				/*
				 * Callee will unmap memory region if needed,
				 * as well as release reference held on 'na'.
				 */
				na_disconnect(nx, ch);
				na = NULL;
			}
			if (na != NULL) {
				(void) na_release_locked(na);
				na = NULL;
			}
		}
	}

	return err;
}
2725
/*
 * Disconnect a user channel from its nexus adapter: destroy the mmap
 * mapping, release the references held on the adapter, and clear the
 * channel's wait-channel pointers.  Inverse of na_connect().
 */
void
na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nx)
	enum txrx t;

	SK_LOCK_ASSERT_HELD();

	SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
	    ch->ch_na->na_flags, NAF_BITS);

	/* destroy mapping and release references */
	na_unbind_channel(ch);
	ASSERT(ch->ch_na == NULL);
	ASSERT(ch->ch_schema == NULL);
	for_all_rings(t) {
		ch->ch_si[t] = NULL;
	}
}
2748
/*
 * Mark the nexus adapter defunct (idempotent): set NAF_DEFUNCT, tear
 * down its rings via na_teardown(), and defunct the arena if this
 * adapter owns it.  Takes ch_lock unless the caller already holds it
 * ('locked' is true).
 */
void
na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
    struct nexus_adapter *na, boolean_t locked)
{
#pragma unused(nx)
	SK_LOCK_ASSERT_HELD();
	if (!locked) {
		lck_mtx_lock(&ch->ch_lock);
	}

	LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);

	/* already-defunct adapters are left alone (idempotent) */
	if (!(na->na_flags & NAF_DEFUNCT)) {
		/*
		 * Mark this adapter as defunct to inform nexus-specific
		 * teardown handler called by na_teardown() below.
		 */
		os_atomic_or(&na->na_flags, NAF_DEFUNCT, relaxed);

		/*
		 * Depopulate slots.
		 */
		na_teardown(na, ch, TRUE);

		/*
		 * And finally destroy any already-defunct memory regions.
		 * Do this only if the nexus adapter owns the arena, i.e.
		 * NAF_MEM_LOANED is not set. Otherwise, we'd expect
		 * that this routine be called again for the real owner.
		 */
		if (!(na->na_flags & NAF_MEM_LOANED)) {
			skmem_arena_defunct(na->na_arena);
		}
	}

	SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
	    "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
	    SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
	    na->na_name, ch->ch_info->cinfo_nx_port,
	    (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
	    na->na_flags, NAF_BITS);

	if (!locked) {
		lck_mtx_unlock(&ch->ch_lock);
	}
}
2795
2796 /*
2797 * TODO: [email protected] -- merge this into na_connect()
2798 */
2799 int
na_connect_spec(struct kern_nexus * nx,struct kern_channel * ch,struct chreq * chr,struct proc * p)2800 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2801 struct chreq *chr, struct proc *p)
2802 {
2803 #pragma unused(p)
2804 struct nexus_adapter *__single na = NULL;
2805 mach_vm_size_t memsize = 0;
2806 int error = 0;
2807 enum txrx t;
2808
2809 ASSERT(chr->cr_mode & CHMODE_KERNEL);
2810 ASSERT(ch->ch_flags & CHANF_KERNEL);
2811 ASSERT(ch->ch_na == NULL);
2812 ASSERT(ch->ch_schema == NULL);
2813
2814 SK_LOCK_ASSERT_HELD();
2815
2816 error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2817 if (error != 0) {
2818 goto done;
2819 }
2820
2821 if (na == NULL) {
2822 error = EINVAL;
2823 goto done;
2824 }
2825
2826 if (na->na_channels > 0) {
2827 error = EBUSY;
2828 goto done;
2829 }
2830
2831 if (na->na_flags & NAF_DEFUNCT) {
2832 error = ENXIO;
2833 goto done;
2834 }
2835
2836 /*
2837 * Special connect requires the nexus adapter to handle its
2838 * own channel binding and unbinding via na_special(); bail
2839 * if this adapter doesn't support it.
2840 */
2841 if (na->na_special == NULL) {
2842 error = ENOTSUP;
2843 goto done;
2844 }
2845
2846 /* upon success, "ch->ch_na" will point to "na" */
2847 error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2848 if (error != 0) {
2849 ASSERT(ch->ch_na == NULL);
2850 goto done;
2851 }
2852
2853 ASSERT(na->na_flags & NAF_SPEC_INIT);
2854 ASSERT(na == ch->ch_na);
2855 /* make sure this is still the case */
2856 ASSERT(ch->ch_schema == NULL);
2857
2858 for_rx_tx(t) {
2859 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2860 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2861 }
2862
2863 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2864 chr->cr_memsize = memsize;
2865
2866 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2867 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2868 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2869 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2870 na->na_flags, NAF_BITS);
2871
2872 done:
2873 if (error != 0) {
2874 if (ch->ch_na != NULL || na != NULL) {
2875 if (ch->ch_na != NULL) {
2876 ASSERT(na == ch->ch_na);
2877 /* callee will release reference on 'na' */
2878 na_disconnect_spec(nx, ch);
2879 na = NULL;
2880 }
2881 if (na != NULL) {
2882 (void) na_release_locked(na);
2883 na = NULL;
2884 }
2885 }
2886 }
2887
2888 return error;
2889 }
2890
2891 /*
2892 * TODO: [email protected] -- merge this into na_disconnect()
2893 */
2894 void
na_disconnect_spec(struct kern_nexus * nx,struct kern_channel * ch)2895 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2896 {
2897 #pragma unused(nx)
2898 struct nexus_adapter *na = ch->ch_na;
2899 enum txrx t;
2900 int error;
2901
2902 SK_LOCK_ASSERT_HELD();
2903 ASSERT(na != NULL);
2904 ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */
2905
2906 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2907 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2908 na->na_name, ch->ch_info->cinfo_nx_port,
2909 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2910 na->na_flags, NAF_BITS);
2911
2912 /* take a reference for this routine */
2913 na_retain_locked(na);
2914
2915 ASSERT(ch->ch_flags & CHANF_KERNEL);
2916 ASSERT(ch->ch_schema == NULL);
2917 ASSERT(na->na_special != NULL);
2918 /* unbind this channel */
2919 error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2920 ASSERT(error == 0);
2921 ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2922
2923 /* now release our reference; this may be the last */
2924 na_release_locked(na);
2925 na = NULL;
2926
2927 ASSERT(ch->ch_na == NULL);
2928 for_rx_tx(t) {
2929 ch->ch_si[t] = NULL;
2930 }
2931 }
2932
/* Start a kernel-only adapter through its na_special() handler. */
void
na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nx)
	struct nexus_adapter *na = ch->ch_na;

	SK_LOCK_ASSERT_HELD();

	ASSERT(ch->ch_flags & CHANF_KERNEL);
	ASSERT(NA_KERNEL_ONLY(na));
	ASSERT(na->na_special != NULL);

	na->na_special(na, ch, NULL, NXSPEC_CMD_START);
}
2947
/* Stop a kernel-only adapter through its na_special() handler. */
void
na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
{
#pragma unused(nx)
	struct nexus_adapter *na = ch->ch_na;

	SK_LOCK_ASSERT_HELD();

	ASSERT(ch->ch_flags & CHANF_KERNEL);
	ASSERT(NA_KERNEL_ONLY(na));
	ASSERT(na->na_special != NULL);

	na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
}
2962
/*
 * MUST BE CALLED UNDER SK_LOCK()
 *
 * Get a refcounted reference to a nexus adapter attached
 * to the interface specified by chr.
 * This is always called in the execution of an ioctl().
 *
 * Return ENXIO if the interface specified by the request does
 * not exist, ENOTSUP if Skywalk is not supported by the interface,
 * EINVAL if parameters are invalid, ENOMEM if needed resources
 * could not be allocated.
 * If successful, hold a reference to the nexus adapter.
 *
 * No reference is kept on the real interface, which may then
 * disappear at any time.
 */
int
na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
    struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
    struct nexus_adapter **na, boolean_t create)
{
	int error = 0;

	/* adapter names are copied from chr; the buffers must agree in size */
	_CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));

	*na = NULL;     /* default return value */

	SK_LOCK_ASSERT_HELD();

	/*
	 * We cascade through all possible types of nexus adapter.
	 * The order of the cascade is significant; each nx_*_na_find()
	 * first decides whether the request is for its type.
	 * All nx_*_na_find() functions return an error and an na,
	 * with the following combinations:
	 *
	 *	error	na
	 *	 0	NULL	type doesn't match
	 *	!0	NULL	type matches, but na creation/lookup failed
	 *	 0	!NULL	type matches and na created/found
	 *	!0	!NULL	impossible
	 */

#if CONFIG_NEXUS_MONITOR
	/* try to see if this is a monitor port */
	error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_MONITOR */
#if CONFIG_NEXUS_USER_PIPE
	/* try to see if this is a pipe port */
	error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_USER_PIPE */
#if CONFIG_NEXUS_KERNEL_PIPE
	/* try to see if this is a kernel pipe port */
	error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_KERNEL_PIPE */
#if CONFIG_NEXUS_FLOWSWITCH
	/* try to see if this is a flowswitch port */
	error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_FLOWSWITCH */
#if CONFIG_NEXUS_NETIF
	/* lastly, try to see if this is a netif port */
	error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
	if (error != 0 || *na != NULL) {
		return error;
	}
#endif /* CONFIG_NEXUS_NETIF */

	/* no adapter type claimed the request */
	ASSERT(*na == NULL);
	return ENXIO;
}
3042
/*
 * Acquire a reference on a nexus adapter; a NULL adapter is tolerated
 * as a convenience no-op.  Must be called with SK_LOCK held.
 */
void
na_retain_locked(struct nexus_adapter *na)
{
	SK_LOCK_ASSERT_HELD();

	if (na != NULL) {
#if SK_LOG
		/* fetch the pre-increment value only for the log line */
		uint32_t oref = os_atomic_inc_orig(&na->na_refcount, relaxed);
		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
		    na->na_name, SK_KVA(na), oref + 1, na->na_channels);
#else /* !SK_LOG */
		os_atomic_inc(&na->na_refcount, relaxed);
#endif /* !SK_LOG */
	}
}
3058
/* returns 1 iff the nexus_adapter is destroyed */
/*
 * Drop a reference on a nexus adapter.  When the last reference goes
 * away, run the adapter's destructor, release its memory arena and free
 * the adapter itself.  Must be called with SK_LOCK held; the adapter
 * must have no channels attached at final release.
 */
int
na_release_locked(struct nexus_adapter *na)
{
	uint32_t oref;

	SK_LOCK_ASSERT_HELD();

	ASSERT(na->na_refcount > 0);
	oref = os_atomic_dec_orig(&na->na_refcount, relaxed);
	if (oref > 1) {
		/* not the last reference; nothing else to do */
		SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
		    na->na_name, SK_KVA(na), oref - 1, na->na_channels);
		return 0;
	}
	ASSERT(na->na_channels == 0);

	/* adapter-type specific teardown, if any */
	if (na->na_dtor != NULL) {
		na->na_dtor(na);
	}

	/* rings and auxiliary state must already be gone by now */
	ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
	ASSERT(na->na_slot_ctxs == NULL);
	ASSERT(na->na_scratch == NULL);

#if CONFIG_NEXUS_USER_PIPE
	nx_upipe_na_dealloc(na);
#endif /* CONFIG_NEXUS_USER_PIPE */
	if (na->na_arena != NULL) {
		skmem_arena_release(na->na_arena);
		na->na_arena = NULL;
	}

	SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
	    na->na_name, SK_KVA(na));

	NA_FREE(na);
	return 1;
}
3098
3099 static struct nexus_adapter *
na_pseudo_alloc(zalloc_flags_t how)3100 na_pseudo_alloc(zalloc_flags_t how)
3101 {
3102 struct nexus_adapter *na;
3103
3104 na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
3105 if (na) {
3106 na->na_type = NA_PSEUDO;
3107 na->na_free = na_pseudo_free;
3108 }
3109 return na;
3110 }
3111
3112 static void
na_pseudo_free(struct nexus_adapter * na)3113 na_pseudo_free(struct nexus_adapter *na)
3114 {
3115 ASSERT(na->na_refcount == 0);
3116 SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3117 bzero(na, sizeof(*na));
3118 zfree(na_pseudo_zone, na);
3119 }
3120
/*
 * TX sync callback for pseudo adapters.  A pseudo adapter moves no
 * packets, so this only emits a debug trace and reports success.
 */
static int
na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
	SK_DF(SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);

	return 0;
}
3134
/*
 * RX sync callback for pseudo adapters.  Moves no packets; emits a
 * debug trace, sanity-checks the ring head and reports success.
 */
static int
na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
	SK_DF(SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags);

	/* head index must stay within the ring limit */
	ASSERT(kring->ckr_rhead <= kring->ckr_lim);

	return 0;
}
3150
3151 static int
na_pseudo_activate(struct nexus_adapter * na,na_activate_mode_t mode)3152 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3153 {
3154 SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3155 SK_KVA(na), na_activate_mode2str(mode));
3156
3157 switch (mode) {
3158 case NA_ACTIVATE_MODE_ON:
3159 os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
3160 break;
3161
3162 case NA_ACTIVATE_MODE_DEFUNCT:
3163 break;
3164
3165 case NA_ACTIVATE_MODE_OFF:
3166 os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);
3167 break;
3168
3169 default:
3170 VERIFY(0);
3171 /* NOTREACHED */
3172 __builtin_unreachable();
3173 }
3174
3175 return 0;
3176 }
3177
/*
 * Destructor callback for pseudo adapters; there is no adapter-specific
 * state beyond what na_release_locked() itself tears down.
 */
static void
na_pseudo_dtor(struct nexus_adapter *na)
{
#pragma unused(na)
}
3183
3184 static int
na_pseudo_krings_create(struct nexus_adapter * na,struct kern_channel * ch)3185 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3186 {
3187 return na_rings_mem_setup(na, FALSE, ch);
3188 }
3189
3190 static void
na_pseudo_krings_delete(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)3191 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3192 boolean_t defunct)
3193 {
3194 na_rings_mem_teardown(na, ch, defunct);
3195 }
3196
/*
 * Pseudo nexus adapter; typically used as a generic parent adapter.
 *
 * Create and initialize one for nexus "nx", sizing its rings and slots
 * from the provider parameters (already validated by the domain's
 * nxdom_prov_params()).  On success stores a retained adapter in *ret
 * and returns 0; on failure returns non-zero with *ret left NULL.
 * Must be called with SK_LOCK held.
 */
int
na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
    struct nexus_adapter **ret)
{
	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
	struct nexus_adapter *na;
	int error;

	SK_LOCK_ASSERT_HELD();
	*ret = NULL;

	/* Z_WAITOK allocation; presumably never NULL -- TODO confirm */
	na = na_pseudo_alloc(Z_WAITOK);

	ASSERT(na->na_type == NA_PSEUDO);
	ASSERT(na->na_free == na_pseudo_free);

	(void) strbufcpy(na->na_name, chr->cr_name);
	uuid_generate_random(na->na_uuid);

	/*
	 * Verify upper bounds; for all cases including user pipe nexus,
	 * the parameters must have already been validated by corresponding
	 * nxdom_prov_params() function defined by each domain.
	 */
	na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
	na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
	na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
	na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
	ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
	ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
	ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
	ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);

	/* install the pseudo adapter's callback vector */
	na->na_txsync = na_pseudo_txsync;
	na->na_rxsync = na_pseudo_rxsync;
	na->na_activate = na_pseudo_activate;
	na->na_dtor = na_pseudo_dtor;
	na->na_krings_create = na_pseudo_krings_create;
	na->na_krings_delete = na_pseudo_krings_delete;

	/* na_stats_type is declared const; cast away to initialize it */
	*(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;

	/* other fields are set in the common routine */
	na_attach_common(na, nx, NX_DOM_PROV(nx));

	/* set up the memory arena via the domain provider */
	if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
	    nx, na)) != 0) {
		ASSERT(na->na_arena == NULL);
		goto err;
	}
	ASSERT(na->na_arena != NULL);

	/* na_flowadv_max is declared const; cast away to initialize it */
	*(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
	ASSERT(na->na_flowadv_max == 0 ||
	    skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);

#if SK_LOG
	uuid_string_t uuidstr;
	SK_D("na_name: \"%s\"", na->na_name);
	SK_D("  UUID:        %s", sk_uuid_unparse(na->na_uuid, uuidstr));
	SK_D("  nx:          0x%llx (\"%s\":\"%s\")",
	    SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
	    NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
	SK_D("  flags:       %b", na->na_flags, NAF_BITS);
	SK_D("  flowadv_max: %u", na->na_flowadv_max);
	SK_D("  rings:       tx %u rx %u",
	    na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
	SK_D("  slots:       tx %u rx %u",
	    na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_D("  next_pipe:   %u", na->na_next_pipe);
	SK_D("  max_pipes:   %u", na->na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
#endif /* SK_LOG */

	/* hand a retained adapter back to the caller */
	*ret = na;
	na_retain_locked(na);

	return 0;

err:
	/* undo partial construction; arena may or may not exist yet */
	if (na != NULL) {
		if (na->na_arena != NULL) {
			skmem_arena_release(na->na_arena);
			na->na_arena = NULL;
		}
		NA_FREE(na);
	}
	return error;
}
3291
/*
 * Claim flow-advisory slot "fe_idx" in the adapter's arena for the flow
 * identified by "fae_id"/"flowid", marking it valid.  The adapter must
 * be active with flow advisory enabled, and the arena must not be
 * defunct (asserted below).
 */
void
na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
    const flowadv_idx_t fe_idx, const uint32_t flowid)
{
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
	struct __flowadv_entry *__single fae;

	ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	AR_LOCK(ar);

	/* we must not get here if arena is defunct; this must be valid */
	ASSERT(arn->arn_flowadv_obj != NULL);

	VERIFY(fe_idx < na->na_flowadv_max);
	fae = &arn->arn_flowadv_obj[fe_idx];
	uuid_copy(fae->fae_id, fae_id);
	fae->fae_flowid = flowid;
	fae->fae_flags = FLOWADVF_VALID;

	AR_UNLOCK(ar);
}
3316
/*
 * Release flow-advisory slot "fe_idx", clearing its UUID, flow ID and
 * flags.  Unlike the alloc path, a defunct arena is tolerated here (the
 * object table may already be gone), in which case nothing is done.
 */
void
na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
    const flowadv_idx_t fe_idx, const uint32_t flowid)
{
#pragma unused(fae_id)
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);

	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	AR_LOCK(ar);

	/* object table is gone only when the arena has been defuncted */
	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
	if (arn->arn_flowadv_obj != NULL) {
		struct __flowadv_entry *__single fae;

		VERIFY(fe_idx < na->na_flowadv_max);
		fae = &arn->arn_flowadv_obj[fe_idx];
		/* the slot must still belong to the flow being freed */
		ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
		uuid_clear(fae->fae_id);
		VERIFY(fae->fae_flowid == flowid);
		fae->fae_flowid = 0;
		fae->fae_flags = 0;
	}

	AR_UNLOCK(ar);
}
3345
3346 bool
na_flowadv_set(const struct kern_channel * ch,const flowadv_idx_t fe_idx,const flowadv_token_t flow_token)3347 na_flowadv_set(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3348 const flowadv_token_t flow_token)
3349 {
3350 struct nexus_adapter *na = ch->ch_na;
3351 struct skmem_arena *ar = na->na_arena;
3352 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3353 uuid_string_t fae_uuid_str;
3354 bool suspend = false;
3355
3356 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3357 ASSERT(fe_idx < na->na_flowadv_max);
3358 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3359
3360 AR_LOCK(ar);
3361
3362 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3363
3364 if (arn->arn_flowadv_obj != NULL) {
3365 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3366
3367 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3368 /*
3369 * We cannot guarantee that the flow is still around by now,
3370 * so check if that's the case and let the caller know.
3371 */
3372 if ((suspend = (fae->fae_token == flow_token))) {
3373 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3374 fae->fae_flags |= FLOWADVF_SUSPENDED;
3375 uuid_unparse(fae->fae_id, fae_uuid_str);
3376 }
3377 } else {
3378 suspend = false;
3379 }
3380 if (suspend) {
3381 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x fidx %u "
3382 "SUSPEND", sk_proc_name_address(current_proc()),
3383 sk_proc_pid(current_proc()), fae_uuid_str, flow_token, fe_idx);
3384 } else {
3385 SK_ERR("%s(%d) flow token 0x%llu fidx %u no longer around",
3386 sk_proc_name_address(current_proc()),
3387 sk_proc_pid(current_proc()), flow_token, fe_idx);
3388 }
3389
3390 AR_UNLOCK(ar);
3391
3392 return suspend;
3393 }
3394
/*
 * Clear the suspended state of the flow-advisory entry at "fe_idx",
 * provided the entry's token still matches "flow_token" (the flow may
 * have gone away since the caller looked it up).  Returns true if the
 * entry was found and resumed, false otherwise (token mismatch or
 * defunct arena).
 */
bool
na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
    const flowadv_token_t flow_token)
{
	struct nexus_adapter *na = ch->ch_na;
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
	uuid_string_t fae_uuid_str;
	boolean_t resume = false;

	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
	ASSERT(fe_idx < na->na_flowadv_max);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	AR_LOCK(ar);

	/* object table is gone only when the arena has been defuncted */
	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));

	if (arn->arn_flowadv_obj != NULL) {
		struct __flowadv_entry *__single fae = &arn->arn_flowadv_obj[fe_idx];

		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
		/*
		 * We cannot guarantee that the flow is still around by now,
		 * so check if that's the case and let the caller know.
		 */
		if ((resume = (fae->fae_token == flow_token))) {
			ASSERT(fae->fae_flags & FLOWADVF_VALID);
			fae->fae_flags &= ~FLOWADVF_SUSPENDED;
			/* uuid string is only consumed by the log below */
			uuid_unparse(fae->fae_id, fae_uuid_str);
		}
	} else {
		resume = FALSE;
	}
	if (resume) {
		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x "
		    "fidx %u RESUME", ch->ch_name, ch->ch_pid, fae_uuid_str, flow_token,
		    fe_idx);
	} else {
		SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
		    ch->ch_name, ch->ch_pid, flow_token, fe_idx);
	}

	AR_UNLOCK(ar);

	return resume;
}
3442
/*
 * Accumulate ECN congestion-experienced (CE) counts onto the
 * flow-advisory entry at "fe_idx", provided the entry's token still
 * matches "flow_token".  Returns non-zero if the counts were recorded,
 * 0 otherwise (token mismatch or defunct arena).
 * NOTE(review): returns a boolean_t through an int return type.
 */
int
na_flowadv_report_ce_event(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
    const flowadv_token_t flow_token, uint32_t ce_cnt, uint32_t total_pkt_cnt)
{
	struct nexus_adapter *na = ch->ch_na;
	struct skmem_arena *ar = na->na_arena;
	struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
	uuid_string_t fae_uuid_str;
	boolean_t added;

	ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
	ASSERT(fe_idx < na->na_flowadv_max);
	ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);

	AR_LOCK(ar);

	/* object table is gone only when the arena has been defuncted */
	ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));

	if (arn->arn_flowadv_obj != NULL) {
		struct __flowadv_entry *__single fae = &arn->arn_flowadv_obj[fe_idx];

		_CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
		/*
		 * We cannot guarantee that the flow is still around by now,
		 * so check if that's the case and let the caller know.
		 */
		if ((added = (fae->fae_token == flow_token))) {
			ASSERT(fae->fae_flags & FLOWADVF_VALID);
			fae->fae_ce_cnt += ce_cnt;
			fae->fae_pkt_cnt += total_pkt_cnt;
			/* uuid string is only consumed by the log below */
			uuid_unparse(fae->fae_id, fae_uuid_str);
		}
	} else {
		added = FALSE;
	}
	if (added) {
		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) %s flow token 0x%x "
		    "fidx %u ce cnt incremented", ch->ch_name,
		    ch->ch_pid, fae_uuid_str, flow_token, fe_idx);
	} else {
		SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
		    ch->ch_name, ch->ch_pid, flow_token, fe_idx);
	}

	AR_UNLOCK(ar);

	return added;
}
3491
/*
 * Post a flow-advisory update event on a TX kernel ring, waking any
 * channel waiters with the CHAN_FILT_HINT_FLOW_ADV_UPD hint.
 */
void
na_flowadv_event(struct __kern_channel_ring *kring)
{
	/* flow advisories are only posted on TX rings */
	ASSERT(kring->ckr_tx == NR_TX);

	SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
	    sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
	    KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));

	na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
}
3503
/*
 * Sync callback for the user packet pool's "free" ring: reclaim the
 * packets the user process has placed in slots [khead, rhead), detach
 * them from their slots and return them to the pool in one batch.
 * Returns 0 on success, EINVAL if a slot holds an invalid or
 * un-allocated packet (remaining slots are left unprocessed).
 */
static int
na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	struct kern_pbufpool *pp = kring->ckr_pp;
	uint32_t nfree = 0;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots (ring indices wrap at ckr_num_slots) */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	PP_LOCK(pp);
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		/* user slot must reference a metadata object */
		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		/* look up and unlink the packet from the allocated list */
		kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			/*
			 * NOTE(review): %p used with SK_KVA() here --
			 * consider 0x%llx as elsewhere in this file.
			 */
			SK_ERR("un-allocated packet or buflet %d %p",
			    usd->sd_md_idx, SK_KVA(kqum));
			ret = EINVAL;
			break;
		}

		/* detach and free the packet */
		kqum->qum_qflags &= ~QUM_F_FINALIZED;
		kqum->qum_ksd = NULL;
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		ASSERT(pp == kqum->qum_pp);
		/* stage the packet for a single batched free below */
		ASSERT(nfree < kring->ckr_num_slots);
		kring->ckr_scratch[nfree++] = (uint64_t)kqum;
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	PP_UNLOCK(pp);

	if (__probable(nfree > 0)) {
		pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
	}

	/* advance head past the reclaimed slots; tail trails head by one */
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}
3578
/* per-iteration batch size for the buflet-attach loop below */
#define MAX_BUFLETS 64
/*
 * Allocate up to *ph_cnt packets into buf_arr.  For the non-large case
 * this is a single batch allocation.  For the large case, packets are
 * first allocated with 0 buflets, then large buflets are allocated in
 * batches of MAX_BUFLETS and attached one per packet; packets that
 * could not receive a buflet are freed again.  On return *ph_cnt holds
 * the number of usable packets; returns 0 when the full request was
 * satisfied, EAGAIN on a partial result, ENOMEM when none could be
 * built, or the underlying allocator's error.
 */
static int
alloc_packets(kern_pbufpool_t pp, uint64_t *__counted_by(*ph_cnt)buf_arr, bool large,
    uint32_t *ph_cnt)
{
	int err;
	uint32_t need, need_orig, remain, alloced, i;
	uint64_t buflets[MAX_BUFLETS];
	uint64_t *__indexable pkts;

	need_orig = *ph_cnt;
	/* large: packets with 0 buflets; otherwise 1 buflet per packet */
	err = kern_pbufpool_alloc_batch_nosleep(pp, large ? 0 : 1, buf_arr, ph_cnt);
	if (!large) {
		return err;
	}
	if (*ph_cnt == 0) {
		SK_ERR("failed to alloc %d packets for alloc ring: err %d",
		    need_orig, err);
		DTRACE_SKYWALK2(alloc__pkts__fail, uint32_t, need_orig, int, err);
		return err;
	}
	need = remain = *ph_cnt;
	alloced = 0;
	pkts = buf_arr;
	/* attach one large buflet to each allocated packet, in batches */
	while (remain > 0) {
		uint32_t cnt, cnt_orig;

		cnt = MIN(remain, MAX_BUFLETS);
		cnt_orig = cnt;
		err = pp_alloc_buflet_batch(pp, buflets, &cnt, SKMEM_NOSLEEP, true);
		if (cnt == 0) {
			SK_ERR("failed to alloc %d buflets for alloc ring: "
			    "remain %d, err %d", cnt_orig, remain, err);
			DTRACE_SKYWALK3(alloc__bufs__fail, uint32_t, cnt_orig,
			    uint32_t, remain, int, err);
			break;
		}
		for (i = 0; i < cnt; i++) {
			kern_packet_t ph = (kern_packet_t)pkts[i];
			kern_buflet_t __single buf = __unsafe_forge_single(
				kern_buflet_t, buflets[i]);
			kern_buflet_t pbuf = kern_packet_get_next_buflet(ph, NULL);
			VERIFY(kern_packet_add_buflet(ph, pbuf, buf) == 0);
			buflets[i] = 0;
		}
		DTRACE_SKYWALK3(alloc__bufs, uint32_t, remain, uint32_t, cnt,
		    uint32_t, cnt_orig);
		pkts += cnt;
		alloced += cnt;
		remain -= cnt;
	}
	/* free packets without attached buffers */
	if (remain > 0) {
		DTRACE_SKYWALK1(remaining__pkts, uint32_t, remain);
		ASSERT(remain + alloced == need);
		pp_free_packet_batch(pp, pkts, remain);

		/* pp_free_packet_batch() should clear the pkts array */
		for (i = 0; i < remain; i++) {
			ASSERT(pkts[i] == 0);
		}
	}
	*ph_cnt = alloced;
	if (*ph_cnt == 0) {
		err = ENOMEM;
	} else if (*ph_cnt < need_orig) {
		err = EAGAIN;
	} else {
		err = 0;
	}
	DTRACE_SKYWALK3(alloc__packets, uint32_t, need_orig, uint32_t, *ph_cnt, int, err);
	return err;
}
3652
/*
 * Sync callback for the user packet pool's "alloc" ring (shared by the
 * regular and large buffer variants).  Reclaims slots the user has
 * consumed, adapts the per-ring working set (grow 50% on explicit sync,
 * shrink 25% when demand is low, purge on NA_SYNCF_UPP_PURGE), then
 * allocates packets to refill the ring up to the working set, inserting
 * them into the pool's allocated list and externalizing them for user
 * space.  Always returns 0; allocation shortfalls are counted in
 * ckr_err_stats.
 */
static int
na_packet_pool_alloc_sync_common(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags, bool large)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, ph_needed, ph_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_quantum *kqum;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		/* shrink the working set to the floor after inactivity */
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increment the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;

	/* min with # of avail free slots (subtract busy from max) */
	n = ph_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((ph_cnt = ph_needed) == 0) {
		goto done;
	}

	/* large buflets only when the pool supports buffer-on-demand */
	err = alloc_packets(pp, kring->ckr_scratch,
	    PP_HAS_BUFFER_ON_DEMAND(pp) && large, &ph_cnt);
	if (__improbable(ph_cnt == 0)) {
		SK_ERR("kr 0x%llx failed to alloc %u packet s(%d)",
		    SK_KVA(kring), ph_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
	} else {
		/*
		 * Add packets to the allocated list of user packet pool.
		 */
		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
	}

	/* attach each allocated packet to a ring slot past the tail */
	for (n = 0; n < ph_cnt; n++) {
		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
		kring->ckr_scratch[n] = 0;
		ASSERT(kqum != NULL);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/*
		 * Since this packet is freshly allocated and we need to
		 * have the flag set for the attach to succeed, just set
		 * it here rather than calling __packet_finalize().
		 */
		kqum->qum_qflags |= QUM_F_FINALIZED;

		/* Attach packet to slot */
		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
		/*
		 * externalize the packet as it is being transferred to
		 * user space.
		 */
		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	/* publish the new tail; ring must not appear empty incorrectly */
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}
3768
3769 static int
na_packet_pool_alloc_sync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)3770 na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
3771 uint32_t flags)
3772 {
3773 return na_packet_pool_alloc_sync_common(kring, p, flags, false);
3774 }
3775
3776 static int
na_packet_pool_alloc_large_sync(struct __kern_channel_ring * kring,struct proc * p,uint32_t flags)3777 na_packet_pool_alloc_large_sync(struct __kern_channel_ring *kring, struct proc *p,
3778 uint32_t flags)
3779 {
3780 return na_packet_pool_alloc_sync_common(kring, p, flags, true);
3781 }
3782
/*
 * Sync callback for the user packet pool's buflet "free" ring: reclaim
 * the buflets the user process has placed in slots [khead, rhead),
 * detach them from their slots and return them to the pool one at a
 * time.  Returns 0 on success, EINVAL if a slot holds an invalid or
 * un-allocated buflet (remaining slots are left unprocessed).
 */
static int
na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct kern_pbufpool *pp = kring->ckr_pp;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots (ring indices wrap at ckr_num_slots) */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		/* user slot must reference a metadata object */
		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		/* look up and unlink the buflet from the allocated list */
		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			/*
			 * NOTE(review): %p used with SK_KVA() here --
			 * consider 0x%llx as elsewhere in this file.
			 */
			SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
			    SK_KVA(kbft));
			ret = EINVAL;
			break;
		}

		/* detach and free the packet */
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		pp_free_buflet(pp, kbft);
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	/* advance head past the reclaimed slots; tail trails head by one */
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}
3845
/*
 * Sync callback for the user packet pool's buflet "alloc" ring.  Mirrors
 * na_packet_pool_alloc_sync_common() but deals in bare buflets rather
 * than packets: reclaims consumed slots, adapts the working set (grow
 * 50% on explicit sync, shrink 25% when demand is low, purge on
 * NA_SYNCF_UPP_PURGE), then allocates buflets to refill the ring,
 * inserting them into the pool's allocated list and externalizing them
 * for user space.  Always returns 0; allocation shortfalls are counted
 * in ckr_err_stats.
 */
static int
na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, bh_needed, bh_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct __kern_buflet_ext *kbe;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		/* shrink the working set to the floor after inactivity */
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increment the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;

	/* min with # of avail free slots (subtract busy from max) */
	n = bh_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((bh_cnt = bh_needed) == 0) {
		goto done;
	}

	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
	    SKMEM_NOSLEEP, false);

	if (bh_cnt == 0) {
		SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
		    SK_KVA(kring), bh_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
	}

	/* attach each allocated buflet to a ring slot past the tail */
	for (n = 0; n < bh_cnt; n++) {
		struct __user_buflet *ubft;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kbe = __unsafe_forge_single(struct __kern_buflet_ext *,
		    (kring->ckr_scratch[n]));
		kbft = &kbe->kbe_overlay;

		kring->ckr_scratch[n] = 0;
		ASSERT(kbft != NULL);

		/*
		 * Add buflet to the allocated list of user packet pool.
		 */
		pp_insert_upp_bft(pp, kbft, pid);

		/*
		 * externalize the buflet as it is being transferred to
		 * user space.
		 */
		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
		KBUF_EXTERNALIZE(kbft, ubft, pp);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/* Attach buflet to slot */
		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	/* publish the new tail; ring must not appear empty incorrectly */
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}
3963
3964 /* The caller needs to ensure that the NA stays intact */
3965 void
na_drain(struct nexus_adapter * na,boolean_t purge)3966 na_drain(struct nexus_adapter *na, boolean_t purge)
3967 {
3968 /* will be cleared on next channel sync */
3969 if (!(os_atomic_or_orig(&na->na_flags, NAF_DRAINING, relaxed) &
3970 NAF_DRAINING) && NA_IS_ACTIVE(na)) {
3971 SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
3972 na->na_name, (purge ? "purging" : "pruning"),
3973 SK_KVA(na), na->na_flags, NAF_BITS);
3974
3975 /* reap (purge/prune) caches in the arena */
3976 skmem_arena_reap(na->na_arena, purge);
3977 }
3978 }
3979