1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31 * All rights reserved.
32 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55 #include <sys/systm.h>
56 #include <skywalk/os_skywalk_private.h>
57 #include <skywalk/nexus/monitor/nx_monitor.h>
58 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
59 #include <skywalk/nexus/netif/nx_netif.h>
60 #include <skywalk/nexus/upipe/nx_user_pipe.h>
61 #include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
62 #include <kern/thread.h>
63
64 static int na_krings_use(struct kern_channel *);
65 static void na_krings_unuse(struct kern_channel *);
66 static void na_krings_verify(struct nexus_adapter *);
67 static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
68 static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
69 static void na_set_all_rings(struct nexus_adapter *, uint32_t);
70 static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
71 static void na_unset_ringid(struct kern_channel *);
72 static void na_teardown(struct nexus_adapter *, struct kern_channel *,
73 boolean_t);
74
75 static int na_kr_create(struct nexus_adapter *, uint32_t, boolean_t);
76 static void na_kr_delete(struct nexus_adapter *);
77 static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
78 static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
79 boolean_t);
80 static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
81 boolean_t, struct proc *);
82 static int na_kr_populate_slots(struct __kern_channel_ring *);
83 static void na_kr_depopulate_slots(struct __kern_channel_ring *,
84 struct kern_channel *, boolean_t defunct);
85
86 static int na_schema_alloc(struct kern_channel *);
87
88 static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
89 static void na_pseudo_free(struct nexus_adapter *);
90 static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
91 uint32_t);
92 static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
93 uint32_t);
94 static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
95 static void na_pseudo_dtor(struct nexus_adapter *);
96 static int na_pseudo_krings_create(struct nexus_adapter *,
97 struct kern_channel *);
98 static void na_pseudo_krings_delete(struct nexus_adapter *,
99 struct kern_channel *, boolean_t);
100 static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
101 struct proc *, uint32_t);
102 static int na_packet_pool_free_sync(struct __kern_channel_ring *,
103 struct proc *, uint32_t);
104 static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
105 struct proc *, uint32_t);
106 static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
107 struct proc *, uint32_t);
108
109 static void na_destroyer_final(struct nexus_adapter *);
110 static void na_destroyer_enqueue(struct nexus_adapter *);
111 static struct nexus_adapter *na_destroyer_dequeue(void);
112 static int na_destroyer_thread_cont(int);
113 static void na_destroyer_thread_func(void *, wait_result_t);
114
115 extern kern_return_t thread_terminate(thread_t);
116
117 #define NA_KRING_IDLE_TIMEOUT (NSEC_PER_SEC * 30) /* 30 seconds */
118
119 static ZONE_DECLARE(na_pseudo_zone, SKMEM_ZONE_PREFIX ".na.pseudo",
120 sizeof(struct nexus_adapter), ZC_ZFREE_CLEARMEM);
121
122 static int __na_inited = 0;
123
124 #define NA_NUM_WMM_CLASSES 4
125 #define NAKR_WMM_SC2RINGID(_s) PKT_SC2TC(_s)
126 #define NAKR_SET_SVC_LUT(_n, _s) \
127 (_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
128 #define NAKR_SET_KR_SVC(_n, _s) \
129 NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)
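
/*
 * Illustrative sketch only (not used by the build): expanding
 * NAKR_SET_SVC_LUT(na, KPKT_SC_VI) yields
 *
 *	na->na_kring_svc_lut[MBUF_SCIDX(KPKT_SC_VI)] = PKT_SC2TC(KPKT_SC_VI);
 *
 * i.e. the LUT maps a service class index to the WMM TX ring index that
 * carries it, while NAKR_SET_KR_SVC(na, KPKT_SC_VI) stamps that TX
 * kring's ckr_svc with the service class itself.
 */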
130
131 #define NA_UPP_ALLOC_LOWAT 8
132 static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;
133
134 #define NA_UPP_REAP_INTERVAL 10 /* seconds */
135 static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;
136
137 #define NA_UPP_WS_HOLD_TIME 2 /* seconds */
138 static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;
139
140 #define NA_UPP_REAP_MIN_PKTS 0
141 static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;
142
143 #define NA_UPP_ALLOC_BUF_LOWAT 64
144 static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;
145
146 #if (DEVELOPMENT || DEBUG)
147 static uint64_t _na_inject_error = 0;
148 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
149 _SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)
150
151 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
152 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
153 NA_UPP_WS_HOLD_TIME, "");
154 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
155 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
156 NA_UPP_REAP_INTERVAL, "");
157 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
158 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
159 NA_UPP_REAP_MIN_PKTS, "");
160 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
161 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
162 NA_UPP_ALLOC_LOWAT, "");
163 SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
164 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
165 NA_UPP_ALLOC_BUF_LOWAT, "");
166 SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
167 CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
168 #else
169 #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
170 #endif /* !DEVELOPMENT && !DEBUG */
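
/*
 * On DEVELOPMENT/DEBUG kernels the tunables above are registered as
 * read-write sysctls under the kern.skywalk node; a sketch of how one
 * might be adjusted on such a build (value is only an example):
 *
 *	sysctl kern.skywalk.na_upp_alloc_lowat=16
 *
 * Release kernels keep the compile-time defaults and stub out the
 * error-injection hook.
 */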
171
172 #define SKMEM_TAG_NX_RINGS "com.apple.skywalk.nexus.rings"
173 static kern_allocation_name_t skmem_tag_nx_rings;
174
175 #define SKMEM_TAG_NX_CONTEXTS "com.apple.skywalk.nexus.contexts"
176 static kern_allocation_name_t skmem_tag_nx_contexts;
177
178 #define SKMEM_TAG_NX_SCRATCH "com.apple.skywalk.nexus.scratch"
179 static kern_allocation_name_t skmem_tag_nx_scratch;
180
181 #if !XNU_TARGET_OS_OSX
182 /* see KLDBootstrap::readPrelinkedExtensions() for details */
183 extern uuid_t kernelcache_uuid;
184 #else /* XNU_TARGET_OS_OSX */
185 /* see panic_init() for details */
186 extern unsigned char *kernel_uuid;
187 #endif /* XNU_TARGET_OS_OSX */
188
189 /* The following are protected by sk_lock */
190 static TAILQ_HEAD(, nexus_adapter) na_destroyer_head;
191 static uint32_t na_destroyer_cnt;
192 static void *na_destroyer_run; /* wait channel for destroyer thread */
193 static thread_t na_destroyer_thread = THREAD_NULL;
194
195 void
196 na_init(void)
197 {
198 /*
199 * Changing the size of the nexus_mdata structure won't break the
200 * ABI, but we need to be mindful of memory consumption; thus here
201 * we add a compile-time check to make sure the size is within
202 * the expected limit and that it's properly aligned. This
203 * check may be adjusted in the future as needed.
204 */
205 _CASSERT(sizeof(struct nexus_mdata) <= 32 &&
206 IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
207 _CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));
208
209 /* see comments on nexus_meta_type_t */
210 _CASSERT(NEXUS_META_TYPE_MAX == 3);
211 _CASSERT(NEXUS_META_SUBTYPE_MAX == 3);
212
213 ASSERT(!__na_inited);
214
215 ASSERT(skmem_tag_nx_rings == NULL);
216 skmem_tag_nx_rings =
217 kern_allocation_name_allocate(SKMEM_TAG_NX_RINGS, 0);
218 ASSERT(skmem_tag_nx_rings != NULL);
219
220 ASSERT(skmem_tag_nx_contexts == NULL);
221 skmem_tag_nx_contexts =
222 kern_allocation_name_allocate(SKMEM_TAG_NX_CONTEXTS, 0);
223 ASSERT(skmem_tag_nx_contexts != NULL);
224
225 ASSERT(skmem_tag_nx_scratch == NULL);
226 skmem_tag_nx_scratch =
227 kern_allocation_name_allocate(SKMEM_TAG_NX_SCRATCH, 0);
228 ASSERT(skmem_tag_nx_scratch != NULL);
229
230 TAILQ_INIT(&na_destroyer_head);
231 ASSERT(na_destroyer_thread == THREAD_NULL);
232 if (kernel_thread_start(na_destroyer_thread_func,
233 NULL, &na_destroyer_thread) != KERN_SUCCESS) {
234 panic_plain("%s: couldn't create destroyer thread", __func__);
235 /* NOTREACHED */
236 }
237
238 __na_inited = 1;
239 }
240
241 void
242 na_fini(void)
243 {
244 if (__na_inited) {
245 if (na_destroyer_thread != THREAD_NULL) {
246 /* for the extra refcnt from kernel_thread_start() */
247 thread_deallocate(na_destroyer_thread);
248 /* terminate it */
249 (void) thread_terminate(na_destroyer_thread);
250 na_destroyer_thread = THREAD_NULL;
251 }
252 ASSERT(TAILQ_EMPTY(&na_destroyer_head));
253 ASSERT(na_destroyer_cnt == 0);
254
255 if (skmem_tag_nx_rings != NULL) {
256 kern_allocation_name_release(skmem_tag_nx_rings);
257 skmem_tag_nx_rings = NULL;
258 }
259 if (skmem_tag_nx_contexts != NULL) {
260 kern_allocation_name_release(skmem_tag_nx_contexts);
261 skmem_tag_nx_contexts = NULL;
262 }
263 if (skmem_tag_nx_scratch != NULL) {
264 kern_allocation_name_release(skmem_tag_nx_scratch);
265 skmem_tag_nx_scratch = NULL;
266 }
267
268 __na_inited = 0;
269 }
270 }
271
272 /*
273 * Interpret the ringid of a chreq, by translating it into a pair
274 * of intervals of ring indices:
275 *
276 * [txfirst, txlast) and [rxfirst, rxlast)
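 *
 * A sketch of the mapping, assuming a hypothetical adapter with 4 TX
 * and 2 RX rings:
 *
 *	ring_id			ring_set	tx		rx
 *	CHANNEL_RING_ID_ANY	RING_SET_ALL	[0,4)		[0,2)
 *	1			RING_SET_ALL	[1,2)		[1,2)
 *
 * A specific ring_id is rejected with EINVAL only when it is out of
 * range for both the TX and the RX direction.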
277 */
278 int
279 na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
280 ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
281 {
282 enum txrx t;
283
284 switch (ring_set) {
285 case RING_SET_ALL:
286 /*
287 * Ring pair eligibility: all ring(s).
288 */
289 if (ring_id != CHANNEL_RING_ID_ANY &&
290 ring_id >= na_get_nrings(na, NR_TX) &&
291 ring_id >= na_get_nrings(na, NR_RX)) {
292 SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
293 na->na_name, (int)ring_id, ring_set);
294 return EINVAL;
295 }
296 for_rx_tx(t) {
297 if (ring_id == CHANNEL_RING_ID_ANY) {
298 first[t] = 0;
299 last[t] = na_get_nrings(na, t);
300 } else {
301 first[t] = ring_id;
302 last[t] = ring_id + 1;
303 }
304 }
305 break;
306
307 default:
308 SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
309 return EINVAL;
310 }
311
312 SK_DF(SK_VERB_NA | SK_VERB_RING,
313 "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
314 na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
315 first[NR_RX], last[NR_RX]);
316
317 return 0;
318 }
319
320 /*
321 * Set the ring ID. For devices with a single queue, a request
322 * for all rings is the same as a single ring.
323 */
324 static int
325 na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
326 {
327 struct nexus_adapter *na = ch->ch_na;
328 int error;
329 enum txrx t;
330 uint32_t n_alloc_rings;
331
332 if ((error = na_interp_ringid(na, ring_id, ring_set,
333 ch->ch_first, ch->ch_last)) != 0) {
334 return error;
335 }
336
337 n_alloc_rings = na_get_nrings(na, NR_A);
338 if (n_alloc_rings != 0) {
339 ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
340 ch->ch_last[NR_A] = ch->ch_last[NR_F] =
341 ch->ch_first[NR_A] + n_alloc_rings;
342 } else {
343 ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
344 ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
345 }
346 ch->ch_first[NR_EV] = 0;
347 ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
348 /* XXX: should we initialize na_si_users for event ring ? */
349
350 /*
351 * Optimization: count the users registered for more than
352 * one ring, which are the ones sleeping on the global queue.
353 * The default na_notify() callback will then avoid signaling
354 * the global queue if nobody is using it
355 */
356 for_rx_tx(t) {
357 if (ch_is_multiplex(ch, t)) {
358 na->na_si_users[t]++;
359 ASSERT(na->na_si_users[t] != 0);
360 }
361 }
362 return 0;
363 }
364
365 static void
366 na_unset_ringid(struct kern_channel *ch)
367 {
368 struct nexus_adapter *na = ch->ch_na;
369 enum txrx t;
370
371 for_rx_tx(t) {
372 if (ch_is_multiplex(ch, t)) {
373 ASSERT(na->na_si_users[t] != 0);
374 na->na_si_users[t]--;
375 }
376 ch->ch_first[t] = ch->ch_last[t] = 0;
377 }
378 }
379
380 /*
381 * Check that the rings we want to bind are not exclusively owned by a previous
382 * bind. If exclusive ownership has been requested, we also mark the rings.
383 */
384 /* Hoisted out of line to reduce kernel stack footprint */
385 SK_NO_INLINE_ATTRIBUTE
386 static int
387 na_krings_use(struct kern_channel *ch)
388 {
389 struct nexus_adapter *na = ch->ch_na;
390 struct __kern_channel_ring *kring;
391 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
392 enum txrx t;
393 uint32_t i;
394
395 SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
396 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
397 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
398
399 /*
400 * First round: check that none of the requested rings
401 * is already exclusively owned, and that exclusive ownership
402 * is not being requested for rings that are already in use
403 */
404 for_all_rings(t) {
405 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
406 kring = &NAKR(na, t)[i];
407 if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
408 (kring->ckr_users && excl)) {
409 SK_DF(SK_VERB_NA | SK_VERB_RING,
410 "kr \"%s\" (0x%llx) krflags 0x%b is busy",
411 kring->ckr_name, SK_KVA(kring),
412 kring->ckr_flags, CKRF_BITS);
413 return EBUSY;
414 }
415 }
416 }
417
418 /*
419 * Second round: increment usage count and possibly
420 * mark as exclusive
421 */
422
423 for_all_rings(t) {
424 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
425 kring = &NAKR(na, t)[i];
426 kring->ckr_users++;
427 if (excl) {
428 kring->ckr_flags |= CKRF_EXCLUSIVE;
429 }
430 }
431 }
432
433 return 0;
434 }
435
436 /* Hoisted out of line to reduce kernel stack footprint */
437 SK_NO_INLINE_ATTRIBUTE
438 static void
439 na_krings_unuse(struct kern_channel *ch)
440 {
441 struct nexus_adapter *na = ch->ch_na;
442 struct __kern_channel_ring *kring;
443 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
444 enum txrx t;
445 uint32_t i;
446
447 SK_DF(SK_VERB_NA | SK_VERB_RING,
448 "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
449 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
450 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
451
452 for_all_rings(t) {
453 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
454 kring = &NAKR(na, t)[i];
455 if (excl) {
456 kring->ckr_flags &= ~CKRF_EXCLUSIVE;
457 }
458 kring->ckr_users--;
459 }
460 }
461 }
462
463 /* Hoisted out of line to reduce kernel stack footprint */
464 SK_NO_INLINE_ATTRIBUTE
465 static void
466 na_krings_verify(struct nexus_adapter *na)
467 {
468 struct __kern_channel_ring *kring;
469 enum txrx t;
470 uint32_t i;
471
472 for_all_rings(t) {
473 for (i = 0; i < na_get_nrings(na, t); i++) {
474 kring = &NAKR(na, t)[i];
475 /* na_kr_create() validations */
476 ASSERT(kring->ckr_num_slots > 0);
477 ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
478 ASSERT(kring->ckr_pp != NULL);
479
480 if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
481 continue;
482 }
483 /* na_kr_setup() validations */
484 if (KR_KERNEL_ONLY(kring)) {
485 ASSERT(kring->ckr_ring == NULL);
486 } else {
487 ASSERT(kring->ckr_ring != NULL);
488 }
489 ASSERT(kring->ckr_ksds_last ==
490 &kring->ckr_ksds[kring->ckr_lim]);
491 }
492 }
493 }
494
495 int
496 na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
497 struct chreq *chr)
498 {
499 struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
500 struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
501 uint32_t ch_mode = chr->cr_mode;
502 int err = 0;
503
504 SK_LOCK_ASSERT_HELD();
505 ASSERT(ch->ch_schema == NULL);
506 ASSERT(ch->ch_na == NULL);
507
508 /* ring configuration may have changed, fetch from the device */
509 na_update_config(na);
510 ch->ch_na = na; /* store the reference */
511 err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
512 if (err != 0) {
513 goto err;
514 }
515
516 atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
517 CHANF_USER_PACKET_POOL | CHANF_EVENT_RING));
518 if (ch_mode & CHMODE_EXCLUSIVE) {
519 atomic_bitset_32(&ch->ch_flags, CHANF_EXCLUSIVE);
520 }
521 /*
522 * Disallow automatic sync for monitor mode, since TX
523 * direction is disabled.
524 */
525 if (ch_mode & CHMODE_MONITOR) {
526 atomic_bitset_32(&ch->ch_flags, CHANF_RXONLY);
527 }
528
529 if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
530 !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
531 SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
532 ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
533 err = EINVAL;
534 goto err;
535 }
536
537 if (na->na_arena->ar_flags & ARF_DEFUNCT) {
538 err = ENXIO;
539 goto err;
540 }
541
542 if (ch_mode & CHMODE_USER_PACKET_POOL) {
543 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
544 ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
545 ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
546 atomic_bitset_32(&ch->ch_flags, CHANF_USER_PACKET_POOL);
547 }
548
549 if (ch_mode & CHMODE_EVENT_RING) {
550 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
551 ASSERT(na->na_flags & NAF_EVENT_RING);
552 ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
553 atomic_bitset_32(&ch->ch_flags, CHANF_EVENT_RING);
554 }
555
556 /*
557 * If this is the first channel of the adapter, create
558 * the rings and their in-kernel view, the krings.
559 */
560 if (na->na_channels == 0) {
561 err = na->na_krings_create(na, ch);
562 if (err != 0) {
563 goto err;
564 }
565
566 /*
567 * Sanity check; this is already done in na_kr_create(),
568 * but we do it here as well to validate na_kr_setup().
569 */
570 na_krings_verify(na);
571 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
572 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
573 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
574 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
575 }
576
577 /*
578 * Validate ownership and usability of the krings; take into account
579 * whether some previous bind has exclusive ownership on them.
580 */
581 err = na_krings_use(ch);
582 if (err != 0) {
583 goto err_del_rings;
584 }
585
586 /* for user-facing channel, create a new channel schema */
587 if (!(ch->ch_flags & CHANF_KERNEL)) {
588 err = na_schema_alloc(ch);
589 if (err != 0) {
590 goto err_rel_excl;
591 }
592
593 ASSERT(ch->ch_schema != NULL);
594 ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
595 } else {
596 ASSERT(ch->ch_schema == NULL);
597 ch->ch_schema_offset = (mach_vm_offset_t)-1;
598 }
599
600 /* update our work timestamp */
601 na->na_work_ts = net_uptime();
605
606 na->na_channels++;
607
608 /*
609 * If user packet pool is desired, initialize the allocated
610 * object hash table in the pool, if not already. This also
611 * retains a refcnt on the pool which the caller must release.
612 */
613 ASSERT(ch->ch_pp == NULL);
614 if (ch_mode & CHMODE_USER_PACKET_POOL) {
615 #pragma unused(tx_pp)
616 ASSERT(rx_pp == tx_pp);
617 err = pp_init_upp(rx_pp, TRUE);
618 if (err != 0) {
619 goto err_free_schema;
620 }
621 ch->ch_pp = rx_pp;
622 }
623
624 if (!NA_IS_ACTIVE(na)) {
625 err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
626 if (err != 0) {
627 goto err_release_pp;
628 }
629
630 SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
631 SK_KVA(na));
632 SK_D(" na_md_type: %u", na->na_md_type);
633 SK_D(" na_md_subtype: %u", na->na_md_subtype);
634 }
635
636 SK_D("ch 0x%llx", SK_KVA(ch));
637 SK_D(" ch_flags: 0x%b", ch->ch_flags, CHANF_BITS);
638 if (ch->ch_schema != NULL) {
639 SK_D(" ch_schema: 0x%llx", SK_KVA(ch->ch_schema));
640 }
641 SK_D(" ch_na: 0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
642 ch->ch_na->na_channels);
643 SK_D(" ch_tx_rings: [%u,%u)", ch->ch_first[NR_TX],
644 ch->ch_last[NR_TX]);
645 SK_D(" ch_rx_rings: [%u,%u)", ch->ch_first[NR_RX],
646 ch->ch_last[NR_RX]);
647 SK_D(" ch_alloc_rings: [%u,%u)", ch->ch_first[NR_A],
648 ch->ch_last[NR_A]);
649 SK_D(" ch_free_rings: [%u,%u)", ch->ch_first[NR_F],
650 ch->ch_last[NR_F]);
651 SK_D(" ch_ev_rings: [%u,%u)", ch->ch_first[NR_EV],
652 ch->ch_last[NR_EV]);
653
654 return 0;
655
656 err_release_pp:
657 if (ch_mode & CHMODE_USER_PACKET_POOL) {
658 ASSERT(ch->ch_pp != NULL);
659 pp_release(rx_pp);
660 ch->ch_pp = NULL;
661 }
662 err_free_schema:
663 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
664 NEXUS_META_TYPE_INVALID;
665 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
666 NEXUS_META_SUBTYPE_INVALID;
667 ASSERT(na->na_channels != 0);
668 na->na_channels--;
669 if (ch->ch_schema != NULL) {
670 skmem_cache_free(
671 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
672 ch->ch_schema);
673 ch->ch_schema = NULL;
674 ch->ch_schema_offset = (mach_vm_offset_t)-1;
675 }
676 err_rel_excl:
677 na_krings_unuse(ch);
678 err_del_rings:
679 if (na->na_channels == 0) {
680 na->na_krings_delete(na, ch, FALSE);
681 }
682 err:
683 ch->ch_na = NULL;
684 ASSERT(err != 0);
685
686 return err;
687 }
688
689 /*
690 * Undo everything that was done in na_bind_channel().
691 */
692 /* call with SK_LOCK held */
693 void
694 na_unbind_channel(struct kern_channel *ch)
695 {
696 struct nexus_adapter *na = ch->ch_na;
697
698 SK_LOCK_ASSERT_HELD();
699
700 ASSERT(na->na_channels != 0);
701 na->na_channels--;
702
703 /* release exclusive use if it was requested at bind time */
704 na_krings_unuse(ch);
705
706 if (na->na_channels == 0) { /* last instance */
707 SK_D("%s(%d): deleting last channel instance for %s",
708 ch->ch_name, ch->ch_pid, na->na_name);
709
710 /*
711 * Free any remaining allocated packets attached to
712 * the slots, followed by a teardown of the arena.
713 */
714 na_teardown(na, ch, FALSE);
715
716 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
717 NEXUS_META_TYPE_INVALID;
718 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
719 NEXUS_META_SUBTYPE_INVALID;
720 } else {
721 SK_D("%s(%d): %s has %u remaining channel instance(s)",
722 ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
723 }
724
725 /*
726 * Free any allocated packets (for the process) attached to the slots;
727 * note that na_teardown() could have done this there as well.
728 */
729 if (ch->ch_pp != NULL) {
730 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
731 pp_purge_upp(ch->ch_pp, ch->ch_pid);
732 pp_release(ch->ch_pp);
733 ch->ch_pp = NULL;
734 }
735
736 /* possibly decrement counter of tx_si/rx_si users */
737 na_unset_ringid(ch);
738
739 /* reap the caches now (purge if adapter is idle) */
740 skmem_arena_reap(na->na_arena, (na->na_channels == 0));
741
742 /* delete the csm */
743 if (ch->ch_schema != NULL) {
744 skmem_cache_free(
745 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
746 ch->ch_schema);
747 ch->ch_schema = NULL;
748 ch->ch_schema_offset = (mach_vm_offset_t)-1;
749 }
750
751 /* destroy the memory map */
752 skmem_arena_munmap_channel(na->na_arena, ch);
753
754 /* mark the channel as unbound */
755 atomic_bitclear_32(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE));
756 ch->ch_na = NULL;
757
758 /* and finally release the nexus adapter; this might free it */
759 (void) na_release_locked(na);
760 }
761
762 static void
763 na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
764 boolean_t defunct)
765 {
766 SK_LOCK_ASSERT_HELD();
767 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
768
769 #if CONFIG_NEXUS_MONITOR
770 /*
771 * Walk through all the rings and tell any monitor
772 * that the port is going to exit Skywalk mode
773 */
774 nx_mon_stop(na);
775 #endif /* CONFIG_NEXUS_MONITOR */
776
777 /*
778 * Deactivate the adapter.
779 */
780 (void) na->na_activate(na,
781 (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));
782
783 /*
784 * Free any remaining allocated packets for this process.
785 */
786 if (ch->ch_pp != NULL) {
787 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
788 pp_purge_upp(ch->ch_pp, ch->ch_pid);
789 if (!defunct) {
790 pp_release(ch->ch_pp);
791 ch->ch_pp = NULL;
792 }
793 }
794
795 /*
796 * Delete rings and buffers.
797 */
798 na->na_krings_delete(na, ch, defunct);
799 }
800
801 /* call with SK_LOCK held */
802 /*
803 * Allocate the per-fd structure __user_channel_schema.
804 */
805 static int
806 na_schema_alloc(struct kern_channel *ch)
807 {
808 struct nexus_adapter *na = ch->ch_na;
809 struct skmem_arena *ar = na->na_arena;
810 struct skmem_arena_nexus *arn;
811 mach_vm_offset_t roff[SKMEM_REGIONS];
812 struct __kern_channel_ring *kr;
813 struct __user_channel_schema *csm;
814 struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
815 mach_vm_offset_t base;
816 uint32_t i, j, k, n[NR_ALL];
817 enum txrx t;
818
819 /* see comments for struct __user_channel_schema */
820 _CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
821 _CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
822 sizeof(csm->csm_ver));
823 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
824 sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
825 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
826 sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
827 sizeof(csm->csm_kern_name));
828
829 SK_LOCK_ASSERT_HELD();
830
831 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
832 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
833 arn = skmem_arena_nexus(ar);
834 ASSERT(arn != NULL);
835 for_all_rings(t) {
836 n[t] = 0;
837 }
838
839 csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
840 if (csm == NULL) {
841 return ENOMEM;
842 }
843
844 skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
845 bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));
846
847 *(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;
848
849 /* kernel version and executable UUID */
850 _CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
851 (void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
852 version, sizeof(csm->csm_kern_name) - 1);
853 #if !XNU_TARGET_OS_OSX
854 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
855 kernelcache_uuid, sizeof(csm->csm_kern_uuid));
856 #else /* XNU_TARGET_OS_OSX */
857 if (kernel_uuid != NULL) {
858 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
859 kernel_uuid, sizeof(csm->csm_kern_uuid));
860 }
861 #endif /* XNU_TARGET_OS_OSX */
862
863 for_rx_tx(t) {
864 ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
865 n[t] = ch->ch_last[t] - ch->ch_first[t];
866 ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
867 }
868
869 /* return total number of tx and rx rings for this channel */
870 *(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
871 *(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];
872
873 if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
874 *(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
875 na->na_num_allocator_ring_pairs;
876 n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
877 ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
878 ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
879 ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
880 }
881
882 if (ch->ch_flags & CHANF_EVENT_RING) {
883 n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
884 ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
885 *(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
886 }
887
888 bzero(&roff, sizeof(roff));
889 for (i = 0; i < SKMEM_REGIONS; i++) {
890 if (ar->ar_regions[i] == NULL) {
891 ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
892 i == SKMEM_REGION_SCHEMA ||
893 i == SKMEM_REGION_RXBUF ||
894 i == SKMEM_REGION_TXBUF ||
895 i == SKMEM_REGION_RXKMD ||
896 i == SKMEM_REGION_TXKMD ||
897 i == SKMEM_REGION_UMD ||
898 i == SKMEM_REGION_UBFT ||
899 i == SKMEM_REGION_KBFT ||
900 i == SKMEM_REGION_RXKBFT ||
901 i == SKMEM_REGION_TXKBFT ||
902 i == SKMEM_REGION_TXAUSD ||
903 i == SKMEM_REGION_RXFUSD ||
904 i == SKMEM_REGION_USTATS ||
905 i == SKMEM_REGION_KSTATS ||
906 i == SKMEM_REGION_INTRINSIC ||
907 i == SKMEM_REGION_FLOWADV ||
908 i == SKMEM_REGION_NEXUSADV ||
909 i == SKMEM_REGION_SYSCTLS ||
910 i == SKMEM_REGION_GUARD_TAIL);
911 continue;
912 }
913
914 /* not for nexus */
915 ASSERT(i != SKMEM_REGION_SYSCTLS);
916
917 /*
918 * Get region offsets from base of mmap span; the arena
919 * doesn't need to be mmap'd at this point, since we
920 * simply compute the relative offset.
921 */
922 roff[i] = skmem_arena_get_region_offset(ar, i);
923 }
924
925 /*
926 * The schema is made up of the descriptor followed inline by an array
927 * of offsets to the tx, rx, allocator, free and event rings in the
928 * mmap span. Each entry holds the offset between the ring and the
929 * schema, so the information is usable in userspace to reach the
930 * ring from the schema.
931 */
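
/*
 * A minimal sketch (not part of this function) of how userspace may
 * consume these offsets, assuming "csm" points at the mapped
 * __user_channel_schema:
 *
 *	ring = (struct __user_channel_ring *)((uintptr_t)csm +
 *	    csm->csm_ring_ofs[i].ring_off);
 *
 * sd_off is used the same way to locate the ring's user slot
 * descriptors.  Both work because every offset stored below is
 * computed relative to "base", the schema object's own offset within
 * the arena.
 */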
932 base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
933
934 /* initialize schema with tx ring info */
935 for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
936 kr = &na->na_tx_rings[j];
937 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
938 continue;
939 }
940
941 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
942 skmem_cache_get_obj_info(arn->arn_ring_cache,
943 kr->ckr_ring, &ring_oi, NULL);
944 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
945 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
946
947 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
948 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
949 kr->ckr_ksds, &ksd_oi, &usd_oi);
950
951 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
952 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
953 base;
954 }
955 /* initialize schema with rx ring info */
956 for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
957 kr = &na->na_rx_rings[j];
958 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
959 continue;
960 }
961
962 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
963 skmem_cache_get_obj_info(arn->arn_ring_cache,
964 kr->ckr_ring, &ring_oi, NULL);
965 *(mach_vm_offset_t *)
966 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
967 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
968
969 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
970 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
971 kr->ckr_ksds, &ksd_oi, &usd_oi);
972
973 *(mach_vm_offset_t *)
974 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
975 (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
976 base;
977 }
978 /* initialize schema with allocator ring info */
979 for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
980 i < n[NR_A]; i++, j++) {
981 mach_vm_offset_t usd_roff;
982
983 usd_roff = roff[SKMEM_REGION_TXAUSD];
984 kr = &na->na_alloc_rings[j];
985 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
986 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
987
988 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
989 &ring_oi, NULL);
990 *(mach_vm_offset_t *)
991 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
992 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
993
994 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
995 &ksd_oi, &usd_oi);
996 *(mach_vm_offset_t *)
997 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
998 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
999 }
1000 /* initialize schema with free ring info */
1001 for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
1002 i < n[NR_F]; i++, j++) {
1003 mach_vm_offset_t usd_roff;
1004
1005 usd_roff = roff[SKMEM_REGION_RXFUSD];
1006 kr = &na->na_free_rings[j];
1007 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
1008 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
1009
1010 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
1011 &ring_oi, NULL);
1012 *(mach_vm_offset_t *)
1013 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
1014 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
1015
1016 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
1017 &ksd_oi, &usd_oi);
1018 *(mach_vm_offset_t *)
1019 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
1020 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
1021 }
1022 /* initialize schema with event ring info */
1023 for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
1024 n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
1025 ASSERT(csm->csm_num_event_rings != 0);
1026 kr = &na->na_event_rings[j];
1027 ASSERT(!KR_KERNEL_ONLY(kr));
1028 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
1029 skmem_cache_get_obj_info(arn->arn_ring_cache,
1030 kr->ckr_ring, &ring_oi, NULL);
1031 *(mach_vm_offset_t *)
1032 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
1033 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
1034
1035 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
1036 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
1037 kr->ckr_ksds, &ksd_oi, &usd_oi);
1038
1039 *(mach_vm_offset_t *)
1040 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
1041 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
1042 base;
1043 }
1044
1045 *(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
1046 __ch_umd_redzone_cookie;
1047 *(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
1048 *(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
1049 na->na_md_subtype;
1050
1051 if (arn->arn_stats_obj != NULL) {
1052 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
1053 ASSERT(roff[SKMEM_REGION_USTATS] != 0);
1054 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
1055 roff[SKMEM_REGION_USTATS];
1056 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1057 na->na_stats_type;
1058 } else {
1059 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1060 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
1061 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1062 NEXUS_STATS_TYPE_INVALID;
1063 }
1064
1065 if (arn->arn_flowadv_obj != NULL) {
1066 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
1067 ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
1068 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
1069 roff[SKMEM_REGION_FLOWADV];
1070 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
1071 na->na_flowadv_max;
1072 } else {
1073 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1074 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
1075 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
1076 }
1077
1078 if (arn->arn_nexusadv_obj != NULL) {
1079 struct __kern_nexus_adv_metadata *adv_md;
1080
1081 adv_md = arn->arn_nexusadv_obj;
1082 ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
1083 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
1084 ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
1085 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
1086 roff[SKMEM_REGION_NEXUSADV];
1087 } else {
1088 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1089 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
1090 }
1091
1092 ch->ch_schema = csm;
1093 ch->ch_schema_offset = base;
1094
1095 return 0;
1096 }
1097
1098 /*
1099 * Called by all routines that create nexus_adapters.
1100 * Attach na to the ifp (if any) and provide defaults
1101 * for optional callbacks. Defaults assume that we
1102 * are creating a hardware nexus_adapter.
1103 */
1104 void
1105 na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
1106 struct kern_nexus_domain_provider *nxdom_prov)
1107 {
1108 SK_LOCK_ASSERT_HELD();
1109
1110 ASSERT(nx != NULL);
1111 ASSERT(nxdom_prov != NULL);
1112 ASSERT(na->na_krings_create != NULL);
1113 ASSERT(na->na_krings_delete != NULL);
1114 if (na->na_type != NA_NETIF_COMPAT_DEV) {
1115 ASSERT(na_get_nrings(na, NR_TX) != 0);
1116 }
1117 if (na->na_type != NA_NETIF_COMPAT_HOST) {
1118 ASSERT(na_get_nrings(na, NR_RX) != 0);
1119 }
1120 ASSERT(na->na_channels == 0);
1121
1122 if (na->na_notify == NULL) {
1123 na->na_notify = na_notify;
1124 }
1125
1126 na->na_nx = nx;
1127 na->na_nxdom_prov = nxdom_prov;
1128
1129 SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
1130 SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
1131 SK_KVA(na->na_arena));
1132 }
1133
1134 void
1135 na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
1136 boolean_t within_kevent, boolean_t selwake, uint32_t hint)
1137 {
1138 struct nexus_adapter *na = KRNA(kring);
1139 enum txrx t = kring->ckr_tx;
1140
1141 SK_DF(SK_VERB_EVENTS,
1142 "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
1143 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1144 na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
1145 hint, CHAN_FILT_HINT_BITS);
1146
1147 csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
1148 /*
1149 * optimization: avoid a wake up on the global
1150 * queue if nobody has registered for more
1151 * than one ring
1152 */
1153 if (na->na_si_users[t] > 0) {
1154 csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
1155 }
1156 }
1157
1158 /* default notify callback */
1159 static int
1160 na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
1161 {
1162 #pragma unused(p)
1163 SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
1164 SK_VERB_TX : SK_VERB_RX),
1165 "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
1166 "flags 0x%x, kh %u kt %u | h %u t %u",
1167 sk_proc_name_address(p), sk_proc_pid(p),
1168 (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
1169 SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
1170 kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
1171 kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);
1172
1173 na_post_event(kring, (flags & NA_NOTEF_PUSH),
1174 (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);
1175
1176 return 0;
1177 }
1178
1179 /*
1180 * Fetch configuration from the device, to cope with dynamic
1181 * reconfigurations after loading the module.
1182 */
1183 /* call with SK_LOCK held */
1184 int
1185 na_update_config(struct nexus_adapter *na)
1186 {
1187 uint32_t txr, txd, rxr, rxd;
1188
1189 SK_LOCK_ASSERT_HELD();
1190
1191 txr = txd = rxr = rxd = 0;
1192 if (na->na_config == NULL ||
1193 na->na_config(na, &txr, &txd, &rxr, &rxd)) {
1194 /* take whatever we had at init time */
1195 txr = na_get_nrings(na, NR_TX);
1196 txd = na_get_nslots(na, NR_TX);
1197 rxr = na_get_nrings(na, NR_RX);
1198 rxd = na_get_nslots(na, NR_RX);
1199 }
1200
1201 if (na_get_nrings(na, NR_TX) == txr &&
1202 na_get_nslots(na, NR_TX) == txd &&
1203 na_get_nrings(na, NR_RX) == rxr &&
1204 na_get_nslots(na, NR_RX) == rxd) {
1205 return 0; /* nothing changed */
1206 }
1207 SK_D("stored config %s: txring %u x %u, rxring %u x %u",
1208 na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
1209 na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
1210 SK_D("new config %s: txring %u x %u, rxring %u x %u",
1211 na->na_name, txr, txd, rxr, rxd);
1212
1213 if (na->na_channels == 0) {
1214 SK_D("configuration changed (but fine)");
1215 na_set_nrings(na, NR_TX, txr);
1216 na_set_nslots(na, NR_TX, txd);
1217 na_set_nrings(na, NR_RX, rxr);
1218 na_set_nslots(na, NR_RX, rxd);
1219 return 0;
1220 }
1221 SK_ERR("configuration changed while active, this is bad...");
1222 return 1;
1223 }
1224
1225 static void
1226 na_kr_setup_netif_svc_map(struct nexus_adapter *na)
1227 {
1228 uint32_t i;
1229 uint32_t num_tx_rings;
1230
1231 ASSERT(na->na_type == NA_NETIF_DEV);
1232 num_tx_rings = na_get_nrings(na, NR_TX);
1233
1234 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
1235 NAKR_WMM_SC2RINGID(KPKT_SC_BK));
1236 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1237 NAKR_WMM_SC2RINGID(KPKT_SC_RD));
1238 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1239 NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
1240 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1241 NAKR_WMM_SC2RINGID(KPKT_SC_RV));
1242 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1243 NAKR_WMM_SC2RINGID(KPKT_SC_VI));
1244 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
1245 NAKR_WMM_SC2RINGID(KPKT_SC_CTL));
1246
1247 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
1248 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
1249 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
1250 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);
1251
1252 _CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
1253 _CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
1254 _CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
1255 _CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
1256 _CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
1257 _CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
1258 _CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
1259 _CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
1260 _CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
1261 _CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
1262 _CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);
1263
1264 /*
1265 * we support the following 2 configurations:
1266 * 1. packets from all 10 service classes map to one ring.
1267 * 2. a 10:4 mapping between service classes and the rings. These 4
1268 * rings map to the 4 WMM access categories.
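 *
 * For example, per the compile-time assertions above, the WMM
 * configuration has KPKT_SC_BK_SYS and KPKT_SC_BK sharing one TX ring,
 * KPKT_SC_BE/KPKT_SC_RD/KPKT_SC_OAM sharing another, KPKT_SC_AV/
 * KPKT_SC_RV/KPKT_SC_VI a third, and KPKT_SC_VO/KPKT_SC_CTL the last;
 * in the default configuration na_kring_svc_lut[] sends every class to
 * ring 0.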
1269 */
1270 if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
1271 ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
1272 /* setup the adapter's service class LUT */
1273 NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
1274 NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
1275 NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
1276 NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
1277 NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
1278 NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
1279 NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
1280 NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
1281 NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
1282 NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
1283 NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);
1284
1285 /* Initialize the service class for each of the 4 rings */
1286 NAKR_SET_KR_SVC(na, KPKT_SC_BK);
1287 NAKR_SET_KR_SVC(na, KPKT_SC_BE);
1288 NAKR_SET_KR_SVC(na, KPKT_SC_VI);
1289 NAKR_SET_KR_SVC(na, KPKT_SC_VO);
1290 } else {
1291 ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
1292 NEXUS_QMAP_TYPE_DEFAULT);
1293 /* 10:1 mapping */
1294 for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
1295 na->na_kring_svc_lut[i] = 0;
1296 }
1297 for (i = 0; i < num_tx_rings; i++) {
1298 NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
1299 }
1300 }
1301 }
1302
1303 static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
1304 static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
1305 static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
1306 static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
1307 static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
1308 static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
1309 static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1310
1311 static lck_grp_t *
1312 na_kr_q_lck_grp(enum txrx t)
1313 {
1314 switch (t) {
1315 case NR_TX:
1316 return &channel_txq_lock_group;
1317 case NR_RX:
1318 return &channel_rxq_lock_group;
1319 case NR_A:
1320 case NR_F:
1321 return &channel_alloc_lock_group;
1322 case NR_EV:
1323 return &channel_evq_lock_group;
1324 default:
1325 VERIFY(0);
1326 /* NOTREACHED */
1327 __builtin_unreachable();
1328 }
1329 }
1330
1331 static lck_grp_t *
1332 na_kr_s_lck_grp(enum txrx t)
1333 {
1334 switch (t) {
1335 case NR_TX:
1336 return &channel_txs_lock_group;
1337 case NR_RX:
1338 return &channel_rxs_lock_group;
1339 case NR_A:
1340 case NR_F:
1341 return &channel_alloc_lock_group;
1342 case NR_EV:
1343 return &channel_evs_lock_group;
1344 default:
1345 VERIFY(0);
1346 /* NOTREACHED */
1347 __builtin_unreachable();
1348 }
1349 }
1350
1351 static void
1352 kr_init_tbr(struct __kern_channel_ring *r)
1353 {
1354 r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
1355 r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
1356 r->ckr_tbr_last = 0;
1357 }
1358
1359 struct kern_pbufpool *
1360 na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
1361 {
1362 struct kern_pbufpool *pp = NULL;
1363 switch (t) {
1364 case NR_RX:
1365 case NR_F:
1366 case NR_EV:
1367 pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1368 break;
1369 case NR_TX:
1370 case NR_A:
1371 pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1372 break;
1373 default:
1374 VERIFY(0);
1375 /* NOTREACHED */
1376 __builtin_unreachable();
1377 }
1378
1379 return pp;
1380 }
1381
1382 /*
1383 * Create the krings array and initialize the fields common to all adapters.
1384 * The array layout is this:
1385 *
1386 *                        +----------+
1387 * na->na_tx_rings ------->|          | \
1388 *                        |          |  } na->na_num_tx_rings
1389 *                        |          | /
1390 * na->na_rx_rings ------->+----------+
1391 *                        |          | \
1392 *                        |          |  } na->na_num_rx_rings
1393 *                        |          | /
1394 * na->na_alloc_rings ---->+----------+
1395 *                        |          | \
1396 * na->na_free_rings ----->+----------+  } na->na_num_allocator_ring_pairs
1397 *                        |          | /
1398 * na->na_event_rings ---->+----------+
1399 *                        |          | \
1400 *                        |          |  } na->na_num_event_rings
1401 *                        |          | /
1402 *                        +----------+
1403 * na->na_tailroom ------->|          | \
1404 *                        |          |  } tailroom bytes
1405 *                        |          | /
1406 *                        +----------+
1407 *
1408 * The tailroom space is currently used by flow switch ports for allocating
1409 * leases.
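 *
 * A worked example of the sizing (purely hypothetical numbers): an
 * adapter with 2 TX, 2 RX, 1 alloc, 1 free and 0 event rings and a
 * 64-byte tailroom allocates
 *
 *	(2 + 2 + 1 + 1 + 0) * sizeof(struct __kern_channel_ring) + 64
 *
 * bytes in a single sk_alloc(); na_rx_rings, na_alloc_rings,
 * na_free_rings and na_tailroom are simply pointers carved out of that
 * one block, which is why na_kr_delete() frees only na_tx_rings.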
1410 */
1411 /* call with SK_LOCK held */
1412 static int
1413 na_kr_create(struct nexus_adapter *na, uint32_t tailroom, boolean_t alloc_ctx)
1414 {
1415 lck_grp_t *q_lck_grp, *s_lck_grp;
1416 uint32_t i, len, ndesc;
1417 struct kern_pbufpool *pp = NULL;
1418 struct __kern_channel_ring *kring;
1419 uint32_t n[NR_ALL];
1420 int c, tot_slots, err = 0;
1421 enum txrx t;
1422
1423 SK_LOCK_ASSERT_HELD();
1424
1425 n[NR_TX] = na_get_nrings(na, NR_TX);
1426 n[NR_RX] = na_get_nrings(na, NR_RX);
1427 n[NR_A] = na_get_nrings(na, NR_A);
1428 n[NR_F] = na_get_nrings(na, NR_F);
1429 n[NR_EV] = na_get_nrings(na, NR_EV);
1430
1431 len = ((n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV]) *
1432 sizeof(struct __kern_channel_ring)) + tailroom;
1433
1434 na->na_rings_mem_sz = (size_t)len;
1435 na->na_tx_rings = sk_alloc((size_t)len, Z_WAITOK, skmem_tag_nx_rings);
1436 if (__improbable(na->na_tx_rings == NULL)) {
1437 SK_ERR("Cannot allocate krings");
1438 err = ENOMEM;
1439 goto error;
1440 }
1441 na->na_rx_rings = na->na_tx_rings + n[NR_TX];
1442 if (n[NR_A] != 0) {
1443 na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
1444 na->na_free_rings = na->na_alloc_rings + n[NR_A];
1445 } else {
1446 na->na_alloc_rings = na->na_free_rings = NULL;
1447 }
1448 if (n[NR_EV] != 0) {
1449 if (na->na_free_rings != NULL) {
1450 na->na_event_rings = na->na_free_rings + n[NR_F];
1451 } else {
1452 na->na_event_rings = na->na_rx_rings + n[NR_RX];
1453 }
1454 }
1455
1456 /* total number of slots for TX/RX adapter rings */
1457 c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
1458 (n[NR_RX] * na_get_nslots(na, NR_RX));
1459
1460 /* for scratch space on alloc and free rings */
1461 if (n[NR_A] != 0) {
1462 tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
1463 tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
1464 c = tot_slots;
1465 }
1466 na->na_total_slots = tot_slots;
1467
1468 /* slot context (optional) for all TX/RX ring slots of this adapter */
1469 if (alloc_ctx) {
1470 na->na_slot_ctxs =
1471 skn_alloc_type_array(slot_ctxs, struct slot_ctx,
1472 na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
1473 if (na->na_slot_ctxs == NULL) {
1474 SK_ERR("Cannot allocate slot contexts");
1475 err = ENOMEM;
1476 goto error;
1477 }
1478 atomic_bitset_32(&na->na_flags, NAF_SLOT_CONTEXT);
1479 }
1480
1481 /*
1482 * packet handle array storage for all TX/RX ring slots of this
1483 * adapter.
1484 */
1485 na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
1486 na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
1487 if (na->na_scratch == NULL) {
1488 SK_ERR("Cannot allocate scratch space");
1489 err = ENOMEM;
1490 goto error;
1491 }
1492
1493 /*
1494 * All fields in krings are 0 except the ones initialized below,
1495 * but better be explicit on important kring fields.
1496 */
1497 for_all_rings(t) {
1498 ndesc = na_get_nslots(na, t);
1499 pp = na_kr_get_pp(na, t);
1500 for (i = 0; i < n[t]; i++) {
1501 kring = &NAKR(na, t)[i];
1502 bzero(kring, sizeof(*kring));
1503 kring->ckr_na = na;
1504 kring->ckr_pp = pp;
1505 kring->ckr_max_pkt_len = pp->pp_buflet_size *
1506 pp->pp_max_frags;
1507 kring->ckr_ring_id = i;
1508 kring->ckr_tx = t;
1509 kr_init_to_mhints(kring, ndesc);
1510 kr_init_tbr(kring);
1511 if (NA_KERNEL_ONLY(na)) {
1512 kring->ckr_flags |= CKRF_KERNEL_ONLY;
1513 }
1514 if (na->na_flags & NAF_HOST_ONLY) {
1515 kring->ckr_flags |= CKRF_HOST;
1516 }
1517 ASSERT((t >= NR_TXRX) || (c > 0));
1518 if ((t < NR_TXRX) &&
1519 (na->na_flags & NAF_SLOT_CONTEXT)) {
1520 ASSERT(na->na_slot_ctxs != NULL);
1521 kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1522 kring->ckr_slot_ctxs =
1523 na->na_slot_ctxs + (tot_slots - c);
1524 }
1525 ASSERT(na->na_scratch != NULL);
1526 if (t < NR_TXRXAF) {
1527 kring->ckr_scratch =
1528 na->na_scratch + (tot_slots - c);
1529 }
1530 if (t < NR_TXRXAF) {
1531 c -= ndesc;
1532 }
1533 switch (t) {
1534 case NR_A:
1535 if (i == 0) {
1536 kring->ckr_na_sync =
1537 na_packet_pool_alloc_sync;
1538 kring->ckr_alloc_ws =
1539 na_upp_alloc_lowat;
1540 } else {
1541 ASSERT(i == 1);
1542 kring->ckr_na_sync =
1543 na_packet_pool_alloc_buf_sync;
1544 kring->ckr_alloc_ws =
1545 na_upp_alloc_buf_lowat;
1546 }
1547 break;
1548 case NR_F:
1549 if (i == 0) {
1550 kring->ckr_na_sync =
1551 na_packet_pool_free_sync;
1552 } else {
1553 ASSERT(i == 1);
1554 kring->ckr_na_sync =
1555 na_packet_pool_free_buf_sync;
1556 }
1557 break;
1558 case NR_TX:
1559 kring->ckr_na_sync = na->na_txsync;
1560 if (na->na_flags & NAF_TX_MITIGATION) {
1561 kring->ckr_flags |= CKRF_MITIGATION;
1562 }
1563 switch (na->na_type) {
1564 #if CONFIG_NEXUS_USER_PIPE
1565 case NA_USER_PIPE:
1566 ASSERT(!(na->na_flags &
1567 NAF_USER_PKT_POOL));
1568 kring->ckr_prologue = kr_txprologue;
1569 kring->ckr_finalize = NULL;
1570 break;
1571 #endif /* CONFIG_NEXUS_USER_PIPE */
1572 #if CONFIG_NEXUS_MONITOR
1573 case NA_MONITOR:
1574 ASSERT(!(na->na_flags &
1575 NAF_USER_PKT_POOL));
1576 kring->ckr_prologue = kr_txprologue;
1577 kring->ckr_finalize = NULL;
1578 break;
1579 #endif /* CONFIG_NEXUS_MONITOR */
1580 default:
1581 if (na->na_flags & NAF_USER_PKT_POOL) {
1582 kring->ckr_prologue =
1583 kr_txprologue_upp;
1584 kring->ckr_finalize =
1585 kr_txfinalize_upp;
1586 } else {
1587 kring->ckr_prologue =
1588 kr_txprologue;
1589 kring->ckr_finalize =
1590 kr_txfinalize;
1591 }
1592 break;
1593 }
1594 break;
1595 case NR_RX:
1596 kring->ckr_na_sync = na->na_rxsync;
1597 if (na->na_flags & NAF_RX_MITIGATION) {
1598 kring->ckr_flags |= CKRF_MITIGATION;
1599 }
1600 switch (na->na_type) {
1601 #if CONFIG_NEXUS_USER_PIPE
1602 case NA_USER_PIPE:
1603 ASSERT(!(na->na_flags &
1604 NAF_USER_PKT_POOL));
1605 kring->ckr_prologue =
1606 kr_rxprologue_nodetach;
1607 kring->ckr_finalize = kr_rxfinalize;
1608 break;
1609 #endif /* CONFIG_NEXUS_USER_PIPE */
1610 #if CONFIG_NEXUS_MONITOR
1611 case NA_MONITOR:
1612 ASSERT(!(na->na_flags &
1613 NAF_USER_PKT_POOL));
1614 kring->ckr_prologue =
1615 kr_rxprologue_nodetach;
1616 kring->ckr_finalize = kr_rxfinalize;
1617 break;
1618 #endif /* CONFIG_NEXUS_MONITOR */
1619 default:
1620 if (na->na_flags & NAF_USER_PKT_POOL) {
1621 kring->ckr_prologue =
1622 kr_rxprologue_upp;
1623 kring->ckr_finalize =
1624 kr_rxfinalize_upp;
1625 } else {
1626 kring->ckr_prologue =
1627 kr_rxprologue;
1628 kring->ckr_finalize =
1629 kr_rxfinalize;
1630 }
1631 break;
1632 }
1633 break;
1634 case NR_EV:
1635 kring->ckr_na_sync = kern_channel_event_sync;
1636 break;
1637 default:
1638 VERIFY(0);
1639 /* NOTREACHED */
1640 __builtin_unreachable();
1641 }
1642 if (t != NR_EV) {
1643 kring->ckr_na_notify = na->na_notify;
1644 } else {
1645 kring->ckr_na_notify = NULL;
1646 }
1647 (void) snprintf(kring->ckr_name,
1648 sizeof(kring->ckr_name) - 1,
1649 "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1650 ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1651 SK_DF(SK_VERB_NA | SK_VERB_RING,
1652 "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1653 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1654 CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1655 kring->ckr_state = KR_READY;
1656 q_lck_grp = na_kr_q_lck_grp(t);
1657 s_lck_grp = na_kr_s_lck_grp(t);
1658 kring->ckr_qlock_group = q_lck_grp;
1659 lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1660 &channel_lock_attr);
1661 kring->ckr_slock_group = s_lck_grp;
1662 lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1663 &channel_lock_attr);
1664 csi_init(&kring->ckr_si,
1665 (kring->ckr_flags & CKRF_MITIGATION),
1666 na->na_ch_mit_ival);
1667 }
1668 csi_init(&na->na_si[t],
1669 (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1670 na->na_ch_mit_ival);
1671 }
1672 ASSERT(c == 0);
1673 na->na_tailroom = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F];
1674
1675 if (na->na_type == NA_NETIF_DEV) {
1676 na_kr_setup_netif_svc_map(na);
1677 }
1678
1679 /* validate now for cases where we create only krings */
1680 na_krings_verify(na);
1681 return 0;
1682
1683 error:
1684 ASSERT(err != 0);
1685 if (na->na_tx_rings != NULL) {
1686 sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1687 na->na_tx_rings = NULL;
1688 }
1689 if (na->na_slot_ctxs != NULL) {
1690 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1691 skn_free_type_array(slot_ctxs,
1692 struct slot_ctx, na->na_total_slots,
1693 na->na_slot_ctxs);
1694 na->na_slot_ctxs = NULL;
1695 }
1696 if (na->na_scratch != NULL) {
1697 skn_free_type_array(scratch,
1698 kern_packet_t, na->na_total_slots,
1699 na->na_scratch);
1700 na->na_scratch = NULL;
1701 }
1702 return err;
1703 }
1704
1705 /* undo the actions performed by na_kr_create() */
1706 /* call with SK_LOCK held */
1707 static void
1708 na_kr_delete(struct nexus_adapter *na)
1709 {
1710 struct __kern_channel_ring *kring = na->na_tx_rings;
1711 enum txrx t;
1712
1713 ASSERT((kring != NULL) && (na->na_tailroom != NULL));
1714 SK_LOCK_ASSERT_HELD();
1715
1716 for_all_rings(t) {
1717 csi_destroy(&na->na_si[t]);
1718 }
1719 /* we rely on the krings layout described above */
1720 for (; kring != na->na_tailroom; kring++) {
1721 lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1722 lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1723 csi_destroy(&kring->ckr_si);
1724 if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1725 kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1726 ASSERT(kring->ckr_slot_ctxs != NULL);
1727 kring->ckr_slot_ctxs = NULL;
1728 }
1729 }
1730 if (na->na_slot_ctxs != NULL) {
1731 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1732 atomic_bitclear_32(&na->na_flags, NAF_SLOT_CONTEXT);
1733 skn_free_type_array(slot_ctxs,
1734 struct slot_ctx, na->na_total_slots,
1735 na->na_slot_ctxs);
1736 na->na_slot_ctxs = NULL;
1737 }
1738 if (na->na_scratch != NULL) {
1739 skn_free_type_array(scratch,
1740 kern_packet_t, na->na_total_slots,
1741 na->na_scratch);
1742 na->na_scratch = NULL;
1743 }
1744 ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1745 sk_free(na->na_tx_rings, na->na_rings_mem_sz);
1746 na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1747 na->na_free_rings = na->na_event_rings = na->na_tailroom = NULL;
1748 }
1749
1750 static void
1751 na_kr_slot_desc_init(struct __slot_desc *ksds,
1752 boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1753 {
1754 size_t i;
1755
1756 bzero(ksds, ndesc * SLOT_DESC_SZ);
1757 if (usds != NULL) {
1758 ASSERT(!kernel_only);
1759 bzero(usds, ndesc * SLOT_DESC_SZ);
1760 } else {
1761 ASSERT(kernel_only);
1762 }
1763
1764 for (i = 0; i < ndesc; i++) {
1765 KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1766 if (!kernel_only) {
1767 USD_INIT(SLOT_DESC_USD(&usds[i]));
1768 }
1769 }
1770 }
1771
1772 /* call with SK_LOCK held */
1773 static int
1774 na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1775 {
1776 struct skmem_arena *ar = na->na_arena;
1777 struct skmem_arena_nexus *arn;
1778 mach_vm_offset_t roff[SKMEM_REGIONS];
1779 enum txrx t;
1780 uint32_t i;
1781
1782 SK_LOCK_ASSERT_HELD();
1783 ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1784 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1785 arn = skmem_arena_nexus(ar);
1786 ASSERT(arn != NULL);
1787
1788 bzero(&roff, sizeof(roff));
1789 for (i = 0; i < SKMEM_REGIONS; i++) {
1790 if (ar->ar_regions[i] == NULL) {
1791 continue;
1792 }
1793
1794 /* not for nexus */
1795 ASSERT(i != SKMEM_REGION_SYSCTLS);
1796
1797 /*
1798 * Get region offsets from base of mmap span; the arena
1799 * doesn't need to be mmap'd at this point, since we
1800 * simply compute the relative offset.
1801 */
1802 roff[i] = skmem_arena_get_region_offset(ar, i);
1803 }
1804
1805 for_all_rings(t) {
1806 for (i = 0; i < na_get_nrings(na, t); i++) {
1807 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1808 struct __user_channel_ring *ring = kring->ckr_ring;
1809 mach_vm_offset_t ring_off, usd_roff;
1810 struct skmem_obj_info oi, oim;
1811 uint32_t ndesc;
1812
1813 if (ring != NULL) {
1814 SK_DF(SK_VERB_NA | SK_VERB_RING,
1815 "kr 0x%llx (\"%s\") is already "
1816 "initialized", SK_KVA(kring),
1817 kring->ckr_name);
1818 continue; /* already created by somebody else */
1819 }
1820
1821 if (!KR_KERNEL_ONLY(kring) &&
1822 (ring = skmem_cache_alloc(arn->arn_ring_cache,
1823 SKMEM_NOSLEEP)) == NULL) {
1824 SK_ERR("Cannot allocate %s_ring for kr "
1825 "0x%llx (\"%s\")", sk_ring2str(t),
1826 SK_KVA(kring), kring->ckr_name);
1827 goto cleanup;
1828 }
1829 kring->ckr_flags |= CKRF_MEM_RING_INITED;
1830 kring->ckr_ring = ring;
1831 ndesc = kring->ckr_num_slots;
1832
1833 if (ring == NULL) {
1834 goto skip_user_ring_setup;
1835 }
1836
1837 *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1838
1839 /* offset of current ring in mmap span */
1840 skmem_cache_get_obj_info(arn->arn_ring_cache,
1841 ring, &oi, NULL);
1842 ring_off = (roff[SKMEM_REGION_RING] +
1843 SKMEM_OBJ_ROFF(&oi));
1844
1845 /*
1846 * ring_{buf,md,sd}_ofs offsets are relative to the
1847 * current ring, and not to the base of mmap span.
1848 */
1849 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_buf_base =
1850 (roff[SKMEM_REGION_BUF] - ring_off);
1851 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1852 (roff[SKMEM_REGION_UMD] - ring_off);
1853 _CASSERT(sizeof(uint16_t) ==
1854 sizeof(ring->ring_bft_size));
1855 if (roff[SKMEM_REGION_UBFT] != 0) {
1856 ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1857 NULL);
1858 *(mach_vm_offset_t *)(uintptr_t)
1859 &ring->ring_bft_base =
1860 (roff[SKMEM_REGION_UBFT] - ring_off);
1861 *(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1862 (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1863 skr_c_obj_size;
1864 ASSERT(ring->ring_bft_size ==
1865 ar->ar_regions[SKMEM_REGION_KBFT]->
1866 skr_c_obj_size);
1867 } else {
1868 *(mach_vm_offset_t *)(uintptr_t)
1869 &ring->ring_bft_base = 0;
1870 *(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1871 }
1872
1873 if (t == NR_TX || t == NR_A || t == NR_EV) {
1874 usd_roff = roff[SKMEM_REGION_TXAUSD];
1875 } else {
1876 ASSERT(t == NR_RX || t == NR_F);
1877 usd_roff = roff[SKMEM_REGION_RXFUSD];
1878 }
1879 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1880 (usd_roff - ring_off);
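/*
 * Illustrative note (an assumption about the user-space side, not
 * verified here): because ring_{buf,md,sd}_base are stored relative
 * to the ring object itself, a process that has mmap'd the arena can
 * resolve them with simple pointer arithmetic, e.g.
 *
 *	struct __user_slot_desc *usd_base = (void *)
 *	    ((uintptr_t)ring + ring->ring_sd_base);
 *
 * where "ring" is the user-mapped address of this __user_channel_ring.
 */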
1881
1882 /* copy values from kring */
1883 ring->ring_head = kring->ckr_rhead;
1884 *(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1885 kring->ckr_khead;
1886 *(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1887 kring->ckr_rtail;
1888
1889 _CASSERT(sizeof(uint32_t) ==
1890 sizeof(ring->ring_buf_size));
1891 _CASSERT(sizeof(uint16_t) ==
1892 sizeof(ring->ring_md_size));
1893 *(uint32_t *)(uintptr_t)&ring->ring_buf_size =
1894 ar->ar_regions[SKMEM_REGION_BUF]->skr_c_obj_size;
1895 if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1896 *(uint16_t *)(uintptr_t)&ring->ring_md_size =
1897 (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1898 skr_c_obj_size;
1899 ASSERT(ring->ring_md_size ==
1900 ar->ar_regions[SKMEM_REGION_KMD]->
1901 skr_c_obj_size);
1902 } else {
1903 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1904 ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1905 ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1906 }
1907
1908 /* ring info */
1909 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1910 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1911 *(uint16_t *)(uintptr_t)&ring->ring_id =
1912 (uint16_t)kring->ckr_ring_id;
1913 *(uint16_t *)(uintptr_t)&ring->ring_kind =
1914 (uint16_t)kring->ckr_tx;
1915
1916 SK_DF(SK_VERB_NA | SK_VERB_RING,
1917 "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1918 sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1919 kring->ckr_name);
1920 SK_DF(SK_VERB_NA | SK_VERB_RING,
1921 " num_slots: %u", ring->ring_num_slots);
1922 SK_DF(SK_VERB_NA | SK_VERB_RING,
1923 " buf_base: 0x%llx",
1924 (uint64_t)ring->ring_buf_base);
1925 SK_DF(SK_VERB_NA | SK_VERB_RING,
1926 " md_base: 0x%llx",
1927 (uint64_t)ring->ring_md_base);
1928 SK_DF(SK_VERB_NA | SK_VERB_RING,
1929 " sd_base: 0x%llx",
1930 (uint64_t)ring->ring_sd_base);
1931 SK_DF(SK_VERB_NA | SK_VERB_RING,
1932 " h, t: %u, %u", ring->ring_head,
1933 ring->ring_tail);
1934 SK_DF(SK_VERB_NA | SK_VERB_RING,
1935 " md_size: %llu",
1936 (uint64_t)ring->ring_md_size);
1937
1938 /* make sure they're in synch */
1939 _CASSERT(NR_RX == CR_KIND_RX);
1940 _CASSERT(NR_TX == CR_KIND_TX);
1941 _CASSERT(NR_A == CR_KIND_ALLOC);
1942 _CASSERT(NR_F == CR_KIND_FREE);
1943 _CASSERT(NR_EV == CR_KIND_EVENT);
1944
1945 skip_user_ring_setup:
1946 /*
1947 * This flag tells na_kr_teardown_all() that it should
1948 * go thru the checks to free up the slot maps.
1949 */
1950 kring->ckr_flags |= CKRF_MEM_SD_INITED;
1951 if (t == NR_TX || t == NR_A || t == NR_EV) {
1952 kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1953 } else {
1954 ASSERT(t == NR_RX || t == NR_F);
1955 kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1956 }
1957 kring->ckr_ksds =
1958 skmem_cache_alloc(kring->ckr_ksds_cache,
1959 SKMEM_NOSLEEP);
1960 if (kring->ckr_ksds == NULL) {
1961 SK_ERR("Cannot allocate %s_ksds for kr "
1962 "0x%llx (\"%s\")", sk_ring2str(t),
1963 SK_KVA(kring), kring->ckr_name);
1964 goto cleanup;
1965 }
1966 if (!KR_KERNEL_ONLY(kring)) {
1967 skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1968 kring->ckr_ksds, &oi, &oim);
1969 kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1970 }
1971 na_kr_slot_desc_init(kring->ckr_ksds,
1972 KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1973
1974 /* cache last slot descriptor address */
1975 ASSERT(kring->ckr_lim == (ndesc - 1));
1976 kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1977
1978 if ((t < NR_TXRX) &&
1979 !(na->na_flags & NAF_USER_PKT_POOL) &&
1980 na_kr_populate_slots(kring) != 0) {
1981 SK_ERR("Cannot allocate buffers for kr "
1982 "0x%llx (\"%s\")", SK_KVA(kring),
1983 kring->ckr_name);
1984 goto cleanup;
1985 }
1986 }
1987 }
1988
1989 return 0;
1990
1991 cleanup:
1992 na_kr_teardown_all(na, ch, FALSE);
1993
1994 return ENOMEM;
1995 }
1996
1997 static void
1998 na_kr_teardown_common(struct nexus_adapter *na,
1999 struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
2000 boolean_t defunct)
2001 {
2002 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
2003 struct __user_channel_ring *ckr_ring;
2004 boolean_t sd_idle, sd_inited;
2005
2006 ASSERT(arn != NULL);
2007 kr_enter(kring, TRUE);
2008 /*
2009 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
2010 * to make sure that the freeing needs to happen (else just
2011 * nullify the values).
2012 * If this adapter owns the memory for the slot descriptors,
2013 * check if the region is marked as busy (sd_idle is false)
2014 * and leave the kring's slot descriptor fields alone if so,
2015 * at defunct time. At final teardown time, sd_idle must be
2016 * true else we assert; this indicates a missing call to
2017 * skmem_arena_nexus_sd_set_noidle().
2018 */
2019 sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
2020 if (sd_inited) {
2021 /* callee will do KR_KSD(), so check */
2022 if (((t < NR_TXRX) || (t == NR_EV)) &&
2023 (kring->ckr_ksds != NULL)) {
2024 na_kr_depopulate_slots(kring, ch, defunct);
2025 }
2026 /* leave CKRF_MEM_SD_INITED flag alone until idle */
2027 sd_idle = skmem_arena_nexus_sd_idle(arn);
2028 VERIFY(sd_idle || defunct);
2029 } else {
2030 sd_idle = TRUE;
2031 }
2032
2033 if (sd_idle) {
2034 kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2035 if (kring->ckr_ksds != NULL) {
2036 if (sd_inited) {
2037 skmem_cache_free(kring->ckr_ksds_cache,
2038 kring->ckr_ksds);
2039 }
2040 kring->ckr_ksds = NULL;
2041 kring->ckr_ksds_last = NULL;
2042 kring->ckr_usds = NULL;
2043 }
2044 ASSERT(kring->ckr_ksds_last == NULL);
2045 ASSERT(kring->ckr_usds == NULL);
2046 }
2047
2048 if ((ckr_ring = kring->ckr_ring) != NULL) {
2049 kring->ckr_ring = NULL;
2050 }
2051
2052 if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2053 ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2054 if (ckr_ring != NULL) {
2055 skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2056 }
2057 kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2058 }
2059
2060 if (defunct) {
2061 /* if defunct, drop everything; see KR_DROP() */
2062 kring->ckr_flags |= CKRF_DEFUNCT;
2063 }
2064 kr_exit(kring);
2065 }
2066
2067 /*
2068 * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event}
2069 */
2070 static void
2071 na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2072 boolean_t defunct)
2073 {
2074 enum txrx t;
2075
2076 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2077
2078 /* skip if this adapter has no allocated rings */
2079 if (na->na_tx_rings == NULL) {
2080 return;
2081 }
2082
2083 for_all_rings(t) {
2084 for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2085 na_kr_teardown_common(na, &NAKR(na, t)[i],
2086 t, ch, defunct);
2087 }
2088 }
2089 }
2090
2091 /*
2092 * Teardown only {tx,rx} rings assigned to the channel.
2093 */
2094 static void
2095 na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2096 boolean_t defunct, struct proc *p)
2097 {
2098 enum txrx t;
2099
2100 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2101
2102 for_rx_tx(t) {
2103 ring_id_t qfirst = ch->ch_first[t];
2104 ring_id_t qlast = ch->ch_last[t];
2105 uint32_t i;
2106
2107 for (i = qfirst; i < qlast; i++) {
2108 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2109 na_kr_teardown_common(na, kring, t, ch, defunct);
2110
2111 /*
2112 * Issue a notify to wake up anyone sleeping in kqueue
2113 * so that they notice the newly defuncted channels and
2114 * return an error
2115 */
2116 kring->ckr_na_notify(kring, p, 0);
2117 }
2118 }
2119 }
2120
2121 static int
2122 na_kr_populate_slots(struct __kern_channel_ring *kring)
2123 {
2124 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2125 struct nexus_adapter *na = KRNA(kring);
2126 kern_pbufpool_t pp = kring->ckr_pp;
2127 uint32_t nslots = kring->ckr_num_slots;
2128 uint32_t start_idx, i;
2129 uint32_t sidx = 0; /* slot counter */
2130 struct __kern_slot_desc *ksd;
2131 struct __user_slot_desc *usd;
2132 struct __kern_quantum *kqum;
2133 nexus_type_t nexus_type;
2134 int err = 0;
2135
2136 ASSERT(kring->ckr_tx < NR_TXRX);
2137 ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2138 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2139 ASSERT(pp != NULL);
2140
2141 /*
2142 * xxx_ppool: remove this special case
2143 */
2144 nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2145
2146 switch (nexus_type) {
2147 case NEXUS_TYPE_FLOW_SWITCH:
2148 case NEXUS_TYPE_KERNEL_PIPE:
2149 /*
2150 * xxx_ppool: This is temporary code until we come up with a
2151 * scheme for user space to alloc & attach packets to tx ring.
2152 */
2153 if (kernel_only || kring->ckr_tx == NR_RX) {
2154 return 0;
2155 }
2156 break;
2157
2158 case NEXUS_TYPE_NET_IF:
2159 if (((na->na_type == NA_NETIF_DEV) ||
2160 (na->na_type == NA_NETIF_HOST)) &&
2161 (kernel_only || (kring->ckr_tx == NR_RX))) {
2162 return 0;
2163 }
2164
2165 ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2166 (na->na_type == NA_NETIF_COMPAT_HOST) ||
2167 (na->na_type == NA_NETIF_DEV) ||
2168 (na->na_type == NA_NETIF_VP));
2169
2170 if (!kernel_only) {
2171 if (kring->ckr_tx == NR_RX) {
2172 return 0;
2173 } else {
2174 break;
2175 }
2176 }
2177
2178 ASSERT(kernel_only);
2179
2180 if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2181 (na->na_type == NA_NETIF_COMPAT_HOST)) {
2182 return 0;
2183 }
2184 VERIFY(0);
2185 /* NOTREACHED */
2186 __builtin_unreachable();
2187
2188 case NEXUS_TYPE_USER_PIPE:
2189 case NEXUS_TYPE_MONITOR:
2190 break;
2191
2192 default:
2193 VERIFY(0);
2194 /* NOTREACHED */
2195 __builtin_unreachable();
2196 }
2197
2198 /* Fill the ring with packets */
2199 sidx = start_idx = 0;
2200 for (i = 0; i < nslots; i++) {
2201 kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2202 SKMEM_NOSLEEP));
2203 if (kqum == NULL) {
2204 err = ENOMEM;
2205 SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2206 "after %u of %u, err %d", SK_KVA(na->na_arena),
2207 na->na_arena->ar_name, i, nslots, err);
2208 goto cleanup;
2209 }
2210 ksd = KR_KSD(kring, i);
2211 usd = (kernel_only ? NULL : KR_USD(kring, i));
2212
2213 /* attach packet to slot */
2214 kqum->qum_ksd = ksd;
2215 ASSERT(!KSD_VALID_METADATA(ksd));
2216 KSD_ATTACH_METADATA(ksd, kqum);
2217 if (usd != NULL) {
2218 USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2219 kr_externalize_metadata(kring, pp->pp_max_frags,
2220 kqum, current_proc());
2221 }
2222
2223 SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2224 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2225 SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2226 SK_KVA(&kqum->qum_buf[0]));
2227 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2228 SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2229 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2230 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2231 SK_KVA(usd), METADATA_IDX(kqum),
2232 SK_KVA(kqum->qum_user),
2233 kqum->qum_user->qum_buf[0].buf_idx,
2234 SK_KVA(&kqum->qum_user->qum_buf[0]));
2235 }
2236
2237 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2238 }
2239
2240 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2241 SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2242
2243 cleanup:
2244 if (err != 0) {
2245 sidx = start_idx;
2246 while (i-- > 0) {
2247 ksd = KR_KSD(kring, i);
2248 usd = (kernel_only ? NULL : KR_USD(kring, i));
2249 kqum = ksd->sd_qum;
2250
2251 ASSERT(ksd == kqum->qum_ksd);
2252 KSD_RESET(ksd);
2253 if (usd != NULL) {
2254 USD_RESET(usd);
2255 }
2256 /* detach packet from slot */
2257 kqum->qum_ksd = NULL;
2258 pp_free_packet(pp, SK_PTR_ADDR(kqum));
2259
2260 sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2261 }
2262 }
2263 return err;
2264 }
2265
2266 static void
2267 na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2268 struct kern_channel *ch, boolean_t defunct)
2269 {
2270 #pragma unused(ch)
2271 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2272 uint32_t i, j, n = kring->ckr_num_slots;
2273 struct nexus_adapter *na = KRNA(kring);
2274 struct kern_pbufpool *pp = kring->ckr_pp;
2275 boolean_t upp = FALSE;
2276 obj_idx_t midx;
2277
2278 ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2279 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2280
2281 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2282
2283 if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2284 (kring->ckr_tx != NR_EV)) {
2285 upp = TRUE;
2286 }
2287 for (i = 0, j = 0; i < n; i++) {
2288 struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2289 struct __user_slot_desc *usd;
2290 struct __kern_quantum *qum, *kqum;
2291 boolean_t free_packet = FALSE;
2292 int err;
2293
2294 if (!KSD_VALID_METADATA(ksd)) {
2295 continue;
2296 }
2297
2298 kqum = ksd->sd_qum;
2299 usd = (kernel_only ? NULL : KR_USD(kring, i));
2300 midx = METADATA_IDX(kqum);
2301
2302 /*
2303 * if the packet is internalized it should not be in the
2304 * hash table of packets loaned to user space.
2305 */
2306 if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2307 if ((qum = pp_find_upp(pp, midx)) != NULL) {
2308 panic("internalized packet 0x%llx in htbl",
2309 SK_KVA(qum));
2310 /* NOTREACHED */
2311 __builtin_unreachable();
2312 }
2313 free_packet = TRUE;
2314 } else if (upp) {
2315 /*
2316 * if the packet is not internalized check if it is
2317 * in the list of packets loaned to user-space.
2318 * Remove from the list before freeing.
2319 */
2320 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2321 qum = pp_remove_upp(pp, midx, &err);
2322 if (err != 0) {
2323 SK_ERR("un-allocated packet or buflet %d %p",
2324 midx, SK_KVA(qum));
2325 if (qum != NULL) {
2326 free_packet = TRUE;
2327 }
2328 }
2329 } else {
2330 free_packet = TRUE;
2331 }
2332
2333 /*
2334 * Clear the user and kernel slot descriptors. Note that
2335 * if we are depopulating the slots due to defunct (and not
2336 * due to normal deallocation/teardown), we leave the user
2337 * slot descriptor alone. At that point the process may
2338 * be suspended, and later when it resumes it would just
2339 * pick up the original contents and move forward with
2340 * whatever it was doing.
2341 */
2342 KSD_RESET(ksd);
2343 if (usd != NULL && !defunct) {
2344 USD_RESET(usd);
2345 }
2346
2347 /* detach packet from slot */
2348 kqum->qum_ksd = NULL;
2349
2350 SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2351 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2352 METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2353 SK_KVA(&kqum->qum_buf[0]));
2354 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2355 SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2356 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2357 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2358 SK_KVA(usd), METADATA_IDX(kqum),
2359 SK_KVA(kqum->qum_user),
2360 kqum->qum_user->qum_buf[0].buf_idx,
2361 SK_KVA(&kqum->qum_user->qum_buf[0]));
2362 }
2363
2364 if (free_packet) {
2365 pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2366 }
2367 }
2368
2369 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2370 SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2371 j, n);
2372 }
2373
2374 int
2375 na_rings_mem_setup(struct nexus_adapter *na, uint32_t tailroom,
2376 boolean_t alloc_ctx, struct kern_channel *ch)
2377 {
2378 boolean_t kronly;
2379 int err;
2380
2381 SK_LOCK_ASSERT_HELD();
2382 ASSERT(na->na_channels == 0);
2383 /*
2384 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2385 * the backing memory regions for the adapter.
2386 */
2387 kronly = (na->na_flags & NAF_MEM_NO_INIT);
2388 ASSERT(!kronly || NA_KERNEL_ONLY(na));
2389
2390 /*
2391 * Create and initialize the common fields of the krings array,
2392 * using the information that must already be available in the na.
2393 * tailroom can be used to request the allocation of additional
2394 * tailroom bytes after the krings array. This is used by
2395 * nexus_vp_adapter's (i.e., flow switch ports) to make room
2396 * for leasing-related data structures.
2397 */
2398 if ((err = na_kr_create(na, tailroom, alloc_ctx)) == 0 && !kronly) {
2399 err = na_kr_setup(na, ch);
2400 if (err != 0) {
2401 na_kr_delete(na);
2402 }
2403 }
2404
2405 return err;
2406 }
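 
/*
 * Illustrative sketch (not part of the build): a nexus-specific
 * na_krings_create/na_krings_delete pair typically reduces to these two
 * routines. The "foo" adapter below is hypothetical; see
 * na_pseudo_krings_create()/na_pseudo_krings_delete() later in this
 * file for the real minimal example.
 *
 *	static int
 *	foo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
 *	{
 *		// no tailroom, no per-slot contexts
 *		return na_rings_mem_setup(na, 0, FALSE, ch);
 *	}
 *
 *	static void
 *	foo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
 *	    boolean_t defunct)
 *	{
 *		na_rings_mem_teardown(na, ch, defunct);
 *	}
 */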
2407
2408 void
2409 na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2410 boolean_t defunct)
2411 {
2412 SK_LOCK_ASSERT_HELD();
2413 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2414
2415 /*
2416 * Deletes the kring and ring array of the adapter. They
2417 * must have been created using na_rings_mem_setup().
2418 *
2419 * XXX: [email protected] -- the parameter "ch" should not be
2420 * needed here; however na_kr_depopulate_slots() needs to
2421 * go thru the channel's user packet pool hash, and so for
2422 * now we leave it here.
2423 */
2424 na_kr_teardown_all(na, ch, defunct);
2425 if (!defunct) {
2426 na_kr_delete(na);
2427 }
2428 }
2429
2430 void
2431 na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2432 {
2433 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2434
2435 /*
2436 * Depopulate slots on the TX and RX rings of this channel,
2437 * but don't touch other rings owned by other channels if
2438 * this adapter is being shared.
2439 */
2440 na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2441 }
2442
2443 void
2444 na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2445 {
2446 enum txrx t;
2447 uint32_t i;
2448
2449 for_rx_tx(t) {
2450 for (i = 0; i < na_get_nrings(na, t); i++) {
2451 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2452 int error;
2453 error = kr_enter(kring, TRUE);
2454 if (drop) {
2455 kring->ckr_flags |= CKRF_DROP;
2456 } else {
2457 kring->ckr_flags &= ~CKRF_DROP;
2458 }
2459
2460 if (error != 0) {
2461 SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2462 "kr_enter failed %d",
2463 na->na_name, SK_KVA(na),
2464 kring->ckr_name, SK_KVA(kring),
2465 error);
2466 } else {
2467 kr_exit(kring);
2468 }
2469 SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2470 "krflags 0x%b", na->na_name, SK_KVA(na),
2471 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2472 CKRF_BITS);
2473 }
2474 }
2475 }
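 
/*
 * Illustrative usage (hypothetical call site; actual callers are
 * nexus-specific): a provider that needs to quiesce data movement
 * before reconfiguring an adapter could bracket the operation with
 *
 *	na_kr_drop(na, TRUE);	// all TX/RX krings get CKRF_DROP
 *	... reconfigure ...
 *	na_kr_drop(na, FALSE);	// resume normal operation
 */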
2476
2477 /*
2478 * Set the stopped/enabled status of a ring. When stopping, this also waits
2479 * for all current activity on the ring to terminate. The status change
2480 * is then notified using the adapter's na_notify callback.
2481 */
2482 static void
2483 na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2484 uint32_t state)
2485 {
2486 struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2487
2488 /*
2489 * Mark the ring as stopped/enabled, and run through the
2490 * locks to make sure other users get to see it.
2491 */
2492 if (state == KR_READY) {
2493 kr_start(kr);
2494 } else {
2495 kr_stop(kr, state);
2496 }
2497 }
2498
2499
2500 /* stop or enable all the rings of na */
2501 static void
2502 na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2503 {
2504 uint32_t i;
2505 enum txrx t;
2506
2507 SK_LOCK_ASSERT_HELD();
2508
2509 if (!NA_IS_ACTIVE(na)) {
2510 return;
2511 }
2512
2513 for_rx_tx(t) {
2514 for (i = 0; i < na_get_nrings(na, t); i++) {
2515 na_set_ring(na, i, t, state);
2516 }
2517 }
2518 }
2519
2520 /*
2521 * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
2522 * to finish and prevents any new one from starting. Call this before turning
2523 * Skywalk mode off, or before removing the hardware rings (e.g., on module
2524 * unload). As a rule of thumb for Linux drivers, this should be placed near
2525 * each napi_disable().
2526 */
2527 void
2528 na_disable_all_rings(struct nexus_adapter *na)
2529 {
2530 na_set_all_rings(na, KR_STOPPED);
2531 }
2532
2533 /*
2534 * Convenience function used in drivers. Re-enables rxsync and txsync on the
2535 * adapter's rings. In Linux drivers, this should be placed near each
2536 * napi_enable().
2537 */
2538 void
2539 na_enable_all_rings(struct nexus_adapter *na)
2540 {
2541 na_set_all_rings(na, KR_READY /* enabled */);
2542 }
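 
/*
 * Illustrative sketch (hypothetical driver code, following the comments
 * above): bracket a hardware ring reset so that no txsync()/rxsync()
 * runs against rings that are being torn down:
 *
 *	na_disable_all_rings(na);	// waits for in-flight syncs
 *	... reset or resize the hardware rings ...
 *	na_enable_all_rings(na);
 */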
2543
2544 void
2545 na_lock_all_rings(struct nexus_adapter *na)
2546 {
2547 na_set_all_rings(na, KR_LOCKED);
2548 }
2549
2550 void
2551 na_unlock_all_rings(struct nexus_adapter *na)
2552 {
2553 na_enable_all_rings(na);
2554 }
2555
2556 int
2557 na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2558 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2559 {
2560 struct nexus_adapter *na = NULL;
2561 mach_vm_size_t memsize = 0;
2562 int err = 0;
2563 enum txrx t;
2564
2565 ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2566 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2567
2568 SK_LOCK_ASSERT_HELD();
2569
2570 /* find the nexus adapter and return the reference */
2571 err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2572 if (err != 0) {
2573 ASSERT(na == NULL);
2574 goto done;
2575 }
2576
2577 if (NA_KERNEL_ONLY(na)) {
2578 err = EBUSY;
2579 goto done;
2580 }
2581
2582 /* reject if the adapter is defunct or non-permissive */
2583 if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2584 err = ENXIO;
2585 goto done;
2586 }
2587
2588 err = na_bind_channel(na, ch, chr);
2589 if (err != 0) {
2590 goto done;
2591 }
2592
2593 ASSERT(ch->ch_schema != NULL);
2594 ASSERT(na == ch->ch_na);
2595
2596 for_all_rings(t) {
2597 if (na_get_nrings(na, t) == 0) {
2598 ch->ch_si[t] = NULL;
2599 continue;
2600 }
2601 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2602 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2603 }
2604
2605 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2606
2607 if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2608 AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2609 atomic_bitset_32(__DECONST(uint32_t *,
2610 &ch->ch_schema->csm_flags), CSM_PRIV_MEM);
2611 }
2612
2613 err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2614 if (err != 0) {
2615 goto done;
2616 }
2617
2618 atomic_bitset_32(__DECONST(uint32_t *, &ch->ch_schema->csm_flags),
2619 CSM_ACTIVE);
2620 chr->cr_memsize = memsize;
2621 chr->cr_memoffset = ch->ch_schema_offset;
2622
2623 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2624 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2625 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2626 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2627 na->na_flags, NAF_BITS);
2628
2629 done:
2630 if (err != 0) {
2631 if (ch->ch_schema != NULL || na != NULL) {
2632 if (ch->ch_schema != NULL) {
2633 ASSERT(na == ch->ch_na);
2634 /*
2635 * Callee will unmap memory region if needed,
2636 * as well as release reference held on 'na'.
2637 */
2638 na_disconnect(nx, ch);
2639 na = NULL;
2640 }
2641 if (na != NULL) {
2642 (void) na_release_locked(na);
2643 na = NULL;
2644 }
2645 }
2646 }
2647
2648 return err;
2649 }
2650
2651 void
2652 na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2653 {
2654 #pragma unused(nx)
2655 enum txrx t;
2656
2657 SK_LOCK_ASSERT_HELD();
2658
2659 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2660 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2661 ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2662 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2663 ch->ch_na->na_flags, NAF_BITS);
2664
2665 /* destroy mapping and release references */
2666 na_unbind_channel(ch);
2667 ASSERT(ch->ch_na == NULL);
2668 ASSERT(ch->ch_schema == NULL);
2669 for_all_rings(t) {
2670 ch->ch_si[t] = NULL;
2671 }
2672 }
2673
2674 void
2675 na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2676 struct nexus_adapter *na, boolean_t locked)
2677 {
2678 #pragma unused(nx)
2679 SK_LOCK_ASSERT_HELD();
2680 if (!locked) {
2681 lck_mtx_lock(&ch->ch_lock);
2682 }
2683
2684 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2685
2686 if (!(na->na_flags & NAF_DEFUNCT)) {
2687 /*
2688 * Mark this adapter as defunct to inform nexus-specific
2689 * teardown handler called by na_teardown() below.
2690 */
2691 atomic_bitset_32(&na->na_flags, NAF_DEFUNCT);
2692
2693 /*
2694 * Depopulate slots.
2695 */
2696 na_teardown(na, ch, TRUE);
2697
2698 /*
2699 * And finally destroy any already-defunct memory regions.
2700 * Do this only if the nexus adapter owns the arena, i.e.
2701 * NAF_MEM_LOANED is not set. Otherwise, we'd expect
2702 * that this routine be called again for the real owner.
2703 */
2704 if (!(na->na_flags & NAF_MEM_LOANED)) {
2705 skmem_arena_defunct(na->na_arena);
2706 }
2707 }
2708
2709 SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2710 "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2711 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2712 na->na_name, ch->ch_info->cinfo_nx_port,
2713 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2714 na->na_flags, NAF_BITS);
2715
2716 if (!locked) {
2717 lck_mtx_unlock(&ch->ch_lock);
2718 }
2719 }
2720
2721 /*
2722 * TODO: [email protected] -- merge this into na_connect()
2723 */
2724 int
2725 na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2726 struct chreq *chr, struct proc *p)
2727 {
2728 #pragma unused(p)
2729 struct nexus_adapter *na = NULL;
2730 mach_vm_size_t memsize = 0;
2731 int error = 0;
2732 enum txrx t;
2733
2734 ASSERT(chr->cr_mode & CHMODE_KERNEL);
2735 ASSERT(ch->ch_flags & CHANF_KERNEL);
2736 ASSERT(ch->ch_na == NULL);
2737 ASSERT(ch->ch_schema == NULL);
2738
2739 SK_LOCK_ASSERT_HELD();
2740
2741 error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2742 if (error != 0) {
2743 goto done;
2744 }
2745
2746 if (na == NULL) {
2747 error = EINVAL;
2748 goto done;
2749 }
2750
2751 if (na->na_channels > 0) {
2752 error = EBUSY;
2753 goto done;
2754 }
2755
2756 if (na->na_flags & NAF_DEFUNCT) {
2757 error = ENXIO;
2758 goto done;
2759 }
2760
2761 /*
2762 * Special connect requires the nexus adapter to handle its
2763 * own channel binding and unbinding via na_special(); bail
2764 * if this adapter doesn't support it.
2765 */
2766 if (na->na_special == NULL) {
2767 error = ENOTSUP;
2768 goto done;
2769 }
2770
2771 /* upon success, "ch->ch_na" will point to "na" */
2772 error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2773 if (error != 0) {
2774 ASSERT(ch->ch_na == NULL);
2775 goto done;
2776 }
2777
2778 ASSERT(na->na_flags & NAF_SPEC_INIT);
2779 ASSERT(na == ch->ch_na);
2780 /* make sure this is still the case */
2781 ASSERT(ch->ch_schema == NULL);
2782
2783 for_rx_tx(t) {
2784 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2785 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2786 }
2787
2788 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2789 chr->cr_memsize = memsize;
2790
2791 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2792 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2793 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2794 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2795 na->na_flags, NAF_BITS);
2796
2797 done:
2798 if (error != 0) {
2799 if (ch->ch_na != NULL || na != NULL) {
2800 if (ch->ch_na != NULL) {
2801 ASSERT(na == ch->ch_na);
2802 /* callee will release reference on 'na' */
2803 na_disconnect_spec(nx, ch);
2804 na = NULL;
2805 }
2806 if (na != NULL) {
2807 (void) na_release_locked(na);
2808 na = NULL;
2809 }
2810 }
2811 }
2812
2813 return error;
2814 }
2815
2816 /*
2817 * TODO: [email protected] -- merge this into na_disconnect()
2818 */
2819 void
2820 na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2821 {
2822 #pragma unused(nx)
2823 struct nexus_adapter *na = ch->ch_na;
2824 enum txrx t;
2825 int error;
2826
2827 SK_LOCK_ASSERT_HELD();
2828 ASSERT(na != NULL);
2829 ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */
2830
2831 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2832 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2833 na->na_name, ch->ch_info->cinfo_nx_port,
2834 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2835 na->na_flags, NAF_BITS);
2836
2837 /* take a reference for this routine */
2838 na_retain_locked(na);
2839
2840 ASSERT(ch->ch_flags & CHANF_KERNEL);
2841 ASSERT(ch->ch_schema == NULL);
2842 ASSERT(na->na_special != NULL);
2843 /* unbind this channel */
2844 error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2845 ASSERT(error == 0);
2846 ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2847
2848 /* now release our reference; this may be the last */
2849 na_release_locked(na);
2850 na = NULL;
2851
2852 ASSERT(ch->ch_na == NULL);
2853 for_rx_tx(t) {
2854 ch->ch_si[t] = NULL;
2855 }
2856 }
2857
2858 void
2859 na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2860 {
2861 #pragma unused(nx)
2862 struct nexus_adapter *na = ch->ch_na;
2863
2864 SK_LOCK_ASSERT_HELD();
2865
2866 ASSERT(ch->ch_flags & CHANF_KERNEL);
2867 ASSERT(NA_KERNEL_ONLY(na));
2868 ASSERT(na->na_special != NULL);
2869
2870 na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2871 }
2872
2873 void
2874 na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2875 {
2876 #pragma unused(nx)
2877 struct nexus_adapter *na = ch->ch_na;
2878
2879 SK_LOCK_ASSERT_HELD();
2880
2881 ASSERT(ch->ch_flags & CHANF_KERNEL);
2882 ASSERT(NA_KERNEL_ONLY(na));
2883 ASSERT(na->na_special != NULL);
2884
2885 na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2886 }
2887
2888 /*
2889 * MUST BE CALLED UNDER SK_LOCK()
2890 *
2891 * Get a refcounted reference to a nexus adapter attached
2892 * to the interface specified by chr.
2893 * This is always called in the execution of an ioctl().
2894 *
2895 * Return ENXIO if the interface specified by the request does
2896 * not exist, ENOTSUP if Skywalk is not supported by the interface,
2897 * EINVAL if parameters are invalid, ENOMEM if needed resources
2898 * could not be allocated.
2899 * If successful, hold a reference to the nexus adapter.
2900 *
2901 * No reference is kept on the real interface, which may then
2902 * disappear at any time.
2903 */
2904 int
2905 na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2906 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
2907 struct nexus_adapter **na, boolean_t create)
2908 {
2909 int error = 0;
2910
2911 _CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2912
2913 *na = NULL; /* default return value */
2914
2915 SK_LOCK_ASSERT_HELD();
2916
2917 /*
2918 * We cascade through all possible types of nexus adapter.
2919 * All nx_*_na_find() functions return an error and an na,
2920 * with the following combinations:
2921 *
2922 * error na
2923 * 0 NULL type doesn't match
2924 * !0 NULL type matches, but na creation/lookup failed
2925 * 0 !NULL type matches and na created/found
2926 * !0 !NULL impossible
2927 */
2928
2929 #if CONFIG_NEXUS_MONITOR
2930 /* try to see if this is a monitor port */
2931 error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
2932 if (error != 0 || *na != NULL) {
2933 return error;
2934 }
2935 #endif /* CONFIG_NEXUS_MONITOR */
2936 #if CONFIG_NEXUS_USER_PIPE
2937 /* try to see if this is a pipe port */
2938 error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2939 if (error != 0 || *na != NULL) {
2940 return error;
2941 }
2942 #endif /* CONFIG_NEXUS_USER_PIPE */
2943 #if CONFIG_NEXUS_KERNEL_PIPE
2944 /* try to see if this is a kernel pipe port */
2945 error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2946 if (error != 0 || *na != NULL) {
2947 return error;
2948 }
2949 #endif /* CONFIG_NEXUS_KERNEL_PIPE */
2950 #if CONFIG_NEXUS_FLOWSWITCH
2951 /* try to see if this is a flowswitch port */
2952 error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2953 if (error != 0 || *na != NULL) {
2954 return error;
2955 }
2956 #endif /* CONFIG_NEXUS_FLOWSWITCH */
2957 #if CONFIG_NEXUS_NETIF
2958 error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2959 if (error != 0 || *na != NULL) {
2960 return error;
2961 }
2962 #endif /* CONFIG_NEXUS_NETIF */
2963
2964 ASSERT(*na == NULL);
2965 return ENXIO;
2966 }
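 
/*
 * Illustrative sketch of how the (error, *na) combinations documented
 * above are consumed by a caller (cf. na_connect() earlier in this
 * file):
 *
 *	struct nexus_adapter *na = NULL;
 *	int err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE);
 *	if (err != 0) {
 *		ASSERT(na == NULL);	// lookup or creation failed
 *		return err;
 *	}
 *	// success: na is non-NULL and holds a reference that must
 *	// eventually be dropped via na_release_locked()
 */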
2967
2968 void
2969 na_retain_locked(struct nexus_adapter *na)
2970 {
2971 SK_LOCK_ASSERT_HELD();
2972
2973 if (na != NULL) {
2974 #if SK_LOG
2975 uint32_t oref = atomic_add_32_ov(&na->na_refcount, 1);
2976 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2977 na->na_name, SK_KVA(na), oref + 1, na->na_channels);
2978 #else /* !SK_LOG */
2979 atomic_add_32(&na->na_refcount, 1);
2980 #endif /* !SK_LOG */
2981 }
2982 }
2983
2984 /* returns 1 iff the nexus_adapter is destroyed */
2985 int
2986 na_release_locked(struct nexus_adapter *na)
2987 {
2988 uint32_t oref;
2989
2990 SK_LOCK_ASSERT_HELD();
2991
2992 ASSERT(na->na_refcount > 0);
2993 oref = atomic_add_32_ov(&na->na_refcount, -1);
2994 if (oref > 1) {
2995 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2996 na->na_name, SK_KVA(na), oref - 1, na->na_channels);
2997 return 0;
2998 }
2999 ASSERT(na->na_channels == 0);
3000
3001 #if CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF
3002 struct ifnet *ifp = na->na_ifp;
3003 if (ifp != NULL) {
3004 /*
3005 * Prevent threads from doing further data movement
3006 * on this interface; callee holds an I/O refcnt
3007 * which we'll release later during resume.
3008 */
3009 ifnet_datamov_suspend(ifp);
3010 }
3011 #endif /* !CONFIG_NEXUS_FLOWSWITCH & !CONFIG_NEXUS_NETIF */
3012
3013 if (na->na_flags & NAF_ASYNC_DTOR) {
3014 na_destroyer_enqueue(na);
3015 } else {
3016 na_destroyer_final(na);
3017 }
3018
3019 return 1;
3020 }
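 
/*
 * Illustrative sketch: a temporary reference taken under SK_LOCK()
 * follows the usual retain/release pattern (cf. na_disconnect_spec()
 * above):
 *
 *	na_retain_locked(na);		// keep the adapter alive
 *	... use na ...
 *	(void) na_release_locked(na);	// may schedule/perform destruction
 */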
3021
3022 static void
3023 na_destroyer_final(struct nexus_adapter *na)
3024 {
3025 SK_LOCK_ASSERT_HELD();
3026
3027 #if CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF
3028 struct ifnet *ifp = na->na_ifp;
3029
3030 if (ifp != NULL) {
3031 SK_UNLOCK();
3032 /*
3033 * Wait until all threads in the data paths are done.
3034 */
3035 ifnet_datamov_drain(ifp);
3036
3037 if (na->na_type == NA_NETIF_DEV ||
3038 na->na_type == NA_NETIF_COMPAT_DEV) {
3039 /* undo what nx_netif_attach() did */
3040 ASSERT(na == (struct nexus_adapter *)ifp->if_na);
3041 ifp->if_na_ops = NULL;
3042 ifp->if_na = NULL;
3043 membar_sync();
3044
3045 SKYWALK_CLEAR_CAPABLE(ifp, na);
3046 }
3047 SK_LOCK();
3048 }
3049 #endif /* !CONFIG_NEXUS_FLOWSWITCH & !CONFIG_NEXUS_NETIF */
3050
3051 ASSERT(na->na_refcount == 0);
3052 if (na->na_dtor != NULL) {
3053 na->na_dtor(na);
3054 }
3055
3056 #if CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF
3057 if (na->na_ifp != NULL) {
3058 ASSERT(ifp == na->na_ifp);
3059 SK_DF(SK_VERB_REFCNT,
3060 "na \"%s\" (0x%llx) releasing %s [ioref %u]",
3061 na->na_name, SK_KVA(na), na->na_ifp->if_xname,
3062 (na->na_ifp->if_refio - 1));
3063 ifnet_decr_iorefcnt(na->na_ifp);
3064 na->na_ifp = NULL;
3065 }
3066
3067 /*
3068 * Release reference during suspend and mark the interface
3069 * as data-ready; at this point it's safe to resume data
3070 * movement thru the interface.
3071 */
3072 if (ifp != NULL) {
3073 ifnet_datamov_resume(ifp);
3074 ifp = NULL;
3075 }
3076 #endif /* CONFIG_NEXUS_FLOWSWITCH || CONFIG_NEXUS_NETIF */
3077
3078 ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
3079 ASSERT(na->na_slot_ctxs == NULL);
3080 ASSERT(na->na_scratch == NULL);
3081
3082 #if CONFIG_NEXUS_USER_PIPE
3083 nx_upipe_na_dealloc(na);
3084 #endif /* CONFIG_NEXUS_USER_PIPE */
3085 if (na->na_arena != NULL) {
3086 skmem_arena_release(na->na_arena);
3087 na->na_arena = NULL;
3088 }
3089
3090 SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
3091 na->na_name, SK_KVA(na));
3092
3093 NA_FREE(na);
3094 }
3095
3096 static void
3097 na_destroyer_enqueue(struct nexus_adapter *na)
3098 {
3099 SK_LOCK_ASSERT_HELD();
3100
3101 ASSERT(na->na_refcount == 0);
3102 ++na_destroyer_cnt;
3103 VERIFY(na_destroyer_cnt != 0);
3104 TAILQ_INSERT_TAIL(&na_destroyer_head, na, na_destroyer_link);
3105 wakeup((caddr_t)&na_destroyer_run);
3106 }
3107
3108 static struct nexus_adapter *
3109 na_destroyer_dequeue(void)
3110 {
3111 struct nexus_adapter *na;
3112
3113 SK_LOCK_ASSERT_HELD();
3114
3115 na = TAILQ_FIRST(&na_destroyer_head);
3116 VERIFY(na_destroyer_cnt != 0 || na == NULL);
3117 if (na != NULL) {
3118 VERIFY(na_destroyer_cnt != 0);
3119 --na_destroyer_cnt;
3120 TAILQ_REMOVE(&na_destroyer_head, na, na_destroyer_link);
3121 na->na_destroyer_link.tqe_next = NULL;
3122 na->na_destroyer_link.tqe_prev = NULL;
3123 }
3124 return na;
3125 }
3126
3127 static int
3128 na_destroyer_thread_cont(int err)
3129 {
3130 #pragma unused(err)
3131 struct nexus_adapter *na;
3132
3133 for (;;) {
3134 SK_LOCK_ASSERT_HELD();
3135 while (na_destroyer_cnt == 0) {
3136 (void) msleep0(&na_destroyer_run, &sk_lock,
3137 (PZERO - 1), "na_destroyer_thread_cont", 0,
3138 na_destroyer_thread_cont);
3139 /* NOTREACHED */
3140 }
3141
3142 net_update_uptime();
3143
3144 VERIFY(TAILQ_FIRST(&na_destroyer_head) != NULL);
3145
3146 na = na_destroyer_dequeue();
3147 if (na != NULL) {
3148 na_destroyer_final(na);
3149 SK_LOCK_ASSERT_HELD();
3150 }
3151 }
3152 }
3153
3154 __dead2
3155 static void
3156 na_destroyer_thread_func(void *v, wait_result_t w)
3157 {
3158 #pragma unused(v, w)
3159 SK_LOCK();
3160 (void) msleep0(&na_destroyer_run, &sk_lock,
3161 (PZERO - 1), "na_destroyer", 0, na_destroyer_thread_cont);
3162 /*
3163 * msleep0() shouldn't have returned as PCATCH was not set;
3164 * therefore assert in this case.
3165 */
3166 SK_UNLOCK();
3167 VERIFY(0);
3168 /* NOTREACHED */
3169 __builtin_unreachable();
3170 }
3171
3172 static struct nexus_adapter *
3173 na_pseudo_alloc(zalloc_flags_t how)
3174 {
3175 struct nexus_adapter *na;
3176
3177 na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
3178 if (na) {
3179 na->na_type = NA_PSEUDO;
3180 na->na_free = na_pseudo_free;
3181 }
3182 return na;
3183 }
3184
3185 static void
3186 na_pseudo_free(struct nexus_adapter *na)
3187 {
3188 ASSERT(na->na_refcount == 0);
3189 SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3190 bzero(na, sizeof(*na));
3191 zfree(na_pseudo_zone, na);
3192 }
3193
3194 static int
3195 na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3196 uint32_t flags)
3197 {
3198 #pragma unused(kring, p, flags)
3199 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
3200 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x",
3201 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3202 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3203 flags);
3204
3205 return 0;
3206 }
3207
3208 static int
3209 na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3210 uint32_t flags)
3211 {
3212 #pragma unused(kring, p, flags)
3213 SK_DF(SK_VERB_SYNC | SK_VERB_RX,
3214 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x",
3215 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3216 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3217 flags);
3218
3219 ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3220
3221 return 0;
3222 }
3223
3224 static int
3225 na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3226 {
3227 SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3228 SK_KVA(na), na_activate_mode2str(mode));
3229
3230 switch (mode) {
3231 case NA_ACTIVATE_MODE_ON:
3232 atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
3233 break;
3234
3235 case NA_ACTIVATE_MODE_DEFUNCT:
3236 break;
3237
3238 case NA_ACTIVATE_MODE_OFF:
3239 atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
3240 break;
3241
3242 default:
3243 VERIFY(0);
3244 /* NOTREACHED */
3245 __builtin_unreachable();
3246 }
3247
3248 return 0;
3249 }
3250
3251 static void
3252 na_pseudo_dtor(struct nexus_adapter *na)
3253 {
3254 #pragma unused(na)
3255 }
3256
3257 static int
3258 na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3259 {
3260 return na_rings_mem_setup(na, 0, FALSE, ch);
3261 }
3262
3263 static void
3264 na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3265 boolean_t defunct)
3266 {
3267 na_rings_mem_teardown(na, ch, defunct);
3268 }
3269
3270 /*
3271 * Pseudo nexus adapter; typically used as a generic parent adapter.
3272 */
3273 int
3274 na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3275 struct nexus_adapter **ret)
3276 {
3277 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3278 struct nexus_adapter *na;
3279 int error;
3280
3281 SK_LOCK_ASSERT_HELD();
3282 *ret = NULL;
3283
3284 na = na_pseudo_alloc(Z_WAITOK);
3285
3286 ASSERT(na->na_type == NA_PSEUDO);
3287 ASSERT(na->na_free == na_pseudo_free);
3288
3289 (void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3290 na->na_name[sizeof(na->na_name) - 1] = '\0';
3291 uuid_generate_random(na->na_uuid);
3292
3293 /*
3294 * Verify upper bounds; for all cases including user pipe nexus,
3295 * the parameters must have already been validated by corresponding
3296 * nxdom_prov_params() function defined by each domain.
3297 */
3298 na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3299 na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3300 na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3301 na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3302 ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3303 ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3304 ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3305 ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3306
3307 na->na_txsync = na_pseudo_txsync;
3308 na->na_rxsync = na_pseudo_rxsync;
3309 na->na_activate = na_pseudo_activate;
3310 na->na_dtor = na_pseudo_dtor;
3311 na->na_krings_create = na_pseudo_krings_create;
3312 na->na_krings_delete = na_pseudo_krings_delete;
3313
3314 *(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3315 NEXUS_STATS_TYPE_INVALID;
3316
3317 /* other fields are set in the common routine */
3318 na_attach_common(na, nx, NX_DOM_PROV(nx));
3319
3320 if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3321 nx, na)) != 0) {
3322 ASSERT(na->na_arena == NULL);
3323 goto err;
3324 }
3325 ASSERT(na->na_arena != NULL);
3326
3327 *(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3328 ASSERT(na->na_flowadv_max == 0 ||
3329 skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3330
3331 #if SK_LOG
3332 uuid_string_t uuidstr;
3333 SK_D("na_name: \"%s\"", na->na_name);
3334 SK_D(" UUID: %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3335 SK_D(" nx: 0x%llx (\"%s\":\"%s\")",
3336 SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3337 NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3338 SK_D(" flags: %b", na->na_flags, NAF_BITS);
3339 SK_D(" flowadv_max: %u", na->na_flowadv_max);
3340 SK_D(" rings: tx %u rx %u",
3341 na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3342 SK_D(" slots: tx %u rx %u",
3343 na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3344 #if CONFIG_NEXUS_USER_PIPE
3345 SK_D(" next_pipe: %u", na->na_next_pipe);
3346 SK_D(" max_pipes: %u", na->na_max_pipes);
3347 #endif /* CONFIG_NEXUS_USER_PIPE */
3348 #endif /* SK_LOG */
3349
3350 *ret = na;
3351 na_retain_locked(na);
3352
3353 return 0;
3354
3355 err:
3356 if (na != NULL) {
3357 if (na->na_arena != NULL) {
3358 skmem_arena_release(na->na_arena);
3359 na->na_arena = NULL;
3360 }
3361 NA_FREE(na);
3362 }
3363 return error;
3364 }
3365
3366 void
3367 na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3368 const flowadv_idx_t fe_idx)
3369 {
3370 struct skmem_arena *ar = na->na_arena;
3371 struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3372 struct __flowadv_entry *fae;
3373
3374 ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3375 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3376
3377 AR_LOCK(ar);
3378
3379 /* we must not get here if arena is defunct; this must be valid */
3380 ASSERT(arn->arn_flowadv_obj != NULL);
3381
3382 VERIFY(fe_idx < na->na_flowadv_max);
3383 fae = &arn->arn_flowadv_obj[fe_idx];
3384 uuid_copy(fae->fae_id, fae_id);
3385 fae->fae_flags |= FLOWADVF_VALID;
3386
3387 AR_UNLOCK(ar);
3388 }
3389
3390 void
3391 na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3392 const flowadv_idx_t fe_idx)
3393 {
3394 #pragma unused(fae_id)
3395 struct skmem_arena *ar = na->na_arena;
3396 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3397
3398 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3399 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3400
3401 AR_LOCK(ar);
3402
3403 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3404 if (arn->arn_flowadv_obj != NULL) {
3405 struct __flowadv_entry *fae;
3406
3407 VERIFY(fe_idx < na->na_flowadv_max);
3408 fae = &arn->arn_flowadv_obj[fe_idx];
3409 ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3410 uuid_clear(fae->fae_id);
3411 fae->fae_flags &= ~FLOWADVF_VALID;
3412 }
3413
3414 AR_UNLOCK(ar);
3415 }
3416
3417 bool
3418 na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3419 const flowadv_token_t flow_token)
3420 {
3421 struct skmem_arena *ar = na->na_arena;
3422 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3423 bool suspend;
3424
3425 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3426 ASSERT(fe_idx < na->na_flowadv_max);
3427 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3428
3429 AR_LOCK(ar);
3430
3431 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3432
3433 if (arn->arn_flowadv_obj != NULL) {
3434 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3435
3436 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3437 /*
3438 * We cannot guarantee that the flow is still around by now,
3439 * so check if that's the case and let the caller know.
3440 */
3441 if ((suspend = (fae->fae_token == flow_token))) {
3442 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3443 fae->fae_flags |= FLOWADVF_SUSPENDED;
3444 }
3445 } else {
3446 suspend = false;
3447 }
3448 if (suspend) {
3449 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3450 "SUSPEND", sk_proc_name_address(current_proc()),
3451 sk_proc_pid(current_proc()), flow_token, fe_idx);
3452 } else {
3453 SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around",
3454 sk_proc_name_address(current_proc()),
3455 sk_proc_pid(current_proc()), flow_token, fe_idx);
3456 }
3457
3458 AR_UNLOCK(ar);
3459
3460 return suspend;
3461 }
3462
3463 int
3464 na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3465 const flowadv_token_t flow_token)
3466 {
3467 struct nexus_adapter *na = ch->ch_na;
3468 struct skmem_arena *ar = na->na_arena;
3469 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3470 boolean_t resume;
3471
3472 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3473 ASSERT(fe_idx < na->na_flowadv_max);
3474 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3475
3476 AR_LOCK(ar);
3477
3478 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3479
3480 if (arn->arn_flowadv_obj != NULL) {
3481 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3482
3483 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3484 /*
3485 * We cannot guarantee that the flow is still around by now,
3486 * so check if that's the case and let the caller know.
3487 */
3488 if ((resume = (fae->fae_token == flow_token))) {
3489 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3490 fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3491 }
3492 } else {
3493 resume = FALSE;
3494 }
3495 if (resume) {
3496 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3497 "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
3498 fe_idx);
3499 } else {
3500 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3501 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3502 }
3503
3504 AR_UNLOCK(ar);
3505
3506 return resume;
3507 }
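 
/*
 * Illustrative sketch of the flow advisory lifecycle implied by the
 * routines above (hypothetical ordering; the actual call sites are not
 * in this file):
 *
 *	na_flowadv_entry_alloc(na, fae_id, fidx);	// flow created
 *	...
 *	if (na_flowadv_set(na, fidx, token)) {		// mark SUSPENDED
 *		// sender holds off until the flow is resumed
 *	}
 *	...
 *	na_flowadv_clear(ch, fidx, token);		// clear SUSPENDED
 *	na_flowadv_event(kring);	// post CHAN_FILT_HINT_FLOW_ADV_UPD
 *	...
 *	na_flowadv_entry_free(na, fae_id, fidx);	// flow torn down
 */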
3508
3509 void
3510 na_flowadv_event(struct __kern_channel_ring *kring)
3511 {
3512 ASSERT(kring->ckr_tx == NR_TX);
3513
3514 SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
3515 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
3516 KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3517
3518 na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3519 }
3520
3521 static int
3522 na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3523 uint32_t flags)
3524 {
3525 #pragma unused(flags, p)
3526 int n, ret = 0;
3527 slot_idx_t j;
3528 struct __kern_slot_desc *ksd;
3529 struct __user_slot_desc *usd;
3530 struct __kern_quantum *kqum;
3531 struct kern_pbufpool *pp = kring->ckr_pp;
3532 uint32_t nfree = 0;
3533
3534 /* packet pool list is protected by channel lock */
3535 ASSERT(!KR_KERNEL_ONLY(kring));
3536
3537 /* # of new slots */
3538 n = kring->ckr_rhead - kring->ckr_khead;
3539 if (n < 0) {
3540 n += kring->ckr_num_slots;
3541 }
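	/*
	 * Worked example of the wraparound math above: with ckr_num_slots ==
	 * 512, ckr_khead == 500 and ckr_rhead == 10, n starts out as
	 * 10 - 500 == -490 and becomes -490 + 512 == 22, i.e. user space has
	 * returned 22 slots whose packets can now be released to the pool.
	 */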
3542
3543 /* nothing to free */
3544 if (__improbable(n == 0)) {
3545 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3546 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3547 "nothing to free");
3548 goto done;
3549 }
3550
3551 j = kring->ckr_khead;
3552 PP_LOCK(pp);
3553 while (n--) {
3554 int err;
3555
3556 ksd = KR_KSD(kring, j);
3557 usd = KR_USD(kring, j);
3558
3559 if (__improbable(!SD_VALID_METADATA(usd))) {
3560 SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3561 ret = EINVAL;
3562 break;
3563 }
3564
3565 kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3566 if (__improbable(err != 0)) {
3567 SK_ERR("un-allocated packet or buflet %d %p",
3568 usd->sd_md_idx, SK_KVA(kqum));
3569 ret = EINVAL;
3570 break;
3571 }
3572
3573 /* detach and free the packet */
3574 kqum->qum_qflags &= ~QUM_F_FINALIZED;
3575 kqum->qum_ksd = NULL;
3576 ASSERT(!KSD_VALID_METADATA(ksd));
3577 USD_DETACH_METADATA(usd);
3578 ASSERT(pp == kqum->qum_pp);
3579 ASSERT(nfree < kring->ckr_num_slots);
3580 kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3581 j = SLOT_NEXT(j, kring->ckr_lim);
3582 }
3583 PP_UNLOCK(pp);
3584
3585 if (__probable(nfree > 0)) {
3586 pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3587 }
3588
3589 kring->ckr_khead = j;
3590 kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3591
3592 done:
3593 return ret;
3594 }
3595
3596 static int
3597 na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
3598 uint32_t flags)
3599 {
3600 int b, err;
3601 uint32_t n = 0;
3602 slot_idx_t j;
3603 uint64_t now;
3604 uint32_t curr_ws, ph_needed, ph_cnt;
3605 struct __kern_slot_desc *ksd;
3606 struct __user_slot_desc *usd;
3607 struct __kern_quantum *kqum;
3608 kern_pbufpool_t pp = kring->ckr_pp;
3609 pid_t pid = proc_pid(p);
3610
3611 /* packet pool list is protected by channel lock */
3612 ASSERT(!KR_KERNEL_ONLY(kring));
3613 ASSERT(!PP_KERNEL_ONLY(pp));
3614
3615 now = _net_uptime;
3616 if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3617 if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3618 kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3619 }
3620 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3621 "%s: purged curr_ws(%d)", kring->ckr_name,
3622 kring->ckr_alloc_ws);
3623 return 0;
3624 }
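	/*
	 * The purge path above allocates nothing: if this ring has not synced
	 * for at least na_upp_reap_interval, the working set is dropped to
	 * na_upp_reap_min_pkts so that an idle user packet pool shrinks back
	 * toward its floor on subsequent syncs.
	 */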
3625 /* reclaim the completed slots */
3626 kring->ckr_khead = kring->ckr_rhead;
3627
3628 /* # of busy (unclaimed) slots */
3629 b = kring->ckr_ktail - kring->ckr_khead;
3630 if (b < 0) {
3631 b += kring->ckr_num_slots;
3632 }
3633
3634 curr_ws = kring->ckr_alloc_ws;
3635 if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3636 /* increment the working set by 50% */
3637 curr_ws += (curr_ws >> 1);
3638 curr_ws = MIN(curr_ws, kring->ckr_lim);
3639 } else {
3640 if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3641 (uint32_t)b >= (curr_ws >> 2)) {
3642 /* decrease the working set by 25% */
3643 curr_ws -= (curr_ws >> 2);
3644 }
3645 }
3646 curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
3647 if (curr_ws > (uint32_t)b) {
3648 n = curr_ws - b;
3649 }
3650 kring->ckr_alloc_ws = curr_ws;
3651 kring->ckr_sync_time = now;
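	/*
	 * Worked example of the working-set heuristic above (illustrative
	 * values only): with curr_ws == 64, a forced sync grows it by 50% to
	 * 96 (capped at ckr_lim); otherwise, if the ring has been quiet for at
	 * least na_upp_ws_hold_time and at least a quarter of the working set
	 * (16 slots here) is still sitting unclaimed, it shrinks by 25% to 48.
	 * The result is never allowed below na_upp_alloc_lowat, and only the
	 * excess of curr_ws over the busy count b is actually allocated below.
	 */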
3652
3653 /* min with # of avail free slots (subtract busy from max) */
3654 n = ph_needed = MIN(n, kring->ckr_lim - b);
3655 j = kring->ckr_ktail;
3656 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3657 "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3658
3659 if ((ph_cnt = ph_needed) == 0) {
3660 goto done;
3661 }
3662
3663 err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
3664 &ph_cnt);
3665
3666 if (__improbable(ph_cnt == 0)) {
3667 SK_ERR("kr 0x%llx failed to alloc %u packet s(%d)",
3668 SK_KVA(kring), ph_needed, err);
3669 kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
3670 } else {
3671 /*
3672 * Add packets to the allocated list of user packet pool.
3673 */
3674 pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
3675 }
3676
3677
3678 for (n = 0; n < ph_cnt; n++) {
3679 ksd = KR_KSD(kring, j);
3680 usd = KR_USD(kring, j);
3681
3682 kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
3683 kring->ckr_scratch[n] = 0;
3684 ASSERT(kqum != NULL);
3685
3686 /* cleanup any stale slot mapping */
3687 KSD_RESET(ksd);
3688 ASSERT(usd != NULL);
3689 USD_RESET(usd);
3690
3691 /*
3692 * Since this packet is freshly allocated and we need to
3693 * have the flag set for the attach to succeed, just set
3694 * it here rather than calling __packet_finalize().
3695 */
3696 kqum->qum_qflags |= QUM_F_FINALIZED;
3697
3698 /* Attach packet to slot */
3699 KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
3700 /*
3701 * externalize the packet as it is being transferred to
3702 * user space.
3703 */
3704 kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);
3705
3706 j = SLOT_NEXT(j, kring->ckr_lim);
3707 }
3708 done:
3709 ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3710 kring->ckr_ktail = j;
3711 return 0;
3712 }
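/*
 * Summary of the alloc path above: completed slots are reclaimed by snapping
 * ckr_khead to ckr_rhead, the working set is resized, a batch of packets is
 * allocated without sleeping and recorded in the user packet pool via
 * pp_insert_upp_batch(), and each packet is then finalized, attached to its
 * slot and externalized before ckr_ktail is advanced to publish the new
 * slots to user space.  A short batch is not treated as an error; the ring
 * simply publishes however many packets ph_cnt ended up being.
 */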
3713
3714 static int
3715 na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3716 uint32_t flags)
3717 {
3718 #pragma unused(flags, p)
3719 int n, ret = 0;
3720 slot_idx_t j;
3721 struct __kern_slot_desc *ksd;
3722 struct __user_slot_desc *usd;
3723 struct __kern_buflet *kbft;
3724 struct kern_pbufpool *pp = kring->ckr_pp;
3725
3726 /* packet pool list is protected by channel lock */
3727 ASSERT(!KR_KERNEL_ONLY(kring));
3728
3729 /* # of new slots */
3730 n = kring->ckr_rhead - kring->ckr_khead;
3731 if (n < 0) {
3732 n += kring->ckr_num_slots;
3733 }
3734
3735 /* nothing to free */
3736 if (__improbable(n == 0)) {
3737 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3738 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3739 "nothing to free");
3740 goto done;
3741 }
3742
3743 j = kring->ckr_khead;
3744 while (n--) {
3745 int err;
3746
3747 ksd = KR_KSD(kring, j);
3748 usd = KR_USD(kring, j);
3749
3750 if (__improbable(!SD_VALID_METADATA(usd))) {
3751 SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3752 ret = EINVAL;
3753 break;
3754 }
3755
3756 kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
3757 if (__improbable(err != 0)) {
3758 SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
3759 SK_KVA(kbft));
3760 ret = EINVAL;
3761 break;
3762 }
3763
3764 		/* detach and free the buflet */
3765 ASSERT(!KSD_VALID_METADATA(ksd));
3766 USD_DETACH_METADATA(usd);
3767 pp_free_buflet(pp, kbft);
3768 j = SLOT_NEXT(j, kring->ckr_lim);
3769 }
3770 kring->ckr_khead = j;
3771 kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3772
3773 done:
3774 return ret;
3775 }
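/*
 * This is the buflet counterpart of na_packet_pool_free_sync(): the same
 * ckr_rhead/ckr_khead walk, except that each slot is resolved with
 * pp_remove_upp_bft() and released immediately via pp_free_buflet() instead
 * of being batched through ckr_scratch.
 */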
3776
3777 static int
3778 na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
3779 uint32_t flags)
3780 {
3781 int b, err;
3782 uint32_t n = 0;
3783 slot_idx_t j;
3784 uint64_t now;
3785 uint32_t curr_ws, bh_needed, bh_cnt;
3786 struct __kern_slot_desc *ksd;
3787 struct __user_slot_desc *usd;
3788 struct __kern_buflet *kbft;
3789 struct __kern_buflet_ext *kbe;
3790 kern_pbufpool_t pp = kring->ckr_pp;
3791 pid_t pid = proc_pid(p);
3792
3793 /* packet pool list is protected by channel lock */
3794 ASSERT(!KR_KERNEL_ONLY(kring));
3795 ASSERT(!PP_KERNEL_ONLY(pp));
3796
3797 now = _net_uptime;
3798 if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3799 if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3800 kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3801 }
3802 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3803 "%s: purged curr_ws(%d)", kring->ckr_name,
3804 kring->ckr_alloc_ws);
3805 return 0;
3806 }
3807 /* reclaim the completed slots */
3808 kring->ckr_khead = kring->ckr_rhead;
3809
3810 /* # of busy (unclaimed) slots */
3811 b = kring->ckr_ktail - kring->ckr_khead;
3812 if (b < 0) {
3813 b += kring->ckr_num_slots;
3814 }
3815
3816 curr_ws = kring->ckr_alloc_ws;
3817 if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3818 /* increment the working set by 50% */
3819 curr_ws += (curr_ws >> 1);
3820 curr_ws = MIN(curr_ws, kring->ckr_lim);
3821 } else {
3822 if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3823 (uint32_t)b >= (curr_ws >> 2)) {
3824 /* decrease the working set by 25% */
3825 curr_ws -= (curr_ws >> 2);
3826 }
3827 }
3828 curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
3829 if (curr_ws > (uint32_t)b) {
3830 n = curr_ws - b;
3831 }
3832 kring->ckr_alloc_ws = curr_ws;
3833 kring->ckr_sync_time = now;
3834
3835 /* min with # of avail free slots (subtract busy from max) */
3836 n = bh_needed = MIN(n, kring->ckr_lim - b);
3837 j = kring->ckr_ktail;
3838 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3839 "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3840
3841 if ((bh_cnt = bh_needed) == 0) {
3842 goto done;
3843 }
3844
3845 err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
3846 SKMEM_NOSLEEP);
3847
3848 if (bh_cnt == 0) {
3849 SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
3850 SK_KVA(kring), bh_needed, err);
3851 kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
3852 }
3853
3854 for (n = 0; n < bh_cnt; n++) {
3855 struct __user_buflet *ubft;
3856
3857 ksd = KR_KSD(kring, j);
3858 usd = KR_USD(kring, j);
3859
3860 kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
3861 kbe = (struct __kern_buflet_ext *)kbft;
3862 kring->ckr_scratch[n] = 0;
3863 ASSERT(kbft != NULL);
3864
3865 /*
3866 * Add buflet to the allocated list of user packet pool.
3867 */
3868 pp_insert_upp_bft(pp, kbft, pid);
3869
3870 /*
3871 * externalize the buflet as it is being transferred to
3872 * user space.
3873 */
3874 ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
3875 KBUF_EXTERNALIZE(kbft, ubft, pp);
3876
3877 /* cleanup any stale slot mapping */
3878 KSD_RESET(ksd);
3879 ASSERT(usd != NULL);
3880 USD_RESET(usd);
3881
3882 /* Attach buflet to slot */
3883 KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);
3884
3885 j = SLOT_NEXT(j, kring->ckr_lim);
3886 }
3887 done:
3888 ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
3889 kring->ckr_ktail = j;
3890 return 0;
3891 }
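/*
 * Likewise, this mirrors na_packet_pool_alloc_sync() for raw buflets: the
 * working-set heuristic is identical (with na_upp_alloc_buf_lowat as the
 * floor), pp_alloc_buflet_batch()/pp_insert_upp_bft() stand in for the
 * packet batch calls, and KBUF_EXTERNALIZE() plays the role of
 * kr_externalize_metadata() when the buflet is handed to user space.
 */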
3892
3893 /* The caller needs to ensure that the NA stays intact */
3894 void
3895 na_drain(struct nexus_adapter *na, boolean_t purge)
3896 {
3897 /* will be cleared on next channel sync */
3898 if (!(atomic_bitset_32_ov(&na->na_flags, NAF_DRAINING) &
3899 NAF_DRAINING) && NA_IS_ACTIVE(na)) {
3900 SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
3901 na->na_name, (purge ? "purging" : "pruning"),
3902 SK_KVA(na), na->na_flags, NAF_BITS);
3903
3904 /* reap (purge/prune) caches in the arena */
3905 skmem_arena_reap(na->na_arena, purge);
3906 }
3907 }
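/*
 * na_drain() is a best-effort memory-pressure hook: atomic_bitset_32_ov()
 * returns the previous flags, so the reap only runs the first time
 * NAF_DRAINING is set (repeated calls while draining are no-ops), and the
 * arena caches are reaped either aggressively (purge) or conservatively
 * (prune).  As noted above, the flag is cleared again on the next channel
 * sync.
 */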
3908