/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <mach/thread_act.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>

static void na_netif_compat_finalize(struct nexus_netif_adapter *,
    struct ifnet *);
static errno_t nx_netif_compat_receive(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp);
static int nx_netif_compat_catch_rx(struct nexus_netif_compat_adapter *na,
    boolean_t enable);
static int nx_netif_compat_xmit_frame(struct nexus_adapter *, struct mbuf *,
    struct __kern_packet *);

static int nx_netif_compat_na_notify_tx(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int nx_netif_compat_na_notify_rx(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int nx_netif_compat_na_activate(struct nexus_adapter *,
    na_activate_mode_t);
static int nx_netif_compat_na_txsync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static int nx_netif_compat_na_rxsync(struct __kern_channel_ring *,
    struct proc *, uint32_t);
static void nx_netif_compat_na_dtor(struct nexus_adapter *na);

static void nx_netif_compat_tx_intr(struct ifnet *, enum txrx, uint32_t,
    uint32_t *);
static inline struct mbuf *nx_netif_compat_ring_alloc(int, int, uint16_t);
static inline void nx_netif_compat_ring_free(struct mbuf *m);
static void nx_netif_compat_ringcb(caddr_t cl, uint32_t size, caddr_t arg);

static uint32_t nx_netif_compat_tx_clean(struct netif_stats *nifs,
    struct __kern_channel_ring *kring);
static void nx_netif_compat_set_tx_event(struct __kern_channel_ring *kring,
    slot_idx_t khead);

static struct nexus_netif_compat_adapter *na_netif_compat_alloc(zalloc_flags_t);
static void na_netif_compat_free(struct nexus_adapter *);
#if DEBUG || DEVELOPMENT
static struct mbuf *nx_netif_rx_split(struct mbuf *, uint32_t);
#endif /* DEBUG || DEVELOPMENT */
#define MBUF_TXQ(m)	((m)->m_pkthdr.pkt_flowid)
#define MBUF_RXQ(m)	((m)->m_pkthdr.pkt_flowid)

#define NMB_PROPF_TX_NOTIFY	0x1	/* generate transmit event */
#define NMB_FLAGS_MASK		0x0000ffff
#define NMB_INDEX_MASK		0xffff0000
#define NMB_GET_FLAGS(p)	(((uint32_t)(p) & NMB_FLAGS_MASK))
#define NMB_SET_FLAGS(p, f)	(((uint32_t)(p) & ~NMB_FLAGS_MASK) | (f))
#define NMB_GET_INDEX(p)	(((uint32_t)(p) & NMB_INDEX_MASK) >> 16)
#define NMB_SET_INDEX(p, i)	(((uint32_t)(p) & ~NMB_INDEX_MASK) | ((i) << 16))
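/*
 * The cluster property word packs two 16-bit fields: bits 0-15 hold
 * NMB_PROPF_* flags and bits 16-31 hold the ring slot index of the
 * mbuf.  For example, slot 5 with TX_NOTIFY set is encoded as
 * (5 << 16) | 0x1 == 0x00050001; NMB_GET_INDEX() recovers 5 and
 * NMB_GET_FLAGS() recovers 0x1.
 */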

static SKMEM_TYPE_DEFINE(na_netif_compat_zone, struct nexus_netif_compat_adapter);

static int netif_tx_event_mode = 0;
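/*
 * netif_tx_event_mode selects which pending slot
 * nx_netif_compat_set_tx_event() below tags with a TX completion
 * notification: 0 picks the first pending slot (safe against drivers
 * completing mbufs out of order); nonzero picks the midpoint of the
 * pending region.
 */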

#if (DEVELOPMENT || DEBUG)
SYSCTL_EXTENSIBLE_NODE(_kern_skywalk_netif, OID_AUTO, compat,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk netif Nexus legacy compatibility support");
SYSCTL_INT(_kern_skywalk_netif_compat, OID_AUTO, tx_event_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_tx_event_mode, 0, "");
static uint32_t netif_rx_split = 0;
SYSCTL_UINT(_kern_skywalk_netif_compat, OID_AUTO, rx_split,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_rx_split, 0, "");
#endif /* DEVELOPMENT || DEBUG */

struct kern_nexus_domain_provider nx_netif_compat_prov_s = {
	.nxdom_prov_name = NEXUS_PROVIDER_NET_IF_COMPAT,
	.nxdom_prov_flags = NXDOMPROVF_DEFAULT,
	.nxdom_prov_cb = {
		.dp_cb_init = nx_netif_prov_init,
		.dp_cb_fini = nx_netif_prov_fini,
		.dp_cb_params = nx_netif_prov_params,
		/*
		 * We must be using the native netif handlers below,
		 * since we act as the default domain provider; see
		 * kern_nexus_register_domain_provider().
		 */
		.dp_cb_mem_new = nx_netif_prov_mem_new,
		.dp_cb_config = nx_netif_prov_config,
		.dp_cb_nx_ctor = nx_netif_prov_nx_ctor,
		.dp_cb_nx_dtor = nx_netif_prov_nx_dtor,
		.dp_cb_nx_mem_info = nx_netif_prov_nx_mem_info,
		.dp_cb_nx_mib_get = nx_netif_prov_nx_mib_get,
		.dp_cb_nx_stop = nx_netif_prov_nx_stop,
	},
};

struct nexus_ifnet_ops na_netif_compat_ops = {
	.ni_finalize = na_netif_compat_finalize,
	.ni_reap = nx_netif_reap,
	.ni_dequeue = nx_netif_compat_tx_dequeue,
	.ni_get_len = nx_netif_compat_tx_get_len,
	.ni_detach_notify = NULL
};

#define SKMEM_TAG_NETIF_COMPAT_MIT "com.apple.skywalk.netif.compat.mit"
static SKMEM_TAG_DEFINE(skmem_tag_netif_compat_mit, SKMEM_TAG_NETIF_COMPAT_MIT);

#define SKMEM_TAG_NETIF_COMPAT_POOL "com.apple.skywalk.netif.compat.pool"
static SKMEM_TAG_DEFINE(skmem_tag_netif_compat_pool, SKMEM_TAG_NETIF_COMPAT_POOL);

void
nx_netif_compat_init(struct nxdom *nxdom)
{
	_CASSERT(NETIF_COMPAT_MAX_MBUF_DATA_COPY <= NETIF_COMPAT_BUF_SIZE);

	/*
	 * We want nxprov_create() coming from userland to use the
	 * netif_compat domain provider, so install it as default.
	 * This is verified by the caller.
	 */
	(void) nxdom_prov_add(nxdom, &nx_netif_compat_prov_s);
}

void
nx_netif_compat_fini(void)
{
	(void) nxdom_prov_del(&nx_netif_compat_prov_s);
}

static struct nexus_netif_compat_adapter *
na_netif_compat_alloc(zalloc_flags_t how)
{
	struct nexus_netif_compat_adapter *nca;

	_CASSERT(offsetof(struct nexus_netif_compat_adapter, nca_up) == 0);

	nca = zalloc_flags(na_netif_compat_zone, how | Z_ZERO);
	if (nca) {
		SK_DF(SK_VERB_MEM, "nca %p ALLOC", SK_KVA(nca));
	}
	return nca;
}

static void
na_netif_compat_free(struct nexus_adapter *na)
{
	struct nexus_netif_compat_adapter *nca =
	    (struct nexus_netif_compat_adapter *)na;

	SK_LOCK_ASSERT_HELD();
	ASSERT(na->na_refcount == 0);

	SK_DF(SK_VERB_MEM, "nca [dev+host] %p FREE", SK_KVA(nca));
	bzero(nca, sizeof(*nca));
	zfree(na_netif_compat_zone, nca);
}

/*
 * Callback invoked when the device driver frees an mbuf used
 * by skywalk to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
nx_netif_compat_ringcb(caddr_t cl, uint32_t size, caddr_t arg)
{
#pragma unused(cl, size)
	struct mbuf *m = (void *)arg;
	struct ifnet *ifp = NULL;
	struct netif_stats *nifs = NULL;
	uintptr_t data; /* not used */
	uint32_t txq;
	errno_t err;

	err = mbuf_get_tx_compl_data(m, (uintptr_t *)&ifp, &data);
	ASSERT(err == 0);

	nifs = &NX_NETIF_PRIVATE(NA(ifp)->nifna_up.na_nx)->nif_stats;
	txq = MBUF_TXQ(m);

	for (;;) {
		uint32_t p = 0, i, f;

		(void) mbuf_cluster_get_prop(m, &p);
		f = NMB_GET_FLAGS(p);
		i = NMB_GET_INDEX(p);

		SK_DF(SK_VERB_NETIF, "%s m 0x%llx txq %u i %u f 0x%x",
		    if_name(ifp), SK_KVA(m), MBUF_TXQ(m), i, f);

		if (f & NMB_PROPF_TX_NOTIFY) {
			uint32_t pn;

			f &= ~NMB_PROPF_TX_NOTIFY;
			pn = NMB_SET_FLAGS(p, f);

			err = mbuf_cluster_set_prop(m, p, pn);
			if (err != 0) {
				if (err == EBUSY) { /* try again */
					continue;
				}
				/* TODO: [email protected] -- what to do? */
				SK_ERR("Failed to clear TX_NOTIFY "
				    "m 0x%llx i %u err %d", SK_KVA(m), i, err);
			} else {
				nx_netif_compat_tx_intr(ifp, NR_TX, txq, NULL);
				SK_DF(SK_VERB_NETIF | SK_VERB_INTR | SK_VERB_TX,
				    "%s TX irq m 0x%llx txq %u i %u f 0x%x",
				    if_name(ifp), SK_KVA(m), MBUF_TXQ(m), i, f);
				STATS_INC(nifs, NETIF_STATS_TX_IRQ);
			}
		}
		break;
	}
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static struct mbuf *
nx_netif_compat_ring_alloc(int how, int len, uint16_t idx)
{
	struct mbuf *m = NULL;
	size_t size = len;
	uint32_t i;

	if (mbuf_ring_cluster_alloc(how, MBUF_TYPE_HEADER, &m,
	    nx_netif_compat_ringcb, &size) != 0) {
		return NULL;
	}

	for (;;) {
		uint32_t p = 0, pn;
		int err;

		(void) mbuf_cluster_get_prop(m, &p);
		pn = NMB_SET_FLAGS(p, 0);
		pn = NMB_SET_INDEX(pn, idx);

		err = mbuf_cluster_set_prop(m, p, pn);
		if (err != 0) {
			if (err == EBUSY) { /* try again */
				continue;
			}
			SK_ERR("Failed to initialize properties m 0x%llx "
			    "err %d", SK_KVA(m), err);
			m_freem(m);
			return NULL;
		}
		(void) mbuf_cluster_get_prop(m, &p);
		i = NMB_GET_INDEX(p);
		ASSERT(i == idx);
		break;
	}

	SK_DF(SK_VERB_MEM, "alloc m 0x%llx size %u i %u",
	    SK_KVA(m), (uint32_t)size, i);

	return m;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
nx_netif_compat_ring_free(struct mbuf *m)
{
	if (m == NULL) {
		return;
	}

	for (;;) {
		uint32_t p = 0;
		int err;

		(void) mbuf_cluster_get_prop(m, &p);
		err = mbuf_cluster_set_prop(m, p, 0);
		if (err != 0) {
			if (err == EBUSY) { /* try again */
				continue;
			}
			/* TODO: [email protected] -- what to do? */
			SK_ERR("Failed to clear properties m 0x%llx err %d",
			    SK_KVA(m), err);
		}
		break;
	}
	m_freem(m);
}

static void
nx_netif_compat_tx_intr(struct ifnet *ifp, enum txrx t, uint32_t q,
    uint32_t *work_done)
{
	struct nexus_adapter *na = &NA(ifp)->nifna_up;

	if (__improbable(!NA_IS_ACTIVE(na) || q >= na_get_nrings(na, t))) {
		if (q >= na_get_nrings(na, t)) {
			SK_ERR("na \"%s\" (0x%llx) invalid q %u >= %u",
			    na->na_name, SK_KVA(na), q, na_get_nrings(na, t));
		}
	} else {
		(void) nx_netif_mit_tx_intr((NAKR(na, t) + q), kernproc,
		    0, work_done);
	}
}

static int
nx_netif_compat_na_notify_tx(struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags)
{
	/*
	 * This should never get executed, as nothing should be invoking
	 * the TX ring notify callback. The compat adapter directly
	 * calls nx_netif_compat_tx_intr() for TX completion from within
	 * nx_netif_compat_ringcb().
	 *
	 * If we ever get here, use the original na_notify callback
	 * saved during na_activate().
	 */
	return kring->ckr_netif_notify(kring, p, flags);
}

static int
nx_netif_compat_na_notify_rx(struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags)
{
	/*
	 * This should never get executed, as nothing should be invoking
	 * the RX ring notify callback. The compat adapter directly
	 * calls nx_netif_mit_rx_intr() for RX completion from within
	 * nx_netif_compat_receive().
	 *
	 * If we ever get here, use the original na_notify callback
	 * saved during na_activate().
	 */
	return kring->ckr_netif_notify(kring, p, flags);
}

/* Enable/disable skywalk mode for a compat network interface. */
static int
nx_netif_compat_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
	struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
	boolean_t tx_mit, rx_mit, tx_mit_simple, rx_mit_simple, rxpoll;
	uint32_t limit = (uint32_t)sk_netif_compat_rx_mbq_limit;
	struct nx_netif *nif = nifna->nifna_netif;
	struct nexus_netif_compat_adapter *nca;
	ifnet_t ifp = na->na_ifp;
	uint32_t i, r;
	int error;

	ASSERT(na->na_type == NA_NETIF_COMPAT_DEV);
	ASSERT(!(na->na_flags & NAF_HOST_ONLY));

	SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
	    SK_KVA(na), na_activate_mode2str(mode));

	nca = (struct nexus_netif_compat_adapter *)nifna;

	switch (mode) {
	case NA_ACTIVATE_MODE_ON:
		ASSERT(SKYWALK_CAPABLE(na->na_ifp));

		nx_netif_mit_config(nifna, &tx_mit, &tx_mit_simple,
		    &rx_mit, &rx_mit_simple);

		/*
		 * Init the mitigation support on all the dev TX rings.
		 */
		if (na_get_nrings(na, NR_TX) != 0 && tx_mit) {
			nifna->nifna_tx_mit =
			    skn_alloc_type_array(tx_on, struct nx_netif_mit,
			    na_get_nrings(na, NR_TX), Z_WAITOK,
			    skmem_tag_netif_compat_mit);
			if (nifna->nifna_tx_mit == NULL) {
				SK_ERR("TX mitigation allocation failed");
				error = ENOMEM;
				goto out;
			}
		} else {
			ASSERT(nifna->nifna_tx_mit == NULL);
		}

		/*
		 * Init either poller or mitigation support on all the
		 * dev RX rings; they're mutually exclusive and poller
		 * takes precedence.
		 */
		rxpoll = (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
		if (rxpoll) {
			int err;
			__unused kern_return_t kret;
			thread_precedence_policy_data_t info;

			ASSERT((ifp->if_xflags & IFXF_LEGACY) == 0);
			ASSERT(ifp->if_input_poll != NULL);
			ASSERT(ifp->if_input_ctl != NULL);
			if ((err =
			    kernel_thread_start(netif_rxpoll_compat_thread_func,
			    ifp, &ifp->if_poll_thread)) != KERN_SUCCESS) {
				panic_plain("%s: ifp=%p couldn't get a poll "
				    "thread; err=%d", __func__, ifp, err);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			VERIFY(ifp->if_poll_thread != NULL);

			/* wait until thread is ready */
			lck_mtx_lock(&ifp->if_poll_lock);
			while (!(ifp->if_poll_flags & IF_POLLF_READY)) {
				(void) assert_wait(&ifp->if_poll_flags,
				    THREAD_UNINT);
				lck_mtx_unlock(&ifp->if_poll_lock);
				(void) thread_block(THREAD_CONTINUE_NULL);
				lck_mtx_lock(&ifp->if_poll_lock);
			}
			lck_mtx_unlock(&ifp->if_poll_lock);

			bzero(&info, sizeof(info));
			info.importance = 1;
			kret = thread_policy_set(ifp->if_poll_thread,
			    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
			    THREAD_PRECEDENCE_POLICY_COUNT);
			ASSERT(kret == KERN_SUCCESS);
			limit = if_rcvq_maxlen;
			(void) netif_rxpoll_set_params(ifp, NULL, FALSE);
			ASSERT(nifna->nifna_rx_mit == NULL);
		} else if (rx_mit) {
			nifna->nifna_rx_mit =
			    skn_alloc_type_array(rx_on, struct nx_netif_mit,
			    na_get_nrings(na, NR_RX), Z_WAITOK,
			    skmem_tag_netif_compat_mit);
			if (nifna->nifna_rx_mit == NULL) {
				SK_ERR("RX mitigation allocation failed");
				if (nifna->nifna_tx_mit != NULL) {
					skn_free_type_array(rx_fail,
					    struct nx_netif_mit,
					    na_get_nrings(na, NR_TX),
					    nifna->nifna_tx_mit);
					nifna->nifna_tx_mit = NULL;
				}
				error = ENOMEM;
				goto out;
			}
		}

		/* intercept na_notify callback on the TX rings */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			na->na_tx_rings[r].ckr_netif_notify =
			    na->na_tx_rings[r].ckr_na_notify;
			na->na_tx_rings[r].ckr_na_notify =
			    nx_netif_compat_na_notify_tx;
			if (nifna->nifna_tx_mit != NULL) {
				nx_netif_mit_init(nif, na->na_ifp,
				    &nifna->nifna_tx_mit[r],
				    &na->na_tx_rings[r], tx_mit_simple);
			}
		}

		/* intercept na_notify callback on the RX rings */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			na->na_rx_rings[r].ckr_netif_notify =
			    na->na_rx_rings[r].ckr_na_notify;
			na->na_rx_rings[r].ckr_na_notify =
			    nx_netif_compat_na_notify_rx;
			if (nifna->nifna_rx_mit != NULL) {
				nx_netif_mit_init(nif, na->na_ifp,
				    &nifna->nifna_rx_mit[r],
				    &na->na_rx_rings[r], rx_mit_simple);
			}
		}
		/*
		 * Initialize the rx queue, as nx_netif_compat_receive() can
		 * be called as soon as nx_netif_compat_catch_rx() returns.
		 */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			struct __kern_channel_ring *kr = &na->na_rx_rings[r];

			nx_mbq_safe_init(kr, &kr->ckr_rx_queue, limit,
			    &nexus_mbq_lock_group, &nexus_lock_attr);
			SK_DF(SK_VERB_NETIF,
			    "na \"%s\" (0x%llx) initialized kr \"%s\" "
			    "(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
			    kr->ckr_name, SK_KVA(kr), kr->ckr_flags, CKRF_BITS);
		}

		/*
		 * Prepare packet buffers for the tx rings; don't preallocate
		 * the mbufs here, leave this to nx_netif_compat_na_txsync().
		 */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			na->na_tx_rings[r].ckr_tx_pool = NULL;
		}

		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			na->na_tx_rings[r].ckr_tx_pool =
			    skn_alloc_type_array(tx_pool_on, struct mbuf *,
			    na_get_nslots(na, NR_TX), Z_WAITOK,
			    skmem_tag_netif_compat_pool);
			if (na->na_tx_rings[r].ckr_tx_pool == NULL) {
				SK_ERR("ckr_tx_pool allocation failed");
				error = ENOMEM;
				goto free_tx_pools;
			}
		}

		/* Prepare to intercept incoming traffic. */
		error = nx_netif_compat_catch_rx(nca, TRUE);
		if (error != 0) {
			SK_ERR("RX intercept failed (%d)", error);
			goto uncatch;
		}
		nx_netif_filter_enable(nifna->nifna_netif);
		nx_netif_flow_enable(nifna->nifna_netif);
		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
		break;

	case NA_ACTIVATE_MODE_DEFUNCT:
		ASSERT(SKYWALK_CAPABLE(na->na_ifp));
		break;

	case NA_ACTIVATE_MODE_OFF:
		/*
		 * Note that here we cannot assert SKYWALK_CAPABLE()
		 * as we're called in the destructor path.
		 */
		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
		nx_netif_flow_disable(nifna->nifna_netif);
		nx_netif_filter_disable(nifna->nifna_netif);

		/*
		 * Signal the poller thread to terminate itself, and
		 * wait for it to exit.
		 */
		if (ifp->if_poll_thread != THREAD_NULL) {
			ASSERT(net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
			ASSERT((ifp->if_xflags & IFXF_LEGACY) == 0);
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			ifp->if_poll_flags |= IF_POLLF_TERMINATING;
			wakeup_one((caddr_t)&ifp->if_poll_thread);
			lck_mtx_unlock(&ifp->if_poll_lock);

			/* wait for poller thread to terminate */
			lck_mtx_lock(&ifp->if_poll_lock);
			while (ifp->if_poll_thread != THREAD_NULL) {
				SK_DF(SK_VERB_NETIF_POLL,
				    "%s: waiting for poller thread to terminate",
				    if_name(ifp));
				(void) msleep(&ifp->if_poll_thread,
				    &ifp->if_poll_lock, (PZERO - 1),
				    "netif_poll_thread_exit", NULL);
			}
			lck_mtx_unlock(&ifp->if_poll_lock);
			SK_DF(SK_VERB_NETIF_POLL,
			    "%s: poller thread termination complete",
			    if_name(ifp));
		}

		/* Do not intercept packets on the rx path. */
		(void) nx_netif_compat_catch_rx(nca, FALSE);

		/* Free the mbufs going to the channel rings */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			nx_mbq_safe_purge(&na->na_rx_rings[r].ckr_rx_queue);
			nx_mbq_safe_destroy(&na->na_rx_rings[r].ckr_rx_queue);
		}

		/* reset all TX notify callbacks */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			na->na_tx_rings[r].ckr_na_notify =
			    na->na_tx_rings[r].ckr_netif_notify;
			na->na_tx_rings[r].ckr_netif_notify = NULL;
			if (nifna->nifna_tx_mit != NULL) {
				na->na_tx_rings[r].ckr_netif_mit_stats = NULL;
				nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
			}
		}

		if (nifna->nifna_tx_mit != NULL) {
			skn_free_type_array(tx_off, struct nx_netif_mit,
			    na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
			nifna->nifna_tx_mit = NULL;
		}

		/* reset all RX notify callbacks */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			na->na_rx_rings[r].ckr_na_notify =
			    na->na_rx_rings[r].ckr_netif_notify;
			na->na_rx_rings[r].ckr_netif_notify = NULL;
			if (nifna->nifna_rx_mit != NULL) {
				na->na_rx_rings[r].ckr_netif_mit_stats = NULL;
				nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
			}
		}
		if (nifna->nifna_rx_mit != NULL) {
			skn_free_type_array(rx_off, struct nx_netif_mit,
			    na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
			nifna->nifna_rx_mit = NULL;
		}

		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			for (i = 0; i < na_get_nslots(na, NR_TX); i++) {
				nx_netif_compat_ring_free(na->
				    na_tx_rings[r].ckr_tx_pool[i]);
				na->na_tx_rings[r].ckr_tx_pool[i] = NULL;
			}
			skn_free_type_array(tx_pool_off,
			    struct mbuf *, na_get_nslots(na, NR_TX),
			    na->na_tx_rings[r].ckr_tx_pool);
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	return 0;

uncatch:
	(void) nx_netif_compat_catch_rx(nca, FALSE);

free_tx_pools:
	for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
		if (na->na_tx_rings[r].ckr_tx_pool == NULL) {
			continue;
		}
		for (i = 0; i < na_get_nslots(na, NR_TX); i++) {
			nx_netif_compat_ring_free(
				na->na_tx_rings[r].ckr_tx_pool[i]);
			na->na_tx_rings[r].ckr_tx_pool[i] = NULL;
		}
		skn_free_type_array(tx_pool, struct mbuf *,
		    na_get_nslots(na, NR_TX), na->na_tx_rings[r].ckr_tx_pool);
		na->na_tx_rings[r].ckr_tx_pool = NULL;
	}
	if (nifna->nifna_tx_mit != NULL) {
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
		}
		skn_free_type_array(tx, struct nx_netif_mit,
		    na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
		nifna->nifna_tx_mit = NULL;
	}
	if (nifna->nifna_rx_mit != NULL) {
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
		}
		skn_free_type_array(rx, struct nx_netif_mit,
		    na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
		nifna->nifna_rx_mit = NULL;
	}
	for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
		nx_mbq_safe_destroy(&na->na_rx_rings[r].ckr_rx_queue);
	}
out:

	return error;
}

/*
 * Record completed transmissions and update ktail.
 *
 * The oldest tx buffer not yet completed is at ckr_ktail + 1,
 * ckr_khead is the first unsent buffer.
 */
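/*
 * For example, with ckr_lim == 127, ckr_ktail == 5 and ckr_khead == 20,
 * slots 6..19 are examined; if every mbuf in that range has completed,
 * ckr_ktail advances to 19 and the function returns 14.
 */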
/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static uint32_t
nx_netif_compat_tx_clean(struct netif_stats *nifs,
    struct __kern_channel_ring *kring)
{
	const slot_idx_t lim = kring->ckr_lim;
	slot_idx_t nm_i = SLOT_NEXT(kring->ckr_ktail, lim);
	slot_idx_t khead = kring->ckr_khead;
	uint32_t n = 0;
	struct mbuf **ckr_tx_pool = kring->ckr_tx_pool;

	while (nm_i != khead) { /* buffers not completed */
		struct mbuf *m = ckr_tx_pool[nm_i];

		if (__improbable(m == NULL)) {
			/* this is done, try to replenish the entry */
			VERIFY(nm_i <= UINT16_MAX);
			ckr_tx_pool[nm_i] = m =
			    nx_netif_compat_ring_alloc(M_WAITOK,
			    kring->ckr_max_pkt_len, (uint16_t)nm_i);
			if (__improbable(m == NULL)) {
				STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
				STATS_INC(nifs, NETIF_STATS_DROP);
				SK_DF(SK_VERB_MEM,
				    "mbuf allocation failed (slot %u)", nm_i);
				/* XXX how do we proceed ? break ? */
				return -ENOMEM;
			}
		} else if (mbuf_ring_cluster_is_active(m)) {
			break; /* This mbuf is still busy */
		}
		n++;
		nm_i = SLOT_NEXT(nm_i, lim);
	}
	kring->ckr_ktail = SLOT_PREV(nm_i, lim);

	SK_RDF(SK_VERB_NETIF, 10, "kr \"%s\" (0x%llx) tx completed [%u] -> "
	    "kh %u kt %u | rh %u rt %u", kring->ckr_name, SK_KVA(kring),
	    n, kring->ckr_khead, kring->ckr_ktail,
	    kring->ckr_rhead, kring->ckr_rtail);

	return n;
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_NO_INLINE_ATTRIBUTE
static void
nx_netif_compat_set_tx_event(struct __kern_channel_ring *kring,
    slot_idx_t khead)
{
	const slot_idx_t lim = kring->ckr_lim;
	slot_idx_t ntc = SLOT_NEXT(kring->ckr_ktail, lim); /* next to clean */
	struct mbuf *m;
	slot_idx_t e;

	if (ntc == khead) {
		return; /* all buffers are free */
	}
	/*
	 * We have pending packets in the driver between ckr_ktail+1 and
	 * ckr_khead, and we have to choose one of these slots to generate
	 * a TX notification. There is a race, but this is only called
	 * within TX sync which does a double check.
	 */
	if (__probable(netif_tx_event_mode == 0)) {
		/*
		 * Choose the first pending slot, to be safe against drivers
		 * reordering mbuf transmissions.
		 */
		e = ntc;
	} else {
		/*
		 * Choose a slot in the middle, so that we don't risk a
		 * situation where the client continuously wakes up, fills
		 * one or a few TX slots, and goes to sleep again.
		 */
		slot_idx_t n = lim + 1;

		if (khead >= ntc) {
			e = (khead + ntc) >> 1;
		} else { /* wrap around */
			e = (khead + n + ntc) >> 1;
			if (e >= n) {
				e -= n;
			}
		}
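		/*
		 * E.g. with lim == 127 (n == 128), ntc == 120 and
		 * khead == 10, the pending region wraps across slots
		 * 120..127 and 0..9; e = (10 + 128 + 120) >> 1 == 129,
		 * which the correction above folds to 1, roughly the
		 * midpoint of that region.
		 */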

		if (__improbable(e >= n)) {
			SK_ERR("This cannot happen");
			e = 0;
		}
	}
	m = kring->ckr_tx_pool[e];

	for (;;) {
		uint32_t p = 0, pn, i, f;
		int err;

		(void) mbuf_cluster_get_prop(m, &p);
		f = NMB_GET_FLAGS(p);
		i = NMB_GET_INDEX(p);

		if (f & NMB_PROPF_TX_NOTIFY) {
			/*
			 * This can happen if there is already an event
			 * on the ring slot 'e': There is nothing to do.
			 */
			SK_DF(SK_VERB_NETIF | SK_VERB_NOTIFY | SK_VERB_TX,
			    "TX_NOTIFY already set at %u m 0x%llx kc %u ntc %u",
			    e, SK_KVA(m), khead, ntc);
			return;
		}

		f |= NMB_PROPF_TX_NOTIFY;
		pn = NMB_SET_FLAGS(p, f);

		err = mbuf_cluster_set_prop(m, p, pn);
		if (err != 0) {
			if (err == EBUSY) { /* try again */
				continue;
			}
			/* TODO: [email protected] -- what to do? */
			SK_ERR("Failed to set TX_NOTIFY at %u m 0x%llx kh %u "
			    "ntc %u, err %d", e, SK_KVA(m), khead, ntc, err);
		} else {
			SK_DF(SK_VERB_NETIF | SK_VERB_NOTIFY | SK_VERB_TX,
			    "Request TX_NOTIFY at %u m 0x%llx kh %u ntc %u",
			    e, SK_KVA(m), khead, ntc);
		}
		break;
	}
}

#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_compat_na_txsync_log(struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags, slot_idx_t nm_i)
{
	SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x "
	    "nm_i %u, kh %u kt %u | rh %u rt %u",
	    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
	    flags, nm_i, kring->ckr_khead, kring->ckr_ktail,
	    kring->ckr_rhead, kring->ckr_rtail);
}
#endif /* SK_LOG */

/*
 * nx_netif_compat_na_txsync() transforms packets into mbufs and passes
 * them to the device driver.
 */
static int
nx_netif_compat_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(p)
	struct nexus_adapter *na = KRNA(kring);
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
	slot_idx_t nm_i;        /* index into the channel ring */ // j
	const slot_idx_t head = kring->ckr_rhead;
	uint32_t slot_count = 0;
	uint32_t byte_count = 0;

	STATS_INC(nifs, NETIF_STATS_TX_SYNC);

	/* update our work timestamp */
	na->na_work_ts = _net_uptime;

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->ckr_khead;
	if (nm_i != head) {     /* we have new packets to send */
		while (nm_i != head) {
			struct __kern_slot_desc *sd = KR_KSD(kring, nm_i);

			/* device-specific */
			struct mbuf *m;
			int tx_ret;
			/*
			 * Take a mbuf from the tx pool (replenishing the pool
			 * entry if necessary) and copy in the user packet.
			 */
			VERIFY(nm_i <= UINT16_MAX);
			m = kring->ckr_tx_pool[nm_i];
			if (__improbable(m == NULL)) {
				kring->ckr_tx_pool[nm_i] = m =
				    nx_netif_compat_ring_alloc(M_WAITOK,
				    kring->ckr_max_pkt_len, (uint16_t)nm_i);
				if (__improbable(m == NULL)) {
					STATS_INC(nifs, NETIF_STATS_DROP);
					STATS_INC(nifs,
					    NETIF_STATS_DROP_NOMEM_MBUF);
					SK_DF(SK_VERB_MEM,
					    "%s(%d) kr \"%s\" (0x%llx) "
					    "krflags 0x%b ckr_tx_pool[%u] "
					    "allocation failed",
					    sk_proc_name_address(p),
					    sk_proc_pid(p), kring->ckr_name,
					    SK_KVA(kring), kring->ckr_flags,
					    CKRF_BITS, nm_i);
					/*
					 * Here we could schedule a timer
					 * which retries to replenish after
					 * a while, and notifies the client
					 * when it manages to replenish some
					 * slot. In any case we break early
					 * to avoid crashes.
					 */
					break;
				}
				STATS_INC(nifs, NETIF_STATS_TX_REPL);
			}

			byte_count += sd->sd_pkt->pkt_length;
			slot_count++;

			/*
			 * We should ask notifications when CS_REPORT is set,
			 * or roughly every half ring. To optimize this,
			 * we set a notification event when the client runs
			 * out of TX ring space, or when transmission fails.
			 * In the latter case we also break early.
			 */
			tx_ret = nx_netif_compat_xmit_frame(na, m, sd->sd_pkt);
			if (__improbable(tx_ret)) {
				SK_RD(5, "start_xmit failed: err %d "
				    "[nm_i %u, h %u, kt %u]",
				    tx_ret, nm_i, head, kring->ckr_ktail);
				/*
				 * No room for this mbuf in the device driver.
				 * Request a notification FOR A PREVIOUS MBUF,
				 * then call nx_netif_compat_tx_clean(kring) to
				 * do the double check and see if we can free
				 * more buffers. If there is space continue,
				 * else break; NOTE: the double check is
				 * necessary if the problem occurs in the
				 * txsync call after selrecord(). Also, we
				 * need some way to tell the caller that not
				 * all buffers were queued onto the device
				 * (this was not a problem with native skywalk
				 * driver where space is preallocated). The
				 * bridge has a similar problem and we solve
				 * it there by dropping the excess packets.
				 */
				nx_netif_compat_set_tx_event(kring, nm_i);
				if (nx_netif_compat_tx_clean(nifs, kring)) {
					/* space now available */
					continue;
				} else {
					break;
				}
			}
			nm_i = SLOT_NEXT(nm_i, kring->ckr_lim);
			STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
		}

		/*
		 * Update khead to the next slot to transmit; here nm_i
		 * is not necessarily head, since we may have broken early.
		 */
		kring->ckr_khead = nm_i;

		kr_update_stats(kring, slot_count, byte_count);
	}

	/*
	 * Second, reclaim completed buffers
	 */
	if ((flags & NA_SYNCF_FORCE_RECLAIM) || kr_txempty(kring)) {
		/*
		 * No more available slots? Set a notification event on a
		 * channel slot that will be cleaned in the future. No
		 * doublecheck is performed, since nx_netif_compat_na_txsync()
		 * will be called twice by ch_event().
		 */
		nx_netif_compat_set_tx_event(kring, nm_i);
	}
	kring->ckr_pending_intr = 0;

#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) {
		nx_netif_compat_na_txsync_log(kring, p, flags, nm_i);
	}
#endif /* SK_LOG */

	(void) nx_netif_compat_tx_clean(nifs, kring);

	return 0;
}

#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_compat_receive_log1(const struct __kern_channel_ring *kring,
    struct nx_mbq *q)
{
	SK_RD(10, "kr \"%s\" (0x%llx) krflags 0x%b FULL "
	    "(qlen %u qsize %llu), kc %u kt %u", kring->ckr_name,
	    SK_KVA(kring), kring->ckr_flags, CKRF_BITS, nx_mbq_len(q),
	    nx_mbq_size(q), kring->ckr_khead, kring->ckr_ktail);
}

/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_compat_receive_log2(const struct __kern_channel_ring *kring,
    struct nx_mbq *q, const struct ifnet_stat_increment_param *s)
{
	SK_RDF(SK_VERB_RX, 10, "kr \"%s\" (0x%llx) krflags 0x%b OK, "
	    "added %u packets %u bytes, now qlen %u qsize %llu",
	    kring->ckr_name, SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
	    s->packets_in, s->bytes_in, nx_mbq_len(q), nx_mbq_size(q));
}
#endif /* SK_LOG */

/*
 * This is the default RX path for the compat netif nexus. Packets
 * are enqueued and later extracted by nx_netif_compat_na_rxsync().
 */
/* TODO: [email protected] -- implement chaining */
static errno_t
nx_netif_compat_receive(struct ifnet *ifp, struct mbuf *m_head,
    struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
    boolean_t poll, struct thread *tp)
{
#pragma unused(tp)
	boolean_t ifp_rxpoll = ((ifp->if_eflags & IFEF_RXPOLL) && net_rxpoll);
	struct nexus_adapter *na = &NA(ifp)->nifna_up;
	struct __kern_channel_ring *kring;
	struct netif_stats *nifs;
	uint32_t r, work_done;
	unsigned int qlimit;
	struct nx_mbq *q;
	errno_t err = 0;

	/* update our work timestamp */
	na->na_work_ts = _net_uptime;

	if (__improbable(m_head == NULL)) {
		ASSERT(m_tail == NULL);
		ASSERT(poll);
		ASSERT(s->bytes_in == 0);
		ASSERT(s->packets_in == 0);
	}

	/* BEGIN CSTYLED */
	/*
	 * TODO: [email protected] -- this needs to be revisited once we
	 * have a clear definition of how multiple RX rings are mapped
	 * to flows; this would involve the hardware/driver doing some
	 * kind of classification and RSS-like demuxing.
	 *
	 * When we enable that, we'll need to consider sifting thru the
	 * mbuf chain we get from the caller, and enqueue them across
	 * per-ring temporary mbuf queue (along with marking the ring
	 * indicating pending packets.)  During second stage processing,
	 * we'll issue nx_netif_mit_rx_intr() on each marked ring to
	 * dispatch the packets upstream.
	 *
	 *	r = MBUF_RXQ(m);
	 *
	 *	if (r >= na->na_num_rx_rings)
	 *		r = r % na->na_num_rx_rings;
	 *
	 *	kring = &na->na_rx_rings[r];
	 *	q = &kring->ckr_rx_queue;
	 *
	 * For now, target only the first RX ring (ring 0).
	 */
	/* END CSTYLED */
	r = 0;  /* receive ring number */
	kring = &na->na_rx_rings[r];

	ASSERT(na->na_type == NA_NETIF_COMPAT_DEV);
	nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;

	if (__improbable((!NA_IS_ACTIVE(na)) || KR_DROP(kring))) {
		/* BEGIN CSTYLED */
		/*
		 * If we deal with multiple rings, change above to:
		 *
		 *	if (!NA_IS_ACTIVE(na) || r >= na_get_nrings(na, NR_RX)))
		 *
		 * then here do:
		 *
		 *	if (r >= na_get_nrings(na, NR_RX)) {
		 *		SK_ERR("na \"%s\" (0x%llx) invalid r %u >= %u",
		 *		    na->na_name, SK_KVA(na), r,
		 *		    na_get_nrings(na, NR_RX));
		 *	}
		 */
		/* END CSTYLED */
		m_freem_list(m_head);
		if (!NA_IS_ACTIVE(na)) {
			STATS_ADD(nifs, NETIF_STATS_DROP_NA_INACTIVE,
			    s->packets_in);
		} else if (KR_DROP(kring)) {
			STATS_ADD(nifs, NETIF_STATS_DROP_KRDROP_MODE,
			    s->packets_in);
		}
		STATS_ADD(nifs, NETIF_STATS_DROP, s->packets_in);
		err = ENXIO;
		goto done;
	}
	if (__improbable(m_head == NULL)) {
		goto send_packets;
	}

	q = &kring->ckr_rx_queue;
	nx_mbq_lock_spin(q);
	qlimit = nx_mbq_limit(q);
	if (ifp_rxpoll) {
		/*
		 * The qlimit of the receive queue is much smaller when the
		 * interface is in opportunistic polling mode. When such an
		 * interface is operating in interrupt mode, a sudden burst
		 * of input packets can cause the receive queue to build up
		 * quickly due to the scheduling latency in waking up the
		 * poller thread. To avoid drops caused by this latency,
		 * we provide a leeway on the qlimit.
		 */
		qlimit <<= 5;
	}
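	/*
	 * The shift above scales the polling-mode qlimit by 32x; e.g. a
	 * poll-mode limit of 128 mbufs would tolerate interrupt-mode
	 * bursts of up to 4096 queued mbufs before the overflow drop
	 * below kicks in.
	 */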
	if (__improbable(nx_mbq_len(q) > qlimit)) {
#if SK_LOG
		if (__improbable(sk_verbose != 0)) {
			nx_netif_compat_receive_log1(kring, q);
		}
#endif /* SK_LOG */
		nx_mbq_unlock(q);
		m_freem_list(m_head);
		STATS_ADD(nifs, NETIF_STATS_DROP_RXQ_OVFL, s->packets_in);
		STATS_ADD(nifs, NETIF_STATS_DROP, s->packets_in);
		goto send_packets;
	}
	nx_mbq_enq_multi(q, m_head, m_tail, s->packets_in, s->bytes_in);

#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) {
		nx_netif_compat_receive_log2(kring, q, s);
	}
#endif /* SK_LOG */

	nx_mbq_unlock(q);

	(void) ifnet_stat_increment_in(ifp, s->packets_in, s->bytes_in,
	    s->errors_in);

	if (poll) {
		/* update incremental poll stats */
		PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
	}

send_packets:
	/*
	 * If the interface supports opportunistic input polling, input
	 * packet processing is performed in the context of the poller
	 * thread.
	 */
	if (!poll && ifp_rxpoll) {
		/* wakeup the poller thread */
		ifnet_poll(ifp);
	} else {
		/*
		 * Wake up the mitigation thread, if needed, to perform
		 * input packet processing.
		 * If the interface supports opportunistic input polling,
		 * the mitigation thread is not created and input packet
		 * processing happens in the context of the poller thread.
		 */
		err = nx_netif_mit_rx_intr((NAKR(na, NR_RX) + r), kernproc, 0,
		    &work_done);
	}
done:
	return err;
}

#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_compat_na_rxsync_log(const struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags, slot_idx_t nm_i)
{
	SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b "
	    "ring %u flags 0x%x nm_i %u kt %u", sk_proc_name_address(p),
	    sk_proc_pid(p), kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
	    CKRF_BITS, kring->ckr_ring_id, flags, nm_i, kring->ckr_ktail);
}
#endif /* SK_LOG */

#if DEBUG || DEVELOPMENT
/*
 * Split an mbuf chain at offset "split", such that the first mbuf
 * is a zero-length M_PKTHDR, followed by the rest of the mbufs.
 * Typically, the "split" value is equal to the size of the link
 * layer header, e.g. Ethernet header.
 */
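/*
 * For example, with split == 14 (an Ethernet header) and a 1514-byte
 * frame in a single mbuf, the result is a chain whose head is a
 * zero-length M_PKTHDR mbuf (its m_data advanced past the 14 header
 * bytes, which remain in its buffer) followed by the 1500 bytes of
 * payload; the caller can then wind m_data back to re-expose the
 * header in the head mbuf.
 */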
static struct mbuf *
nx_netif_rx_split(struct mbuf *m0, uint32_t split)
{
	struct mbuf *m = m0;

	if (split == 0) {
		split = MHLEN;
		M_PREPEND(m, split, M_DONTWAIT, 0);
	} else {
		m->m_data -= split;
		m->m_len += split;
		m_pktlen(m) += split;

		ASSERT((uintptr_t)m->m_data >= (uintptr_t)mbuf_datastart(m));
		ASSERT((uintptr_t)m->m_data < ((uintptr_t)mbuf_datastart(m) +
		    mbuf_maxlen(m)));
	}
	if (m != NULL) {
		struct mbuf *n = m_split(m, split, M_DONTWAIT);
		if (n == NULL) {
			m_freem(m);
			return NULL;
		}
		m0 = m;
		ASSERT((uint32_t)m->m_len == split);
		m->m_data += split;
		m->m_len -= split;
		while (m->m_next != NULL) {
			m = m->m_next;
		}
		m->m_next = n;
		m = m0;
		m_pktlen(m) = m_length2(m, NULL);
	}

	return m;
}
#endif /* DEBUG || DEVELOPMENT */

/*
 * nx_netif_compat_na_rxsync() extracts mbufs from the queue filled by
 * nx_netif_compat_receive() and puts their content in the channel
 * receive ring.
 *
 * Accesses to the kring are serialized via the kring->ckr_rx_queue lock,
 * because the rx handler is asynchronous.
 */
static int
nx_netif_compat_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(p)
	struct nexus_adapter *na = KRNA(kring);
	struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
	struct nx_netif *nif = nifna->nifna_netif;
	slot_idx_t nm_i;        /* index into the channel ring */
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
	uint32_t npkts = 0;
	uint32_t byte_count = 0;
	const slot_idx_t lim = kring->ckr_lim;
	const slot_idx_t head = kring->ckr_rhead;
	boolean_t force_update = ((flags & NA_SYNCF_FORCE_READ) ||
	    kring->ckr_pending_intr != 0);
	struct mbuf *m;
	uint32_t n;
	uint32_t avail;         /* in slots */
	int err, mlen;
	boolean_t attach_mbuf = FALSE;
	struct nx_mbq *q, tmpq;
	struct kern_pbufpool *pp = kring->ckr_pp;
	uint32_t ph_cnt, i = 0;

	ASSERT(pp->pp_max_frags == 1);
	ASSERT(head <= lim);

	/*
	 * First part: skip past packets that userspace has released.
	 * This can possibly make room for the second part.
	 * equivalent to kr_reclaim()
	 */
	if (kring->ckr_khead != head) {
		kring->ckr_khead = head;
		/* ensure global visibility */
		membar_sync();
	}

	STATS_INC(nifs, NETIF_STATS_RX_SYNC);

	/*
	 * Second part: import newly received packets.
	 */
	if (!force_update) {
		return 0;
	}

	/* update our work timestamp */
	na->na_work_ts = _net_uptime;

	/* first empty slot in the receive ring */
	nm_i = kring->ckr_ktail;

	/*
	 * Compute the available space (in bytes) in this ring.
	 * The first slot that is not considered in is the one
	 * before ckr_khead.
	 */
	avail = kr_available_slots_rxring(kring);
	if (__improbable(avail == 0)) {
		return 0;
	}

	if (NA_KERNEL_ONLY(na)) {
		ASSERT(na->na_ifp != NULL &&
		    fsw_ifp_to_fsw(na->na_ifp) != NULL);
		/*
		 * We are not supporting attachment to bridge flowswitch
		 * for now, until we support PKT_F_MBUF_DATA packets
		 * in bridge flowswitch.
		 */
		attach_mbuf = TRUE;
	}

	/*
	 * Quickly move all of ckr_rx_queue to a temporary queue to dequeue
	 * from. For each mbuf, attach or copy it to the packet attached
	 * to the slot. Release the lock while we're doing that, to allow
	 * for the input thread to enqueue.
	 */
	q = &kring->ckr_rx_queue;
	nx_mbq_init(&tmpq, NX_MBQ_NO_LIMIT);
	nx_mbq_lock_spin(q);
	nx_mbq_concat(&tmpq, q);
	nx_mbq_unlock(q);

	if (__improbable(nx_mbq_len(&tmpq) == 0)) {
		return 0;
	}

	ph_cnt = MIN(avail, nx_mbq_len(&tmpq));
	err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
	    &ph_cnt);
	if (err == ENOMEM) {
		SK_DF(SK_VERB_MEM, "%s(%p) failed to alloc %d pkts for kr "
		    "0x%llu", sk_proc_name_address(p), sk_proc_pid(p), ph_cnt,
		    SK_KVA(kring));
		goto done;
	}
	ASSERT(ph_cnt != 0);

	for (n = 0; (n < ph_cnt) &&
	    ((m = nx_mbq_deq(&tmpq)) != NULL); n++) {
		struct __kern_slot_desc *ksd = KR_KSD(kring, nm_i);
		struct __kern_packet *pkt;
		kern_packet_t ph;
		uint8_t hlen;
		uint16_t tag;
		char *h;

		ASSERT(m->m_flags & M_PKTHDR);
		mlen = m_pktlen(m);
		h = m->m_pkthdr.pkt_hdr;
		if (__improbable(mlen == 0 || h == NULL ||
		    h < (char *)mbuf_datastart(m) || h > (char *)m->m_data)) {
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			SK_RD(5, "kr \"%s\" (0x%llx) m 0x%llx len %d "
			    "bad pkt_hdr", kring->ckr_name,
			    SK_KVA(kring), SK_KVA(m), mlen);
			m_freem(m);
			m = NULL;
			continue;
		}

		hlen = (uint8_t)(m->m_data - h);
		mlen += hlen;

#if DEBUG || DEVELOPMENT
		if (__improbable(netif_rx_split != 0)) {
			/* callee frees mbuf upon failure */
			if ((m = nx_netif_rx_split(m, hlen)) == NULL) {
				continue;
			}

			ASSERT((uintptr_t)m->m_data >=
			    (uintptr_t)mbuf_datastart(m));
			ASSERT((uintptr_t)m->m_data <
			    ((uintptr_t)mbuf_datastart(m) +
			    mbuf_maxlen(m)));
		}
#endif /* DEBUG || DEVELOPMENT */

		ph = kring->ckr_scratch[i];
		ASSERT(ph != 0);
		kring->ckr_scratch[i] = 0;
		pkt = SK_PTR_ADDR_KPKT(ph);
		++i;

		/*
		 * Wind back the data pointer to include any frame headers
		 * as part of the copy below. The header length is then
		 * stored in the corresponding metadata area of the buffer.
		 */
		m->m_data -= hlen;
		m->m_len += hlen;
		m->m_pkthdr.len += hlen;
		ASSERT(mlen == m->m_pkthdr.len);

		pkt->pkt_link_flags = 0;
		if (m->m_flags & M_HASFCS) {
			pkt->pkt_link_flags |= PKT_LINKF_ETHFCS;
		}
		if (mbuf_get_vlan_tag(m, &tag) == 0) {
			(void) kern_packet_set_vlan_tag(SK_PKT2PH(pkt), tag,
			    FALSE);
		}
		SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
		    "kr \"%s\" (0x%llx) m 0x%llx idx %u slot_len %d",
		    kring->ckr_name, SK_KVA(kring), SK_KVA(m), nm_i, mlen);

		if (__probable(attach_mbuf)) {
			STATS_INC(nifs, NETIF_STATS_RX_COPY_ATTACH);
			err = __packet_initialize_with_mbuf(pkt, m, 0, hlen);
			VERIFY(err == 0);
		} else if (__probable(mlen <= (int)PP_BUF_SIZE_DEF(pp))) {
			STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
			/*
			 * We're sending this up to a user channel opened
			 * directly to the netif; copy everything.
			 */
			err = __packet_set_headroom(ph, 0);
			VERIFY(err == 0);
			err = __packet_set_link_header_length(ph, hlen);
			VERIFY(err == 0);
			nif->nif_pkt_copy_from_mbuf(NR_RX, ph, 0, m, 0,
			    mlen, FALSE, 0);
			/* finalize and attach the packet */
			err = __packet_finalize(ph);
			VERIFY(err == 0);
			m_freem(m);
			m = NULL;
		} else {
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			STATS_INC(nifs, NETIF_STATS_DROP);
			m_freem(m);
			m = NULL;
			kern_pbufpool_free(pp, ph);
			ph = 0;
			pkt = NULL;
			continue;
		}

		err = KR_SLOT_ATTACH_METADATA(kring, ksd,
		    (struct __kern_quantum *)pkt);
		ASSERT(err == 0);

		byte_count += mlen;
		++npkts;
		ASSERT(npkts < kring->ckr_num_slots);
		nm_i = SLOT_NEXT(nm_i, lim);
	}

	if (__improbable(i < ph_cnt)) {
		kern_pbufpool_free_batch(pp, &kring->ckr_scratch[i],
		    (ph_cnt - i));
	}

	ASSERT(npkts <= ph_cnt);
	kr_update_stats(kring, npkts, byte_count);

	if (npkts != 0) {
		kring->ckr_ktail = nm_i;
		STATS_ADD(nifs, NETIF_STATS_RX_PACKETS, npkts);
	}
	kring->ckr_pending_intr = 0;

#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) {
		nx_netif_compat_na_rxsync_log(kring, p, flags, nm_i);
	}
#endif /* SK_LOG */

done:
	/*
	 * If we didn't process all packets in the temporary queue,
	 * move them back to the head of ckr_rx_queue.
	 */
	if (!nx_mbq_empty(&tmpq)) {
		nx_mbq_lock_spin(q);
		nx_mbq_concat(&tmpq, q);
		ASSERT(nx_mbq_empty(q));
		nx_mbq_concat(q, &tmpq);
		nx_mbq_unlock(q);
	}
	ASSERT(nx_mbq_empty(&tmpq));

	return 0;
}

static void
nx_netif_compat_na_dtor(struct nexus_adapter *na)
{
	struct ifnet *ifp;
	struct nexus_netif_compat_adapter *nca =
	    (struct nexus_netif_compat_adapter *)na;

	SK_LOCK_ASSERT_HELD();

	SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx)", na->na_name, SK_KVA(na));

	/*
	 * If the finalizer callback hasn't been called for whatever
	 * reasons, pick up the embryonic ifnet stored in na_private.
	 * Otherwise, release the I/O refcnt of a non-NULL na_ifp.
	 */
	if ((ifp = na->na_ifp) == NULL) {
		ifp = na->na_private;
		na->na_private = NULL;
	} else {
		ifnet_decr_iorefcnt(ifp);
		na->na_ifp = NULL;
	}

	if (nca->nca_up.nifna_netif != NULL) {
		nx_netif_release(nca->nca_up.nifna_netif);
		nca->nca_up.nifna_netif = NULL;
	}
	ASSERT(!SKYWALK_NATIVE(ifp));
}

/*
 * nx_netif_compat_attach() makes it possible to use skywalk on
 * a device without native skywalk support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 */
int
nx_netif_compat_attach(struct kern_nexus *nx, struct ifnet *ifp)
{
	struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
	struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
	struct nexus_netif_compat_adapter *devnca = NULL;
	struct nexus_netif_compat_adapter *hostnca = NULL;
	struct nexus_adapter *devna = NULL;
	struct nexus_adapter *hostna = NULL;
	boolean_t embryonic = FALSE;
	uint32_t tx_rings, tx_slots;
	int retval = 0;

	SK_LOCK_ASSERT_HELD();
	ASSERT(!SKYWALK_NATIVE(ifp));
	ASSERT(!SKYWALK_CAPABLE(ifp));
	ASSERT(ifp->if_na == NULL);
	ASSERT(ifp->if_na_ops == NULL);

	devnca = na_netif_compat_alloc(Z_WAITOK);
	hostnca = na_netif_compat_alloc(Z_WAITOK);

	/*
	 * We can be called for two different interface states:
	 *
	 * Fully attached: get an io ref count; upon success, this
	 * holds a reference to the ifnet for the ifp pointer stored
	 * in 'na_ifp' down below for both adapters.
	 *
	 * Embryonic: temporarily hold the ifnet in na_private, which
1585 * upon a successful ifnet_attach(), will be moved over to
1586 * the 'na_ifp' with an io ref count held.
1587 *
1588 * The ifnet in 'na_ifp' will be released by na_release_locked().
1589 */
1590 if (!ifnet_is_attached(ifp, 1)) {
1591 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
1592 ifp = NULL;
1593 retval = ENXIO;
1594 goto err;
1595 }
1596 embryonic = TRUE;
1597 }
1598
1599 /* initialize the (compat) device netif adapter */
1600 devnca->nca_up.nifna_netif = nif;
1601 nx_netif_retain(nif);
1602 devna = &devnca->nca_up.nifna_up;
1603 (void) strncpy(devna->na_name, ifp->if_xname, sizeof(devna->na_name) - 1);
1604 devna->na_name[sizeof(devna->na_name) - 1] = '\0';
1605 uuid_generate_random(devna->na_uuid);
1606 if (embryonic) {
1607 /*
1608 * We will move this over to na_ifp once
1609 * the interface is fully attached.
1610 */
1611 devna->na_private = ifp;
1612 ASSERT(devna->na_ifp == NULL);
1613 } else {
1614 ASSERT(devna->na_private == NULL);
1615 /* use I/O refcnt from ifnet_is_attached() */
1616 devna->na_ifp = ifp;
1617 }
1618
1619 devna->na_type = NA_NETIF_COMPAT_DEV;
1620 devna->na_free = na_netif_compat_free;
1621 devna->na_activate = nx_netif_compat_na_activate;
1622 devna->na_txsync = nx_netif_compat_na_txsync;
1623 devna->na_rxsync = nx_netif_compat_na_rxsync;
1624 devna->na_dtor = nx_netif_compat_na_dtor;
1625 devna->na_krings_create = nx_netif_dev_krings_create;
1626 devna->na_krings_delete = nx_netif_dev_krings_delete;
1627 devna->na_special = nx_netif_na_special;
1628
1629 *(nexus_stats_type_t *)(uintptr_t)&devna->na_stats_type =
1630 NEXUS_STATS_TYPE_INVALID;
1631
1632 if (skywalk_netif_direct_allowed(ifp->if_xname)) {
1633 tx_rings = nxp->nxp_tx_rings;
1634 tx_slots = nxp->nxp_tx_slots;
1635 } else {
1636 tx_rings = 0;
1637 tx_slots = 0;
1638 }
1639 na_set_nrings(devna, NR_TX, tx_rings);
1640 na_set_nrings(devna, NR_RX, nxp->nxp_rx_rings);
1641 na_set_nslots(devna, NR_TX, tx_slots);
1642 na_set_nslots(devna, NR_RX, nxp->nxp_rx_slots);
1643 /*
1644 * Verify upper bounds; the parameters must have already been
1645 * validated by nxdom_prov_params() by the time we get here.
1646 */
1647 ASSERT(na_get_nrings(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
1648 ASSERT(na_get_nrings(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
1649 ASSERT(na_get_nslots(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
1650 ASSERT(na_get_nslots(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
1651
1652 na_attach_common(devna, nx, &nx_netif_compat_prov_s);
1653
1654 if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
1655 nx, devna)) != 0) {
1656 ASSERT(devna->na_arena == NULL);
1657 /* we've transferred the refcnt to na_ifp above */
1658 ifp = NULL;
1659 goto err;
1660 }
1661 ASSERT(devna->na_arena != NULL);
1662
1663 *(uint32_t *)(uintptr_t)&devna->na_flowadv_max = nxp->nxp_flowadv_max;
1664 ASSERT(devna->na_flowadv_max == 0 ||
1665 skmem_arena_nexus(devna->na_arena)->arn_flowadv_obj != NULL);
1666
1667 /* setup packet copy routines */
1668 if (skmem_arena_nexus(devna->na_arena)->arn_rx_pp->pp_max_frags > 1) {
1669 nif->nif_pkt_copy_from_mbuf =
1670 pkt_copy_multi_buflet_from_mbuf;
1671 nif->nif_pkt_copy_to_mbuf =
1672 pkt_copy_multi_buflet_to_mbuf;
1673 } else {
1674 nif->nif_pkt_copy_from_mbuf = pkt_copy_from_mbuf;
1675 nif->nif_pkt_copy_to_mbuf = pkt_copy_to_mbuf;
1676 }
1677
1678 /* initialize the host netif adapter */
1679 hostnca->nca_up.nifna_netif = nif;
1680 nx_netif_retain(nif);
1681 hostna = &hostnca->nca_up.nifna_up;
1682 (void) snprintf(hostna->na_name, sizeof(hostna->na_name),
1683 "%s^", devna->na_name);
1684 uuid_generate_random(hostna->na_uuid);
1685 if (embryonic) {
1686 /*
1687 * We will move this over to na_ifp once
1688 * the interface is fully attached.
1689 */
1690 hostna->na_private = ifp;
1691 ASSERT(hostna->na_ifp == NULL);
1692 } else {
1693 ASSERT(hostna->na_private == NULL);
1694 hostna->na_ifp = devna->na_ifp;
1695 ifnet_incr_iorefcnt(hostna->na_ifp);
1696 }
	hostna->na_type = NA_NETIF_COMPAT_HOST;
	hostna->na_free = na_netif_compat_free;
	hostna->na_activate = nx_netif_host_na_activate;
	hostna->na_txsync = nx_netif_host_na_txsync;
	hostna->na_rxsync = nx_netif_host_na_rxsync;
	hostna->na_dtor = nx_netif_compat_na_dtor;
	hostna->na_krings_create = nx_netif_host_krings_create;
	hostna->na_krings_delete = nx_netif_host_krings_delete;
	hostna->na_special = nx_netif_host_na_special;

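	/* mark this adapter as serving the host (kernel) stack only */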
	atomic_bitset_32(&hostna->na_flags, NAF_HOST_ONLY);
	*(nexus_stats_type_t *)(uintptr_t)&hostna->na_stats_type =
	    NEXUS_STATS_TYPE_INVALID;

	na_set_nrings(hostna, NR_TX, 1);
	na_set_nrings(hostna, NR_RX, 0);
	na_set_nslots(hostna, NR_TX, nxp->nxp_tx_slots);
	na_set_nslots(hostna, NR_RX, 0);

	na_attach_common(hostna, nx, &nx_netif_prov_s);

	if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
	    nx, hostna)) != 0) {
		ASSERT(hostna->na_arena == NULL);
		/* we've transferred the refcnt to na_ifp above */
		ifp = NULL;
		goto err;
	}
	ASSERT(hostna->na_arena != NULL);

	*(uint32_t *)(uintptr_t)&hostna->na_flowadv_max = nxp->nxp_flowadv_max;
	ASSERT(hostna->na_flowadv_max == 0 ||
	    skmem_arena_nexus(hostna->na_arena)->arn_flowadv_obj != NULL);

	/* these will be undone by destructor */
	ifp->if_na_ops = &na_netif_compat_ops;
	ifp->if_na = &devnca->nca_up;
	na_retain_locked(devna);
	na_retain_locked(hostna);

	SKYWALK_SET_CAPABLE(ifp);

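	/*
	 * Bind the two adapters to their reserved netif ports; the
	 * dev and host ports are preallocated, so failure here is
	 * not expected (hence the ASSERTs).
	 */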
	NETIF_WLOCK(nif);
	nif->nif_ifp = ifp;
	retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_DEV, NULL, &devna,
	    kernproc);
	ASSERT(retval == 0);
	retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_HOST, NULL, &hostna,
	    kernproc);
	ASSERT(retval == 0);
	NETIF_WUNLOCK(nif);

#if SK_LOG
	uuid_string_t uuidstr;
	SK_DF(SK_VERB_NETIF, "na_name: \"%s\"", devna->na_name);
	SK_DF(SK_VERB_NETIF, " UUID: %s",
	    sk_uuid_unparse(devna->na_uuid, uuidstr));
	SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
	    SK_KVA(devna->na_nx), NX_DOM(devna->na_nx)->nxdom_name,
	    NX_DOM_PROV(devna->na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_NETIF, " flags: 0x%b", devna->na_flags, NAF_BITS);
	SK_DF(SK_VERB_NETIF, " flowadv_max: %u", devna->na_flowadv_max);
	SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
	    na_get_nrings(devna, NR_TX), na_get_nrings(devna, NR_RX));
	SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
	    na_get_nslots(devna, NR_TX), na_get_nslots(devna, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_DF(SK_VERB_NETIF, " next_pipe: %u", devna->na_next_pipe);
	SK_DF(SK_VERB_NETIF, " max_pipes: %u", devna->na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
	SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]",
	    SK_KVA(ifp), ifp->if_xname, ifp->if_refio);
	SK_DF(SK_VERB_NETIF, "hostna: \"%s\"", hostna->na_name);
	SK_DF(SK_VERB_NETIF, " UUID: %s",
	    sk_uuid_unparse(hostna->na_uuid, uuidstr));
	SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
	    SK_KVA(hostna->na_nx), NX_DOM(hostna->na_nx)->nxdom_name,
	    NX_DOM_PROV(hostna->na_nx)->nxdom_prov_name);
	SK_DF(SK_VERB_NETIF, " flags: 0x%b",
	    hostna->na_flags, NAF_BITS);
	SK_DF(SK_VERB_NETIF, " flowadv_max: %u", hostna->na_flowadv_max);
	SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
	    na_get_nrings(hostna, NR_TX), na_get_nrings(hostna, NR_RX));
	SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
	    na_get_nslots(hostna, NR_TX), na_get_nslots(hostna, NR_RX));
#if CONFIG_NEXUS_USER_PIPE
	SK_DF(SK_VERB_NETIF, " next_pipe: %u", hostna->na_next_pipe);
	SK_DF(SK_VERB_NETIF, " max_pipes: %u", hostna->na_max_pipes);
#endif /* CONFIG_NEXUS_USER_PIPE */
	SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]", SK_KVA(ifp),
	    ifp->if_xname, ifp->if_refio);
#endif /* SK_LOG */

1787
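	/*
	 * Common exit point: on failure, release each adapter's arena
	 * and ifnet I/O reference, then drop the netif references and
	 * free both adapters.
	 */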
err:
	if (retval != 0) {
		ASSERT(ifp == NULL);
		if (devna != NULL) {
			if (devna->na_arena != NULL) {
				skmem_arena_release(devna->na_arena);
				devna->na_arena = NULL;
			}
			if (devna->na_ifp != NULL) {
				ifnet_decr_iorefcnt(devna->na_ifp);
				devna->na_ifp = NULL;
			}
			devna->na_private = NULL;
		}
		if (hostna != NULL) {
			if (hostna->na_arena != NULL) {
				skmem_arena_release(hostna->na_arena);
				hostna->na_arena = NULL;
			}
			if (hostna->na_ifp != NULL) {
				ifnet_decr_iorefcnt(hostna->na_ifp);
				hostna->na_ifp = NULL;
			}
			hostna->na_private = NULL;
		}
		if (devnca != NULL) {
			if (devnca->nca_up.nifna_netif != NULL) {
				nx_netif_release(devnca->nca_up.nifna_netif);
				devnca->nca_up.nifna_netif = NULL;
			}
			na_netif_compat_free((struct nexus_adapter *)devnca);
		}
		if (hostnca != NULL) {
			if (hostnca->nca_up.nifna_netif != NULL) {
				nx_netif_release(hostnca->nca_up.nifna_netif);
				hostnca->nca_up.nifna_netif = NULL;
			}
			na_netif_compat_free((struct nexus_adapter *)hostnca);
		}
	}
	return retval;
}
1830
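/* thin wrapper around the common na_netif_finalize() path */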
static void
na_netif_compat_finalize(struct nexus_netif_adapter *nifna, struct ifnet *ifp)
{
	na_netif_finalize(nifna, ifp);
}

/*
 * Intercept the RX routine in the standard device driver.
 * The second argument is TRUE to intercept, FALSE to restore.
 */
static int
nx_netif_compat_catch_rx(struct nexus_netif_compat_adapter *nca,
    boolean_t enable)
{
	struct ifnet *ifp = nca->nca_up.nifna_up.na_ifp;
	int err = 0;

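	/* only the dev (non-host) adapter may hook the driver input path */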
	ASSERT(!(nca->nca_up.nifna_up.na_flags & NAF_HOST_ONLY));

	if (enable) {
		err = dlil_set_input_handler(ifp, nx_netif_compat_receive);
	} else {
		dlil_reset_input_handler(ifp);
	}
	return err;
}
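
/*
 * A minimal sketch (illustrative only, not an actual call site) of how
 * the intercept above is driven for a compat adapter `nca`:
 *
 *	(void) nx_netif_compat_catch_rx(nca, TRUE);
 *		-> inbound mbufs are now diverted to nx_netif_compat_receive()
 *	...
 *	(void) nx_netif_compat_catch_rx(nca, FALSE);
 *		-> the default DLIL input path is restored
 */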

/*
 * Transmit routine used by nx_netif_compat_na_txsync().  Returns 0 on
 * success and non-zero on error (which may mean a dropped packet or some
 * other failure).  pkt is the channel packet to transmit, and m is the
 * (preallocated) mbuf that receives a copy of its payload.
 *
 * A reference should be held on the mbuf so that the m_freem() at the
 * end of the transmission does not release its resources.
 *
 * On FreeBSD, and on multiqueue cards, we can force the queue using
 *	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 *		i = m->m_pkthdr.flowid % adapter->num_queues;
 *	else
 *		i = curcpu % adapter->num_queues;
 */
static int
nx_netif_compat_xmit_frame(struct nexus_adapter *na, struct mbuf *m,
    struct __kern_packet *pkt)
{
	struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
	struct nx_netif *nif = nifna->nifna_netif;
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
	struct ifnet *ifp = na->na_ifp;
	kern_packet_t ph = SK_PTR_ENCODE(pkt, METADATA_TYPE(pkt),
	    METADATA_SUBTYPE(pkt));
	uint32_t len;
	int ret = 0;

	if ((ret = mbuf_ring_cluster_activate(m)) != 0) {
		panic("Failed to activate mbuf ring cluster 0x%llx (%d)",
		    SK_KVA(m), ret);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	len = pkt->pkt_length;

	/*
	 * The mbuf should be a cluster from our special pool, so we do not
	 * need m_copyback(); a plain copy into the cluster suffices.
	 */
	if (m->m_ext.ext_size < len) {
		SK_RD(5, "size %u < len %u", m->m_ext.ext_size, len);
		len = m->m_ext.ext_size;
	}

	STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
	if (PACKET_HAS_PARTIAL_CHECKSUM(pkt)) {
		STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM);
	}

	nif->nif_pkt_copy_to_mbuf(NR_TX, ph, pkt->pkt_headroom, m, 0, len,
	    PACKET_HAS_PARTIAL_CHECKSUM(pkt), pkt->pkt_csum_tx_start_off);

	/* used for TX completion notification */
	ret = mbuf_set_tx_compl_data(m, (uintptr_t)ifp, (uintptr_t)NULL);
	ASSERT(ret == 0);

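	/* hand the mbuf to the interface's DLIL output path */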
	ret = dlil_output_handler(ifp, m);
	return ret;
}
