/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#define _IP_VHL
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <net/ethernet.h>
#include <net/pktap.h>
#include <sys/kdebug.h>
#include <sys/sdt.h>

#define DBG_FUNC_NX_NETIF_HOST_ENQUEUE \
	SKYWALKDBG_CODE(DBG_SKYWALK_NETIF, 2)

static void nx_netif_host_catch_tx(struct nexus_adapter *, bool);
static inline struct __kern_packet*
nx_netif_mbuf_to_kpkt(struct nexus_adapter *, struct mbuf *);

#define SK_IFCAP_CSUM (IFCAP_HWCSUM|IFCAP_CSUM_PARTIAL|IFCAP_CSUM_ZERO_INVERT)

static bool
nx_netif_host_is_gso_needed(struct nexus_adapter *na)
{
	struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;

	/*
	 * Don't enable for Compat netif.
	 */
	if (na->na_type != NA_NETIF_HOST) {
		return false;
	}
	/*
	 * Don't enable if netif is not plumbed under a flowswitch.
	 */
	if (!NA_KERNEL_ONLY(na)) {
		return false;
	}
	/*
	 * Don't enable if HW TSO is enabled.
	 */
	if (((nif->nif_hwassist & IFNET_TSO_IPV4) != 0) ||
	    ((nif->nif_hwassist & IFNET_TSO_IPV6) != 0)) {
		return false;
	}
	/*
	 * Don't enable if TX aggregation is disabled.
	 */
	if (sk_fsw_tx_agg_tcp == 0) {
		return false;
	}
	return true;
}

static void
nx_netif_host_adjust_if_capabilities(struct nexus_adapter *na, bool activate)
{
	struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;
	struct ifnet *ifp = na->na_ifp;

	ifnet_lock_exclusive(ifp);

	if (activate) {
		/* XXX: [email protected] - disable TSO and LRO for now */
		nif->nif_hwassist = ifp->if_hwassist;
		nif->nif_capabilities = ifp->if_capabilities;
		nif->nif_capenable = ifp->if_capenable;
		ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_TSOF);
		ifp->if_capabilities &= ~(SK_IFCAP_CSUM | IFCAP_TSO);
		ifp->if_capenable &= ~(SK_IFCAP_CSUM | IFCAP_TSO);

		/*
		 * Re-enable the capabilities which the Skywalk layer provides:
		 *
		 * Native driver: a copy from packet to mbuf always occurs
		 * for each inbound and outbound packet; if hardware
		 * does not support csum offload, we leverage combined
		 * copy and checksum, and thus advertise IFNET_CSUM_PARTIAL.
		 * We also always enable 16KB jumbo mbuf support.
		 *
		 * Compat driver: inbound and outbound mbufs don't incur a
		 * copy, and so leave the driver advertised flags alone.
		 */
		if (NA_KERNEL_ONLY(na)) {
			if (na->na_type == NA_NETIF_HOST) { /* native */
				ifp->if_hwassist |=
				    IFNET_MULTIPAGES | (nif->nif_hwassist &
				    (IFNET_CHECKSUMF | IFNET_TSOF));
				ifp->if_capabilities |=
				    (nif->nif_capabilities &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
				ifp->if_capenable |=
				    (nif->nif_capenable &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
				/*
				 * If the hardware doesn't support full TCP/UDP
				 * csum offload, advertise IFNET_CSUM_PARTIAL.
				 */
				if ((ifp->if_hwassist & IFNET_UDP_TCP_TX_CHECKSUMF) !=
				    IFNET_UDP_TCP_TX_CHECKSUMF) {
					ifp->if_hwassist |= IFNET_CSUM_PARTIAL | IFNET_CSUM_ZERO_INVERT;
					ifp->if_capabilities |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
					ifp->if_capenable |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
				}
				if (sk_fsw_tx_agg_tcp != 0) {
					ifp->if_hwassist |= IFNET_TSOF;
					ifp->if_capabilities |= IFCAP_TSO;
					ifp->if_capenable |= IFCAP_TSO;
				}

				if (!nx_netif_host_is_gso_needed(na)) {
					if_set_eflags(ifp, IFEF_SENDLIST);
				}
			} else { /* compat */
				ifp->if_hwassist |=
				    (nif->nif_hwassist &
				    (IFNET_CHECKSUMF | IFNET_TSOF));
				ifp->if_capabilities |=
				    (nif->nif_capabilities &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
				ifp->if_capenable |=
				    (nif->nif_capenable &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
			}
		}
	} else {
		if (NA_KERNEL_ONLY(na) && na->na_type == NA_NETIF_HOST) {
			if_clear_eflags(ifp, IFEF_SENDLIST);
		}
		/* Unset any capabilities previously set by Skywalk */
		ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_MULTIPAGES);
		ifp->if_capabilities &= ~SK_IFCAP_CSUM;
		ifp->if_capenable &= ~SK_IFCAP_CSUM;
		if ((sk_fsw_tx_agg_tcp != 0) &&
		    (na->na_type == NA_NETIF_HOST)) {
			ifp->if_hwassist &= ~IFNET_TSOF;
			ifp->if_capabilities &= ~IFCAP_TSO;
			ifp->if_capenable &= ~IFCAP_TSO;
		}
		/* Restore the driver's original flags */
		ifp->if_hwassist |= (nif->nif_hwassist &
		    (IFNET_CHECKSUMF | IFNET_TSOF | IFNET_MULTIPAGES));
		ifp->if_capabilities |=
		    (nif->nif_capabilities & (SK_IFCAP_CSUM | IFCAP_TSO));
		ifp->if_capenable |=
		    (nif->nif_capenable & (SK_IFCAP_CSUM | IFCAP_TSO));
	}

	ifnet_lock_done(ifp);
}
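
/*
 * A minimal sketch (kept out of the build) of why advertising
 * IFNET_CSUM_PARTIAL above is essentially free for the native path:
 * the copy routine accumulates a 32-bit partial sum while it copies,
 * and the final Internet checksum is just RFC 1071 folding of that
 * sum. example_csum_fold() is a hypothetical name, not part of this
 * file's API.
 */
#if 0
static inline uint16_t
example_csum_fold(uint32_t sum)
{
	sum = (sum >> 16) + (sum & 0xffff);     /* fold the carries once */
	sum += (sum >> 16);                     /* a second fold suffices */
	return (uint16_t)~sum;                  /* ones' complement result */
}
#endif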

int
nx_netif_host_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
	struct ifnet *ifp = na->na_ifp;
	int error = 0;

	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
	    SK_KVA(na), na_activate_mode2str(mode));

	switch (mode) {
	case NA_ACTIVATE_MODE_ON:
		VERIFY(SKYWALK_CAPABLE(ifp));

		nx_netif_host_adjust_if_capabilities(na, true);
		/*
		 * Make Skywalk control the packet steering; don't
		 * intercept tx packets if this is a netif compat
		 * adapter attached to a flowswitch.
		 */
		nx_netif_host_catch_tx(na, true);

		os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
		break;

	case NA_ACTIVATE_MODE_DEFUNCT:
		VERIFY(SKYWALK_CAPABLE(ifp));
		break;

	case NA_ACTIVATE_MODE_OFF:
		/* Release packet steering control. */
		nx_netif_host_catch_tx(na, false);

		/*
		 * Note that here we cannot assert SKYWALK_CAPABLE()
		 * as we're called in the destructor path.
		 */
		os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);

		nx_netif_host_adjust_if_capabilities(na, false);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	return error;
}
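
/*
 * A minimal sketch (kept out of the build) of the lock-free flag
 * toggle idiom used above for NAF_ACTIVE: set with os_atomic_or(),
 * clear with os_atomic_andnot(). example_flags and F_EXAMPLE are
 * hypothetical.
 */
#if 0
static uint32_t example_flags;
#define F_EXAMPLE 0x1

static void
example_set_active(bool on)
{
	if (on) {
		os_atomic_or(&example_flags, F_EXAMPLE, relaxed);
	} else {
		os_atomic_andnot(&example_flags, F_EXAMPLE, relaxed);
	}
}
#endif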

/* na_krings_create callback for netif host adapters */
int
nx_netif_host_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	int ret;

	SK_LOCK_ASSERT_HELD();
	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	ret = na_rings_mem_setup(na, FALSE, ch);
	if (ret == 0) {
		struct __kern_channel_ring *kring;
		uint32_t i;

		/* drop by default until fully bound */
		if (NA_KERNEL_ONLY(na)) {
			na_kr_drop(na, TRUE);
		}

		for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
			kring = &NAKR(na, NR_RX)[i];
			/* initialize the nx_mbq for the sw rx ring */
			nx_mbq_safe_init(kring, &kring->ckr_rx_queue,
			    NX_MBQ_NO_LIMIT, &nexus_mbq_lock_group,
			    &nexus_lock_attr);
			SK_DF(SK_VERB_NETIF,
			    "na \"%s\" (0x%llx) initialized host kr \"%s\" "
			    "(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
			    CKRF_BITS);
		}
	}
	return ret;
}
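
/*
 * A sketch (kept out of the build) of the nx_mbq lifecycle that pairs
 * nx_netif_host_krings_create() with the destructor below: a queue
 * initialized with nx_mbq_safe_init() must be purged of any queued
 * mbufs before it is torn down, and on defunct it is purged but kept
 * allocated.
 */
#if 0
nx_mbq_safe_init(kring, &kring->ckr_rx_queue, NX_MBQ_NO_LIMIT,
    &nexus_mbq_lock_group, &nexus_lock_attr);
/* ... the host RX path may enqueue mbufs while active ... */
nx_mbq_purge(&kring->ckr_rx_queue);
if (!defunct) {
	nx_mbq_safe_destroy(&kring->ckr_rx_queue);
}
#endif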

/*
 * Destructor for netif host adapters; the rings connected to the host
 * each carry an mbuf queue as well, which we need to purge first.
 */
void
nx_netif_host_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	struct __kern_channel_ring *kring;
	uint32_t i;

	SK_LOCK_ASSERT_HELD();
	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	if (NA_KERNEL_ONLY(na)) {
		na_kr_drop(na, TRUE);
	}

	for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
		struct nx_mbq *q;

		kring = &NAKR(na, NR_RX)[i];
		q = &kring->ckr_rx_queue;
		SK_DF(SK_VERB_NETIF,
		    "na \"%s\" (0x%llx) destroy host kr \"%s\" (0x%llx) "
		    "krflags 0x%b with qlen %u", na->na_name, SK_KVA(na),
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS, nx_mbq_len(q));
		nx_mbq_purge(q);
		if (!defunct) {
			nx_mbq_safe_destroy(q);
		}
	}

	na_rings_mem_teardown(na, ch, defunct);
}

/* kring->ckr_na_sync callback for the host rx ring */
int
nx_netif_host_na_rxsync(struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags)
{
#pragma unused(kring, p, flags)
	return 0;
}

/*
 * kring->ckr_na_sync callback for the host tx ring.
 */
int
nx_netif_host_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
	return 0;
}

int
nx_netif_host_na_special(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr, nxspec_cmd_t spec_cmd)
{
	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	return nx_netif_na_special_common(na, ch, chr, spec_cmd);
}

/*
 * Intercept the packet steering routine in the tx path, so that we
 * can decide which queue is used for an mbuf. The second argument is
 * TRUE to intercept, FALSE to restore.
 */
static void
nx_netif_host_catch_tx(struct nexus_adapter *na, bool activate)
{
	struct ifnet *ifp = na->na_ifp;
	int err = 0;

	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	/*
	 * The common case is NA_KERNEL_ONLY: the netif is plumbed
	 * below the flowswitch. For TXSTART compat drivers and legacy,
	 * don't intercept the DLIL output handler, since in this model
	 * packets from both the BSD stack and the flowswitch are
	 * directly enqueued to the classq via ifnet_enqueue().
	 *
	 * Otherwise, it's the uncommon case where a user channel is
	 * opened directly to the netif. Here we either intercept
	 * or restore the DLIL output handler.
	 */
	if (activate) {
		if (__improbable(!NA_KERNEL_ONLY(na))) {
			return;
		}
		/*
		 * For native drivers only, intercept if_output();
		 * for compat, leave it alone since we don't need
		 * to perform any mbuf-pkt conversion.
		 */
		if (na->na_type == NA_NETIF_HOST) {
			err = ifnet_set_output_handler(ifp,
			    nx_netif_host_is_gso_needed(na) ?
			    netif_gso_dispatch : nx_netif_host_output);
			VERIFY(err == 0);
		}
	} else {
		if (__improbable(!NA_KERNEL_ONLY(na))) {
			return;
		}
		/*
		 * Restore the original if_output() for native drivers.
		 */
		if (na->na_type == NA_NETIF_HOST) {
			ifnet_reset_output_handler(ifp);
		}
	}
}
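
/*
 * A sketch (kept out of the build) of the interception pattern above:
 * a handler with the if_output() shape is swapped in while the host
 * adapter is active and restored on deactivation. example_output()
 * is hypothetical; the real native handlers are netif_gso_dispatch()
 * and nx_netif_host_output().
 */
#if 0
static int
example_output(struct ifnet *ifp, struct mbuf *m_chain)
{
	/* steer/convert the chain, then reuse the native handler */
	return nx_netif_host_output(ifp, m_chain);
}

/* on activate */
VERIFY(ifnet_set_output_handler(ifp, example_output) == 0);
/* ... */
/* on deactivate */
ifnet_reset_output_handler(ifp);
#endif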

static int
get_af_from_mbuf(struct mbuf *m)
{
	/*
	 * -fbounds-safety: Although m_pkthdr.pkt_hdr is a void * without
	 * annotations, here we can just mark the uint8_t *pkt_hdr as __single
	 * because we don't do any arithmetic and the only place we dereference
	 * it is to read the ip version, where having the bounds of a single
	 * 8-bit size is enough.
	 */
	uint8_t *__single pkt_hdr;
	uint8_t ipv;
	struct mbuf *m0;
	int af;

	pkt_hdr = m->m_pkthdr.pkt_hdr;
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if (pkt_hdr >= (uint8_t *)m0->m_data &&
		    pkt_hdr < (uint8_t *)m0->m_data + m0->m_len) {
			break;
		}
	}
	if (m0 == NULL) {
		DTRACE_SKYWALK1(bad__pkthdr, struct mbuf *, m);
		af = AF_UNSPEC;
		goto done;
	}
	ipv = IP_VHL_V(*pkt_hdr);
	if (ipv == 4) {
		af = AF_INET;
	} else if (ipv == 6) {
		af = AF_INET6;
	} else {
		af = AF_UNSPEC;
	}
done:
	DTRACE_SKYWALK2(mbuf__af, int, af, struct mbuf *, m);
	return af;
}
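
/*
 * A sketch (kept out of the build) of the version test above: with
 * _IP_VHL defined, IP_VHL_V() extracts the high nibble of the first
 * IP header byte, so 0x45 (IPv4, 20-byte header) maps to AF_INET and
 * 0x60 (IPv6) maps to AF_INET6; anything else yields AF_UNSPEC.
 */
#if 0
ASSERT(IP_VHL_V(0x45) == 4);	/* IPv4 */
ASSERT(IP_VHL_V(0x60) == 6);	/* IPv6 */
#endif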

/*
 * if_output() callback called by dlil_output() to handle mbufs coming out
 * of the host networking stack. The mbuf will get converted to a packet,
 * and enqueued to the classq of a Skywalk native interface.
 */
int
nx_netif_host_output(struct ifnet *ifp, struct mbuf *m_chain)
{
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct __kern_channel_ring *currentkring = NULL;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	struct nexus_adapter *hostna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST);
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(hwna->na_nx)->nif_stats;
	struct mbuf *m_head = m_chain, *m = NULL, *drop_list = NULL, *free_list = NULL;
	struct __kern_packet *pkt_chain_head, *pkt_chain_tail;
	struct netif_qset *__single qset = NULL;
	struct pktq pkt_q;
	uint64_t qset_id;
	bool qset_id_valid = false;
	boolean_t pkt_drop = FALSE;
	uint32_t n_pkts = 0, n_bytes = 0;
	errno_t error = 0;

	ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
	ASSERT(hostna->na_type == NA_NETIF_HOST);

	KPKTQ_INIT(&pkt_q);
	while (m_head) {
		struct __kern_channel_ring *kring;

		pkt_drop = FALSE;
		m = m_head;
		m_head = m_head->m_nextpkt;
		m->m_nextpkt = NULL;

		uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));
		struct __kern_packet *kpkt;

		/*
		 * nx_netif_host_catch_tx() steers output packets here
		 * only for native interfaces; we must never get here
		 * for compat.
		 */

		ASSERT(sc_idx < KPKT_SC_MAX_CLASSES);
		kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
		if (currentkring != kring) {
			if (currentkring != NULL) {
				KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_END), SK_KVA(currentkring),
				    error);
			}
			currentkring = kring;
			KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_START), SK_KVA(currentkring));
		}
		if (__improbable(!NA_IS_ACTIVE(hwna) || !NA_IS_ACTIVE(hostna))) {
			STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
			SK_ERR("\"%s\" (0x%llx) not in skywalk mode anymore",
			    hwna->na_name, SK_KVA(hwna));
			error = ENXIO;
			pkt_drop = TRUE;
			goto out;
		}
		/*
		 * Drop if the kring no longer accepts packets.
		 */
		if (__improbable(KR_DROP(&hostna->na_rx_rings[0]) || KR_DROP(kring))) {
			STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
			/* not a serious error, so no need to be chatty here */
			SK_DF(SK_VERB_NETIF,
			    "kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
			    CKRF_BITS, ifp->if_xname);
			error = ENXIO;
			pkt_drop = TRUE;
			goto out;
		}
		if (__improbable(((unsigned)m_pktlen(m) + ifp->if_tx_headroom) >
		    kring->ckr_max_pkt_len)) { /* too long for us */
			STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
			SK_ERR("\"%s\" (0x%llx) from_host, drop packet size %u > %u",
			    hwna->na_name, SK_KVA(hwna), m_pktlen(m),
			    kring->ckr_max_pkt_len);
			pkt_drop = TRUE;
			goto out;
		}
		/*
		 * Convert mbuf to packet and enqueue it.
		 */
		kpkt = nx_netif_mbuf_to_kpkt(hwna, m);
		if (kpkt == NULL) {
			error = ENOBUFS;
			pkt_drop = TRUE;
			goto out;
		}

		if ((m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) == 0 &&
		    pktap_total_tap_count != 0) {
			int af = get_af_from_mbuf(m);

			if (af != AF_UNSPEC) {
				nx_netif_pktap_output(ifp, af, kpkt);
			}
		}
		if (NX_LLINK_PROV(nif->nif_nx) &&
		    ifp->if_traffic_rule_count > 0 &&
		    !qset_id_valid &&
		    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
		    kpkt, &qset_id) == 0) {
			qset_id_valid = true;
			/*
			 * This always returns a qset because if the qset id
			 * is invalid the default qset is returned.
			 */
			qset = nx_netif_find_qset(nif, qset_id);
			ASSERT(qset != NULL);
		}
		if (qset != NULL) {
			kpkt->pkt_qset_idx = qset->nqs_idx;
		}

		if (!netif_chain_enqueue_enabled(ifp)) {
			if (qset != NULL) {
				error = ifnet_enqueue_ifcq_pkt(ifp,
				    qset->nqs_ifcq, kpkt,
				    false, &pkt_drop);
				nx_netif_qset_release(&qset);
			} else {
				/* callee consumes packet */
				error = ifnet_enqueue_pkt(ifp, kpkt, false, &pkt_drop);
			}

			if (pkt_drop) {
				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
			}
		} else {
			KPKTQ_ENQUEUE(&pkt_q, kpkt);
			n_pkts++;
			n_bytes += m->m_pkthdr.len;
		}
out:
		/* always free mbuf (even in the success case) */
		m->m_nextpkt = free_list;
		free_list = m;

		if (__improbable(pkt_drop)) {
			STATS_INC(nifs, NETIF_STATS_DROP);
		}

		if (__improbable(error)) {
			break;
		}
	}

	if (currentkring != NULL) {
		KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_END), SK_KVA(currentkring),
		    error);
	}

	if (__probable(!KPKTQ_EMPTY(&pkt_q))) {
		pkt_chain_head = KPKTQ_FIRST(&pkt_q);
		pkt_chain_tail = KPKTQ_LAST(&pkt_q);
		if (qset != NULL) {
			error = ifnet_enqueue_ifcq_pkt_chain(ifp, qset->nqs_ifcq,
			    pkt_chain_head, pkt_chain_tail, n_pkts, n_bytes, false, &pkt_drop);
			nx_netif_qset_release(&qset);
		} else {
			/* callee consumes packet */
			error = ifnet_enqueue_pkt_chain(ifp, pkt_chain_head, pkt_chain_tail,
			    n_pkts, n_bytes, false, &pkt_drop);
		}
		if (pkt_drop) {
			STATS_ADD(nifs, NETIF_STATS_TX_DROP_ENQ_AQM, n_pkts);
			STATS_ADD(nifs, NETIF_STATS_DROP, n_pkts);
		}
	}

	if (error) {
		drop_list = m_head;
		while (m_head != NULL) {
			m_head = m_head->m_nextpkt;
			STATS_INC(nifs, NETIF_STATS_DROP);
		}
		m_freem_list(drop_list);
	}
	m_freem_list(free_list);

	netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);

	return error;
}
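
/*
 * A sketch (kept out of the build) of the batching pattern used
 * above: converted packets are staged on a local pktq and handed to
 * AQM as a single chain, amortizing the per-enqueue cost; when chain
 * enqueue is disabled for the interface, each packet takes the
 * per-packet ifnet_enqueue_pkt() path instead.
 */
#if 0
struct pktq pkt_q;
KPKTQ_INIT(&pkt_q);
/* per converted packet: */
KPKTQ_ENQUEUE(&pkt_q, kpkt);
/* once per chain, after the loop: */
error = ifnet_enqueue_pkt_chain(ifp, KPKTQ_FIRST(&pkt_q),
    KPKTQ_LAST(&pkt_q), n_pkts, n_bytes, false, &pkt_drop);
#endif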

static inline int
get_l2_hlen(struct mbuf *m, uint8_t *l2len)
{
	/*
	 * -fbounds-safety: Although m_pkthdr.pkt_hdr is a void * without
	 * annotations, here we mark char *pkt_hdr as __single because we don't
	 * dereference this pointer, and we're mostly just using this pointer
	 * for comparisons.
	 */
	char *__single pkt_hdr;
	struct mbuf *m0;
	uint64_t len = 0;

	pkt_hdr = m->m_pkthdr.pkt_hdr;
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if (pkt_hdr >= m_mtod_current(m0) &&
		    pkt_hdr < m_mtod_current(m0) + m0->m_len) {
			break;
		}
		len += m0->m_len;
	}
	if (m0 == NULL) {
		DTRACE_SKYWALK2(bad__pkthdr, struct mbuf *, m, char *, pkt_hdr);
		return EINVAL;
	}
	len += (pkt_hdr - m_mtod_current(m0));
	if (len > UINT8_MAX) {
		DTRACE_SKYWALK2(bad__l2len, struct mbuf *, m, uint64_t, len);
		return EINVAL;
	}
	*l2len = (uint8_t)len;
	return 0;
}
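
/*
 * A sketch (kept out of the build) of what get_l2_hlen() computes:
 * for an untagged Ethernet frame held in one mbuf, pkt_hdr points at
 * the IP header, 14 bytes past m_data, so the reported L2 length is
 * ETHER_HDR_LEN.
 *
 *   m_data --> [ 14-byte Ethernet header ][ IP header ... ]
 *              ^                          ^
 *              m_mtod_current(m)          m->m_pkthdr.pkt_hdr
 */
#if 0
uint8_t l2len;
if (get_l2_hlen(m, &l2len) == 0) {
	ASSERT(l2len == ETHER_HDR_LEN);	/* 14 for untagged Ethernet */
}
#endif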

#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_mbuf_to_kpkt_log(struct __kern_packet *kpkt, uint32_t len,
    uint32_t poff)
{
	uint8_t *baddr;
	uint32_t pkt_len;

	MD_BUFLET_ADDR_ABS(kpkt, baddr);
	pkt_len = __packet_get_real_data_length(kpkt);
	SK_DF(SK_VERB_HOST | SK_VERB_TX, "mlen %u dplen %u"
	    " hr %u l2 %u poff %u", len, kpkt->pkt_length,
	    kpkt->pkt_headroom, kpkt->pkt_l2_len, poff);
	SK_DF(SK_VERB_HOST | SK_VERB_TX | SK_VERB_DUMP, "%s",
	    sk_dump("buf", baddr, pkt_len, 128, NULL, 0));
}
#endif /* SK_LOG */

static inline struct __kern_packet *
nx_netif_mbuf_to_kpkt(struct nexus_adapter *na, struct mbuf *m)
{
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
	struct nexus_netif_adapter *nifna = NIFNA(na);
	struct nx_netif *nif = nifna->nifna_netif;
	uint16_t poff = na->na_ifp->if_tx_headroom;
	uint32_t len;
	struct kern_pbufpool *pp;
	struct __kern_packet *kpkt;
	kern_packet_t ph;
	boolean_t copysum;
	uint8_t l2hlen;
	int err;

	pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
	ASSERT((pp != NULL) && (pp->pp_md_type == NEXUS_META_TYPE_PACKET) &&
	    (pp->pp_md_subtype == NEXUS_META_SUBTYPE_RAW));
	ASSERT(!PP_HAS_TRUNCATED_BUF(pp));

	len = m_pktlen(m);
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pp) * pp->pp_max_frags));

	/* alloc packet */
	ph = pp_alloc_packet_by_size(pp, poff + len, SKMEM_NOSLEEP);
	if (__improbable(ph == 0)) {
		STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
		SK_DF(SK_VERB_MEM,
		    "%s(%d) pp \"%s\" (0x%llx) has no more "
		    "packet for %s", sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), pp->pp_name, SK_KVA(pp),
		    if_name(na->na_ifp));
		return NULL;
	}

	copysum = ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID |
	    CSUM_PARTIAL)) == (CSUM_DATA_VALID | CSUM_PARTIAL));

	STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
	if (copysum) {
		STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM);
	}

	kpkt = SK_PTR_ADDR_KPKT(ph);
	kpkt->pkt_link_flags = 0;
	nif->nif_pkt_copy_from_mbuf(NR_TX, ph, poff, m, 0, len,
	    copysum, m->m_pkthdr.csum_tx_start);

	kpkt->pkt_headroom = (uint8_t)poff;
	if ((err = get_l2_hlen(m, &l2hlen)) == 0) {
		kpkt->pkt_l2_len = l2hlen;
	} else {
		kpkt->pkt_l2_len = 0;
	}
	/* finalize the packet */
	METADATA_ADJUST_LEN(kpkt, 0, poff);
	err = __packet_finalize(ph);
	VERIFY(err == 0);

#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_HOST) != 0) && kpkt != NULL) {
		nx_netif_mbuf_to_kpkt_log(kpkt, len, poff);
	}
#endif /* SK_LOG */

	return kpkt;
}
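
/*
 * A sketch (kept out of the build) of the "combined copy and
 * checksum" that nif_pkt_copy_from_mbuf() performs when copysum is
 * set: the 16-bit ones'-complement sum is accumulated while the
 * bytes are copied, so no second pass over the payload is needed.
 * example_copy_and_sum() is hypothetical; fold the returned sum as
 * in example_csum_fold() above.
 */
#if 0
static uint32_t
example_copy_and_sum(uint8_t *dst, const uint8_t *src, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		dst[i] = src[i];
		/* even offsets are the high byte of each 16-bit word */
		sum += (i & 1) ? dst[i] : ((uint32_t)dst[i] << 8);
	}
	return sum;
}
#endif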