xref: /xnu-8792.41.9/bsd/skywalk/nexus/netif/nx_netif_host.c (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #define _IP_VHL
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/nexus/netif/nx_netif.h>
31 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
32 #include <net/ethernet.h>
33 #include <net/pktap.h>
34 #include <sys/kdebug.h>
35 #include <sys/sdt.h>
36 
/*
 * Skywalk debug code 2 within the DBG_SKYWALK_NETIF class.
 * NOTE(review): presumably identifies the host-enqueue tracepoint; confirm
 * against the users of SK_KTRACE_NETIF_HOST_ENQUEUE.
 */
#define DBG_FUNC_NX_NETIF_HOST_ENQUEUE \
	SKYWALKDBG_CODE(DBG_SKYWALK_NETIF, 2)

/* Forward declarations of helpers defined later in this file. */
static void nx_netif_host_catch_tx(struct nexus_adapter *, bool);
static inline struct __kern_packet *nx_netif_mbuf_to_kpkt(
	struct nexus_adapter *, struct mbuf *);

/* Checksum-related interface capability bits masked and re-advertised here. */
#define SK_IFCAP_CSUM \
	(IFCAP_HWCSUM | IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT)
45 
46 static void
nx_netif_host_adjust_if_capabilities(struct nexus_adapter * na,bool activate)47 nx_netif_host_adjust_if_capabilities(struct nexus_adapter *na, bool activate)
48 {
49 	struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;
50 	struct ifnet *ifp = na->na_ifp;
51 
52 	ifnet_lock_exclusive(ifp);
53 
54 	if (activate) {
55 		/* XXX: [email protected] - disable TSO and LRO for now */
56 		nif->nif_hwassist = ifp->if_hwassist;
57 		nif->nif_capabilities = ifp->if_capabilities;
58 		nif->nif_capenable = ifp->if_capenable;
59 		ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_TSOF);
60 		ifp->if_capabilities &= ~(SK_IFCAP_CSUM | IFCAP_TSO);
61 		ifp->if_capenable &= ~(SK_IFCAP_CSUM | IFCAP_TSO);
62 
63 		/*
64 		 * Re-enable the capabilities which Skywalk layer provides:
65 		 *
66 		 * Native driver: a copy from packet to mbuf always occurs
67 		 * for each inbound and outbound packet; we leverage combined
68 		 * and copy checksum, and thus advertise the capabilities.
69 		 * We also always enable 16KB jumbo mbuf support.
70 		 *
71 		 * Compat driver: inbound and outbound mbufs don't incur a
72 		 * copy, and so leave the driver advertised flags alone.
73 		 */
74 		if (NA_KERNEL_ONLY(na)) {
75 			if (na->na_type == NA_NETIF_HOST) {     /* native */
76 				ifp->if_hwassist |= (IFNET_CSUM_TCP |
77 				    IFNET_CSUM_UDP | IFNET_CSUM_TCPIPV6 |
78 				    IFNET_CSUM_UDPIPV6 | IFNET_CSUM_PARTIAL |
79 				    IFNET_CSUM_ZERO_INVERT | IFNET_MULTIPAGES);
80 				ifp->if_capabilities |= SK_IFCAP_CSUM;
81 				ifp->if_capenable |= SK_IFCAP_CSUM;
82 				if (sk_fsw_tx_agg_tcp != 0) {
83 					ifp->if_hwassist |= IFNET_TSOF;
84 					ifp->if_capabilities |= IFCAP_TSO;
85 					ifp->if_capenable |= IFCAP_TSO;
86 				}
87 			} else {                                /* compat */
88 				ifp->if_hwassist |=
89 				    (nif->nif_hwassist &
90 				    (IFNET_CHECKSUMF | IFNET_TSOF));
91 				ifp->if_capabilities |=
92 				    (nif->nif_capabilities &
93 				    (SK_IFCAP_CSUM | IFCAP_TSO));
94 				ifp->if_capenable |=
95 				    (nif->nif_capenable &
96 				    (SK_IFCAP_CSUM | IFCAP_TSO));
97 			}
98 		}
99 	} else {
100 		/* Unset any capabilities previously set by Skywalk */
101 		ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_MULTIPAGES);
102 		ifp->if_capabilities &= ~SK_IFCAP_CSUM;
103 		ifp->if_capenable &= ~SK_IFCAP_CSUM;
104 		if ((sk_fsw_tx_agg_tcp != 0) &&
105 		    (na->na_type == NA_NETIF_HOST)) {
106 			ifp->if_hwassist &= ~IFNET_TSOF;
107 			ifp->if_capabilities &= ~IFCAP_TSO;
108 			ifp->if_capenable &= ~IFCAP_TSO;
109 		}
110 		/* Restore driver original flags */
111 		ifp->if_hwassist |= (nif->nif_hwassist &
112 		    (IFNET_CHECKSUMF | IFNET_TSOF | IFNET_MULTIPAGES));
113 		ifp->if_capabilities |=
114 		    (nif->nif_capabilities & (SK_IFCAP_CSUM | IFCAP_TSO));
115 		ifp->if_capenable |=
116 		    (nif->nif_capenable & (SK_IFCAP_CSUM | IFCAP_TSO));
117 	}
118 
119 	ifnet_lock_done(ifp);
120 }
121 
122 static  bool
nx_netif_host_is_gso_needed(struct nexus_adapter * na)123 nx_netif_host_is_gso_needed(struct nexus_adapter *na)
124 {
125 	struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;
126 
127 	/*
128 	 * Don't enable for Compat netif.
129 	 */
130 	if (na->na_type != NA_NETIF_HOST) {
131 		return false;
132 	}
133 	/*
134 	 * Don't enable if netif is not plumbed under a flowswitch.
135 	 */
136 	if (!NA_KERNEL_ONLY(na)) {
137 		return false;
138 	}
139 	/*
140 	 * Don't enable If HW TSO is enabled.
141 	 */
142 	if (((nif->nif_hwassist & IFNET_TSO_IPV4) != 0) ||
143 	    ((nif->nif_hwassist & IFNET_TSO_IPV6) != 0)) {
144 		return false;
145 	}
146 	/*
147 	 * Don't enable if TX aggregation is disabled.
148 	 */
149 	if (sk_fsw_tx_agg_tcp == 0) {
150 		return false;
151 	}
152 	return true;
153 }
154 
155 int
nx_netif_host_na_activate(struct nexus_adapter * na,na_activate_mode_t mode)156 nx_netif_host_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
157 {
158 	struct ifnet *ifp = na->na_ifp;
159 	int error = 0;
160 
161 	ASSERT(na->na_type == NA_NETIF_HOST ||
162 	    na->na_type == NA_NETIF_COMPAT_HOST);
163 	ASSERT(na->na_flags & NAF_HOST_ONLY);
164 
165 	SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
166 	    SK_KVA(na), na_activate_mode2str(mode));
167 
168 	switch (mode) {
169 	case NA_ACTIVATE_MODE_ON:
170 		VERIFY(SKYWALK_CAPABLE(ifp));
171 
172 		nx_netif_host_adjust_if_capabilities(na, true);
173 		/*
174 		 * Make skywalk control the packet steering
175 		 * Don't intercept tx packets if this is a netif compat
176 		 * adapter attached to a flowswitch
177 		 */
178 		nx_netif_host_catch_tx(na, true);
179 
180 		atomic_bitset_32(&na->na_flags, NAF_ACTIVE);
181 		break;
182 
183 	case NA_ACTIVATE_MODE_DEFUNCT:
184 		VERIFY(SKYWALK_CAPABLE(ifp));
185 		break;
186 
187 	case NA_ACTIVATE_MODE_OFF:
188 		/* Release packet steering control. */
189 		nx_netif_host_catch_tx(na, false);
190 
191 		/*
192 		 * Note that here we cannot assert SKYWALK_CAPABLE()
193 		 * as we're called in the destructor path.
194 		 */
195 		atomic_bitclear_32(&na->na_flags, NAF_ACTIVE);
196 
197 		nx_netif_host_adjust_if_capabilities(na, false);
198 		break;
199 
200 	default:
201 		VERIFY(0);
202 		/* NOTREACHED */
203 		__builtin_unreachable();
204 	}
205 
206 	return error;
207 }
208 
209 /* na_krings_create callback for netif host adapters */
210 int
nx_netif_host_krings_create(struct nexus_adapter * na,struct kern_channel * ch)211 nx_netif_host_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
212 {
213 	int ret;
214 
215 	SK_LOCK_ASSERT_HELD();
216 	ASSERT(na->na_type == NA_NETIF_HOST ||
217 	    na->na_type == NA_NETIF_COMPAT_HOST);
218 	ASSERT(na->na_flags & NAF_HOST_ONLY);
219 
220 	ret = na_rings_mem_setup(na, 0, FALSE, ch);
221 	if (ret == 0) {
222 		struct __kern_channel_ring *kring;
223 		uint32_t i;
224 
225 		/* drop by default until fully bound */
226 		if (NA_KERNEL_ONLY(na)) {
227 			na_kr_drop(na, TRUE);
228 		}
229 
230 		for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
231 			kring = &NAKR(na, NR_RX)[i];
232 			/* initialize the nx_mbq for the sw rx ring */
233 			nx_mbq_safe_init(kring, &kring->ckr_rx_queue,
234 			    NX_MBQ_NO_LIMIT, &nexus_mbq_lock_group,
235 			    &nexus_lock_attr);
236 			SK_DF(SK_VERB_NETIF,
237 			    "na \"%s\" (0x%llx) initialized host kr \"%s\" "
238 			    "(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
239 			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
240 			    CKRF_BITS);
241 		}
242 	}
243 	return ret;
244 }
245 
246 /*
247  * Destructor for netif host adapters; they also have an mbuf queue
248  * on the rings connected to the host so we need to purge them first.
249  */
250 void
nx_netif_host_krings_delete(struct nexus_adapter * na,struct kern_channel * ch,boolean_t defunct)251 nx_netif_host_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
252     boolean_t defunct)
253 {
254 	struct __kern_channel_ring *kring;
255 	uint32_t i;
256 
257 	SK_LOCK_ASSERT_HELD();
258 	ASSERT(na->na_type == NA_NETIF_HOST ||
259 	    na->na_type == NA_NETIF_COMPAT_HOST);
260 	ASSERT(na->na_flags & NAF_HOST_ONLY);
261 
262 	if (NA_KERNEL_ONLY(na)) {
263 		na_kr_drop(na, TRUE);
264 	}
265 
266 	for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
267 		struct nx_mbq *q;
268 
269 		kring = &NAKR(na, NR_RX)[i];
270 		q = &kring->ckr_rx_queue;
271 		SK_DF(SK_VERB_NETIF,
272 		    "na \"%s\" (0x%llx) destroy host kr \"%s\" (0x%llx) "
273 		    "krflags 0x%b with qlen %u", na->na_name, SK_KVA(na),
274 		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
275 		    CKRF_BITS, nx_mbq_len(q));
276 		nx_mbq_purge(q);
277 		if (!defunct) {
278 			nx_mbq_safe_destroy(q);
279 		}
280 	}
281 
282 	na_rings_mem_teardown(na, ch, defunct);
283 }
284 
/*
 * kring->ckr_na_sync callback for the host rx ring.
 * Does nothing and always reports success.
 */
int
nx_netif_host_na_rxsync(struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags)
{
	(void)kring;
	(void)p;
	(void)flags;

	return 0;
}
293 
/*
 * kring->ckr_na_sync callback for the host tx ring.
 * Does nothing and always reports success.
 */
int
nx_netif_host_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	(void)kring;
	(void)p;
	(void)flags;

	return 0;
}
304 
305 int
nx_netif_host_na_special(struct nexus_adapter * na,struct kern_channel * ch,struct chreq * chr,nxspec_cmd_t spec_cmd)306 nx_netif_host_na_special(struct nexus_adapter *na, struct kern_channel *ch,
307     struct chreq *chr, nxspec_cmd_t spec_cmd)
308 {
309 	ASSERT(na->na_type == NA_NETIF_HOST ||
310 	    na->na_type == NA_NETIF_COMPAT_HOST);
311 	return nx_netif_na_special_common(na, ch, chr, spec_cmd);
312 }
313 
314 /*
315  * Intercept the packet steering routine in the tx path,
316  * so that we can decide which queue is used for an mbuf.
317  * Second argument is TRUE to intercept, FALSE to restore.
318  */
319 static void
nx_netif_host_catch_tx(struct nexus_adapter * na,bool activate)320 nx_netif_host_catch_tx(struct nexus_adapter *na, bool activate)
321 {
322 	struct ifnet *ifp = na->na_ifp;
323 	int err = 0;
324 
325 	ASSERT(na->na_type == NA_NETIF_HOST ||
326 	    na->na_type == NA_NETIF_COMPAT_HOST);
327 	ASSERT(na->na_flags & NAF_HOST_ONLY);
328 
329 	/*
330 	 * Common case is NA_KERNEL_ONLY: if the netif is plumbed
331 	 * below the flowswitch.  For TXSTART compat driver and legacy:
332 	 * don't intercept DLIL output handler, since in this model
333 	 * packets from both BSD stack and flowswitch are directly
334 	 * enqueued to the classq via ifnet_enqueue().
335 	 *
336 	 * Otherwise, it's the uncommon case where a user channel is
337 	 * opened directly to the netif.  Here we either intercept
338 	 * or restore the DLIL output handler.
339 	 */
340 	if (activate) {
341 		if (__improbable(!NA_KERNEL_ONLY(na))) {
342 			return;
343 		}
344 		/*
345 		 * For native drivers only, intercept if_output();
346 		 * for compat, leave it alone since we don't need
347 		 * to perform any mbuf-pkt conversion.
348 		 */
349 		if (na->na_type == NA_NETIF_HOST) {
350 			err = ifnet_set_output_handler(ifp,
351 			    nx_netif_host_is_gso_needed(na) ?
352 			    netif_gso_dispatch : nx_netif_host_output);
353 			VERIFY(err == 0);
354 		}
355 	} else {
356 		if (__improbable(!NA_KERNEL_ONLY(na))) {
357 			return;
358 		}
359 		/*
360 		 * Restore original if_output() for native drivers.
361 		 */
362 		if (na->na_type == NA_NETIF_HOST) {
363 			ifnet_reset_output_handler(ifp);
364 		}
365 	}
366 }
367 
368 static int
get_af_from_mbuf(struct mbuf * m)369 get_af_from_mbuf(struct mbuf *m)
370 {
371 	uint8_t *pkt_hdr;
372 	uint8_t ipv;
373 	struct mbuf *m0;
374 	int af;
375 
376 	pkt_hdr = m->m_pkthdr.pkt_hdr;
377 	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
378 		if (pkt_hdr >= (uint8_t *)m0->m_data &&
379 		    pkt_hdr < (uint8_t *)m0->m_data + m0->m_len) {
380 			break;
381 		}
382 	}
383 	if (m0 == NULL) {
384 		DTRACE_SKYWALK1(bad__pkthdr, struct mbuf *, m);
385 		af = AF_UNSPEC;
386 		goto done;
387 	}
388 	ipv = IP_VHL_V(*pkt_hdr);
389 	if (ipv == 4) {
390 		af = AF_INET;
391 	} else if (ipv == 6) {
392 		af = AF_INET6;
393 	} else {
394 		af = AF_UNSPEC;
395 	}
396 done:
397 	DTRACE_SKYWALK2(mbuf__af, int, af, struct mbuf *, m);
398 	return af;
399 }
400 
401 /*
402  * if_output() callback called by dlil_output() to handle mbufs coming out
403  * of the host networking stack.  The mbuf will get converted to a packet,
404  * and enqueued to the classq of a Skywalk native interface.
405  */
406 int
nx_netif_host_output(struct ifnet * ifp,struct mbuf * m)407 nx_netif_host_output(struct ifnet *ifp, struct mbuf *m)
408 {
409 	struct nx_netif *nif = NA(ifp)->nifna_netif;
410 	struct kern_nexus *nx = nif->nif_nx;
411 	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
412 	struct nexus_adapter *hostna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST);
413 	struct __kern_channel_ring *kring;
414 	uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));
415 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(hwna->na_nx)->nif_stats;
416 	struct __kern_packet *kpkt;
417 	uint64_t qset_id;
418 	errno_t error = ENOBUFS;
419 	boolean_t pkt_drop = FALSE;
420 
421 	/*
422 	 * nx_netif_host_catch_tx() must only be steering the output
423 	 * packets here only for native interfaces, otherwise we must
424 	 * not get here for compat.
425 	 */
426 	ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
427 	ASSERT(m->m_nextpkt == NULL);
428 	ASSERT(hostna->na_type == NA_NETIF_HOST);
429 	ASSERT(sc_idx < KPKT_SC_MAX_CLASSES);
430 
431 	kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
432 	KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_START), SK_KVA(kring));
433 	if (__improbable(!NA_IS_ACTIVE(hwna) || !NA_IS_ACTIVE(hostna))) {
434 		STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
435 		SK_ERR("\"%s\" (0x%llx) not in skywalk mode anymore",
436 		    hwna->na_name, SK_KVA(hwna));
437 		error = ENXIO;
438 		pkt_drop = TRUE;
439 		goto done;
440 	}
441 	/*
442 	 * Drop if the kring no longer accepts packets.
443 	 */
444 	if (__improbable(KR_DROP(&hostna->na_rx_rings[0]) || KR_DROP(kring))) {
445 		STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
446 		/* not a serious error, so no need to be chatty here */
447 		SK_DF(SK_VERB_NETIF,
448 		    "kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
449 		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
450 		    CKRF_BITS, ifp->if_xname);
451 		error = ENXIO;
452 		pkt_drop = TRUE;
453 		goto done;
454 	}
455 	if (__improbable(((unsigned)m_pktlen(m) + ifp->if_tx_headroom) >
456 	    kring->ckr_max_pkt_len)) { /* too long for us */
457 		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
458 		SK_ERR("\"%s\" (0x%llx) from_host, drop packet size %u > %u",
459 		    hwna->na_name, SK_KVA(hwna), m_pktlen(m),
460 		    kring->ckr_max_pkt_len);
461 		pkt_drop = TRUE;
462 		goto done;
463 	}
464 	/*
465 	 * Convert mbuf to packet and enqueue it.
466 	 */
467 	kpkt = nx_netif_mbuf_to_kpkt(hwna, m);
468 	if (__probable(kpkt != NULL)) {
469 		if ((m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) == 0 &&
470 		    pktap_total_tap_count != 0) {
471 			int af = get_af_from_mbuf(m);
472 
473 			if (af != AF_UNSPEC) {
474 				nx_netif_pktap_output(ifp, af, kpkt);
475 			}
476 		}
477 		if (NX_LLINK_PROV(nif->nif_nx) &&
478 		    ifp->if_traffic_rule_count > 0 &&
479 		    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
480 		    kpkt, &qset_id) == 0) {
481 			struct netif_qset *qset;
482 
483 			/*
484 			 * This always returns a qset because if the qset id
485 			 * is invalid the default qset is returned.
486 			 */
487 			qset = nx_netif_find_qset(nif, qset_id);
488 			ASSERT(qset != NULL);
489 			kpkt->pkt_qset_idx = qset->nqs_idx;
490 			error = ifnet_enqueue_ifcq_pkt(ifp, qset->nqs_ifcq, kpkt,
491 			    false, &pkt_drop);
492 			nx_netif_qset_release(&qset);
493 		} else {
494 			/* callee consumes packet */
495 			error = ifnet_enqueue_pkt(ifp, kpkt, false, &pkt_drop);
496 		}
497 		netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
498 		if (pkt_drop) {
499 			STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
500 		}
501 	} else {
502 		error = ENOBUFS;
503 		pkt_drop = TRUE;
504 	}
505 done:
506 	/* always free mbuf (even in the success case) */
507 	m_freem(m);
508 	if (__improbable(pkt_drop)) {
509 		STATS_INC(nifs, NETIF_STATS_DROP);
510 	}
511 
512 	KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_END), SK_KVA(kring),
513 	    error);
514 
515 	return error;
516 }
517 
518 static inline int
get_l2_hlen(struct mbuf * m,uint8_t * l2len)519 get_l2_hlen(struct mbuf *m, uint8_t *l2len)
520 {
521 	char *pkt_hdr;
522 	struct mbuf *m0;
523 	uint64_t len = 0;
524 
525 	pkt_hdr = m->m_pkthdr.pkt_hdr;
526 	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
527 		if (pkt_hdr >= m0->m_data && pkt_hdr < m0->m_data + m0->m_len) {
528 			break;
529 		}
530 		len += m0->m_len;
531 	}
532 	if (m0 == NULL) {
533 		DTRACE_SKYWALK2(bad__pkthdr, struct mbuf *, m, char *, pkt_hdr);
534 		return EINVAL;
535 	}
536 	len += (pkt_hdr - m0->m_data);
537 	if (len > UINT8_MAX) {
538 		DTRACE_SKYWALK2(bad__l2len, struct mbuf *, m, uint64_t, len);
539 		return EINVAL;
540 	}
541 	*l2len = (uint8_t)len;
542 	return 0;
543 }
544 
#if SK_LOG
/*
 * Verbose logging for nx_netif_mbuf_to_kpkt(): prints the mbuf length and
 * the packet's length, headroom, L2 length and payload offset, followed
 * by a dump of the packet buffer.  Kept out of line to reduce the kernel
 * stack footprint of the caller.
 */
SK_LOG_ATTRIBUTE
static void
nx_netif_mbuf_to_kpkt_log(struct __kern_packet *kpkt, uint32_t len,
    uint32_t poff)
{
	uint8_t *data;

	/* Resolve the address of the packet's buffer into 'data'. */
	MD_BUFLET_ADDR_ABS(kpkt, data);

	SK_DF(SK_VERB_HOST | SK_VERB_TX, "mlen %u dplen %u"
	    " hr %u l2 %u poff %u", len, kpkt->pkt_length,
	    kpkt->pkt_headroom, kpkt->pkt_l2_len, poff);

	SK_DF(SK_VERB_HOST | SK_VERB_TX | SK_VERB_DUMP, "%s",
	    sk_dump("buf", data, kpkt->pkt_length, 128, NULL, 0));
}
#endif /* SK_LOG */
561 
562 static inline struct __kern_packet *
nx_netif_mbuf_to_kpkt(struct nexus_adapter * na,struct mbuf * m)563 nx_netif_mbuf_to_kpkt(struct nexus_adapter *na, struct mbuf *m)
564 {
565 	struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
566 	struct nexus_netif_adapter *nifna = NIFNA(na);
567 	struct nx_netif *nif = nifna->nifna_netif;
568 	uint16_t poff = na->na_ifp->if_tx_headroom;
569 	uint32_t len;
570 	struct kern_pbufpool *pp;
571 	struct __kern_packet *kpkt;
572 	kern_packet_t ph;
573 	boolean_t copysum;
574 	uint8_t l2hlen;
575 	int err;
576 
577 	pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
578 	ASSERT((pp != NULL) && (pp->pp_md_type == NEXUS_META_TYPE_PACKET) &&
579 	    (pp->pp_md_subtype == NEXUS_META_SUBTYPE_RAW));
580 	ASSERT(!PP_HAS_TRUNCATED_BUF(pp));
581 
582 	len = m_pktlen(m);
583 	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pp) * pp->pp_max_frags));
584 
585 	/* alloc packet */
586 	ph = pp_alloc_packet_by_size(pp, poff + len, SKMEM_NOSLEEP);
587 	if (__improbable(ph == 0)) {
588 		STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
589 		SK_DF(SK_VERB_MEM,
590 		    "%s(%d) pp \"%s\" (0x%llx) has no more "
591 		    "packet for %s", sk_proc_name_address(current_proc()),
592 		    sk_proc_pid(current_proc()), pp->pp_name, SK_KVA(pp),
593 		    if_name(na->na_ifp));
594 		return NULL;
595 	}
596 
597 	copysum = ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID |
598 	    CSUM_PARTIAL)) == (CSUM_DATA_VALID | CSUM_PARTIAL));
599 
600 	STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
601 	if (copysum) {
602 		STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM);
603 	}
604 
605 	kpkt = SK_PTR_ADDR_KPKT(ph);
606 	kpkt->pkt_link_flags = 0;
607 	nif->nif_pkt_copy_from_mbuf(NR_TX, ph, poff, m, 0, len,
608 	    copysum, m->m_pkthdr.csum_tx_start);
609 
610 	kpkt->pkt_headroom = (uint8_t)poff;
611 	if ((err = get_l2_hlen(m, &l2hlen)) == 0) {
612 		kpkt->pkt_l2_len = l2hlen;
613 	} else {
614 		kpkt->pkt_l2_len = 0;
615 	}
616 	/* finalize the packet */
617 	METADATA_ADJUST_LEN(kpkt, 0, poff);
618 	err = __packet_finalize(ph);
619 	VERIFY(err == 0);
620 
621 #if SK_LOG
622 	if (__improbable((sk_verbose & SK_VERB_HOST) != 0) && kpkt != NULL) {
623 		nx_netif_mbuf_to_kpkt_log(kpkt, len, poff);
624 	}
625 #endif /* SK_LOG */
626 
627 	return kpkt;
628 }
629