/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#define _IP_VHL
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <net/ethernet.h>
#include <net/pktap.h>
#include <sys/kdebug.h>
#include <sys/sdt.h>

#define DBG_FUNC_NX_NETIF_HOST_ENQUEUE  \
	SKYWALKDBG_CODE(DBG_SKYWALK_NETIF, 2)

static void nx_netif_host_catch_tx(struct nexus_adapter *, bool);
static inline struct __kern_packet*
nx_netif_mbuf_to_kpkt(struct nexus_adapter *, struct mbuf *);

#define SK_IFCAP_CSUM   (IFCAP_HWCSUM|IFCAP_CSUM_PARTIAL|IFCAP_CSUM_ZERO_INVERT)

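/*
 * Toggle the interface checksum/TSO capabilities around host adapter
 * activation: on activate, the driver-advertised flags are saved in the
 * nx_netif and replaced with what the Skywalk layer provides; on
 * deactivate, the saved driver flags are restored.
 */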
static void
nx_netif_host_adjust_if_capabilities(struct nexus_adapter *na, bool activate)
{
	struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;
	struct ifnet *ifp = na->na_ifp;

	ifnet_lock_exclusive(ifp);

	if (activate) {
		/* XXX: [email protected] - disable TSO and LRO for now */
		nif->nif_hwassist = ifp->if_hwassist;
		nif->nif_capabilities = ifp->if_capabilities;
		nif->nif_capenable = ifp->if_capenable;
		ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_TSOF);
		ifp->if_capabilities &= ~(SK_IFCAP_CSUM | IFCAP_TSO);
		ifp->if_capenable &= ~(SK_IFCAP_CSUM | IFCAP_TSO);

		/*
		 * Re-enable the capabilities which the Skywalk layer provides:
		 *
		 * Native driver: a copy from packet to mbuf always occurs
		 * for each inbound and outbound packet; if hardware
		 * does not support csum offload, we leverage combined
		 * copy and checksum, and thus advertise IFNET_CSUM_PARTIAL.
		 * We also always enable 16KB jumbo mbuf support.
		 *
		 * Compat driver: inbound and outbound mbufs don't incur a
		 * copy, and so leave the driver advertised flags alone.
		 */
		if (NA_KERNEL_ONLY(na)) {
			if (na->na_type == NA_NETIF_HOST) {     /* native */
				ifp->if_hwassist |=
				    IFNET_MULTIPAGES | (nif->nif_hwassist &
				    (IFNET_CHECKSUMF | IFNET_TSOF));
				ifp->if_capabilities |=
				    (nif->nif_capabilities &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
				ifp->if_capenable |=
				    (nif->nif_capenable &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
				/*
				 * If hardware doesn't support IP and TCP/UDP csum offload,
				 * advertise IFNET_CSUM_PARTIAL.
				 */
				if ((ifp->if_hwassist & IFNET_UDP_TCP_TX_CHECKSUMF) !=
				    IFNET_UDP_TCP_TX_CHECKSUMF) {
					ifp->if_hwassist |= IFNET_CSUM_PARTIAL | IFNET_CSUM_ZERO_INVERT;
					ifp->if_capabilities |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
					ifp->if_capenable |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
				}
				if (sk_fsw_tx_agg_tcp != 0) {
					ifp->if_hwassist |= IFNET_TSOF;
					ifp->if_capabilities |= IFCAP_TSO;
					ifp->if_capenable |= IFCAP_TSO;
				}
			} else {                                /* compat */
				ifp->if_hwassist |=
				    (nif->nif_hwassist &
				    (IFNET_CHECKSUMF | IFNET_TSOF));
				ifp->if_capabilities |=
				    (nif->nif_capabilities &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
				ifp->if_capenable |=
				    (nif->nif_capenable &
				    (SK_IFCAP_CSUM | IFCAP_TSO));
			}
		}
	} else {
		/* Unset any capabilities previously set by Skywalk */
		ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_MULTIPAGES);
		ifp->if_capabilities &= ~SK_IFCAP_CSUM;
		ifp->if_capenable &= ~SK_IFCAP_CSUM;
		if ((sk_fsw_tx_agg_tcp != 0) &&
		    (na->na_type == NA_NETIF_HOST)) {
			ifp->if_hwassist &= ~IFNET_TSOF;
			ifp->if_capabilities &= ~IFCAP_TSO;
			ifp->if_capenable &= ~IFCAP_TSO;
		}
		/* Restore driver original flags */
		ifp->if_hwassist |= (nif->nif_hwassist &
		    (IFNET_CHECKSUMF | IFNET_TSOF | IFNET_MULTIPAGES));
		ifp->if_capabilities |=
		    (nif->nif_capabilities & (SK_IFCAP_CSUM | IFCAP_TSO));
		ifp->if_capenable |=
		    (nif->nif_capenable & (SK_IFCAP_CSUM | IFCAP_TSO));
	}

	ifnet_lock_done(ifp);
}

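/*
 * Returns true if the host adapter should use the software GSO path
 * (netif_gso_dispatch) on output: native netif plumbed under a
 * flowswitch, no hardware TSO, and TX aggregation enabled.
 */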
static bool
nx_netif_host_is_gso_needed(struct nexus_adapter *na)
{
	struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;

	/*
	 * Don't enable for compat netif.
	 */
	if (na->na_type != NA_NETIF_HOST) {
		return false;
	}
	/*
	 * Don't enable if netif is not plumbed under a flowswitch.
	 */
	if (!NA_KERNEL_ONLY(na)) {
		return false;
	}
	/*
	 * Don't enable if HW TSO is enabled.
	 */
	if (((nif->nif_hwassist & IFNET_TSO_IPV4) != 0) ||
	    ((nif->nif_hwassist & IFNET_TSO_IPV6) != 0)) {
		return false;
	}
	/*
	 * Don't enable if TX aggregation is disabled.
	 */
	if (sk_fsw_tx_agg_tcp == 0) {
		return false;
	}
	return true;
}

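/* na_activate callback for netif host adapters */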
int
nx_netif_host_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
	struct ifnet *ifp = na->na_ifp;
	int error = 0;

	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
	    SK_KVA(na), na_activate_mode2str(mode));

	switch (mode) {
	case NA_ACTIVATE_MODE_ON:
		VERIFY(SKYWALK_CAPABLE(ifp));

		nx_netif_host_adjust_if_capabilities(na, true);
		/*
		 * Make Skywalk control the packet steering.  Don't
		 * intercept tx packets if this is a netif compat
		 * adapter attached to a flowswitch.
		 */
		nx_netif_host_catch_tx(na, true);

		os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
		break;

	case NA_ACTIVATE_MODE_DEFUNCT:
		VERIFY(SKYWALK_CAPABLE(ifp));
		break;

	case NA_ACTIVATE_MODE_OFF:
		/* Release packet steering control. */
		nx_netif_host_catch_tx(na, false);

		/*
		 * Note that here we cannot assert SKYWALK_CAPABLE()
		 * as we're called in the destructor path.
		 */
		os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);

		nx_netif_host_adjust_if_capabilities(na, false);
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	return error;
}

/* na_krings_create callback for netif host adapters */
int
nx_netif_host_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
	int ret;

	SK_LOCK_ASSERT_HELD();
	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	ret = na_rings_mem_setup(na, FALSE, ch);
	if (ret == 0) {
		struct __kern_channel_ring *kring;
		uint32_t i;

		/* drop by default until fully bound */
		if (NA_KERNEL_ONLY(na)) {
			na_kr_drop(na, TRUE);
		}

		for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
			kring = &NAKR(na, NR_RX)[i];
			/* initialize the nx_mbq for the sw rx ring */
			nx_mbq_safe_init(kring, &kring->ckr_rx_queue,
			    NX_MBQ_NO_LIMIT, &nexus_mbq_lock_group,
			    &nexus_lock_attr);
			SK_DF(SK_VERB_NETIF,
			    "na \"%s\" (0x%llx) initialized host kr \"%s\" "
			    "(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
			    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
			    CKRF_BITS);
		}
	}
	return ret;
}

/*
 * Destructor for netif host adapters; they also have an mbuf queue
 * on the rings connected to the host, so we need to purge them first.
 */
void
nx_netif_host_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
	struct __kern_channel_ring *kring;
	uint32_t i;

	SK_LOCK_ASSERT_HELD();
	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	if (NA_KERNEL_ONLY(na)) {
		na_kr_drop(na, TRUE);
	}

	for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
		struct nx_mbq *q;

		kring = &NAKR(na, NR_RX)[i];
		q = &kring->ckr_rx_queue;
		SK_DF(SK_VERB_NETIF,
		    "na \"%s\" (0x%llx) destroy host kr \"%s\" (0x%llx) "
		    "krflags 0x%b with qlen %u", na->na_name, SK_KVA(na),
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS, nx_mbq_len(q));
		nx_mbq_purge(q);
		if (!defunct) {
			nx_mbq_safe_destroy(q);
		}
	}

	na_rings_mem_teardown(na, ch, defunct);
}

/* kring->ckr_na_sync callback for the host rx ring */
int
nx_netif_host_na_rxsync(struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags)
{
#pragma unused(kring, p, flags)
	return 0;
}

/*
 * kring->ckr_na_sync callback for the host tx ring.
 */
int
nx_netif_host_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
	return 0;
}

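/* na_special callback for netif host adapters; defers to the common handler */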
int
nx_netif_host_na_special(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr, nxspec_cmd_t spec_cmd)
{
	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	return nx_netif_na_special_common(na, ch, chr, spec_cmd);
}

/*
 * Intercept the packet steering routine in the tx path,
 * so that we can decide which queue is used for an mbuf.
 * Second argument is TRUE to intercept, FALSE to restore.
 */
static void
nx_netif_host_catch_tx(struct nexus_adapter *na, bool activate)
{
	struct ifnet *ifp = na->na_ifp;
	int err = 0;

	ASSERT(na->na_type == NA_NETIF_HOST ||
	    na->na_type == NA_NETIF_COMPAT_HOST);
	ASSERT(na->na_flags & NAF_HOST_ONLY);

	/*
	 * The common case is NA_KERNEL_ONLY, i.e. the netif is plumbed
	 * below the flowswitch.  For TXSTART compat driver and legacy:
	 * don't intercept the DLIL output handler, since in this model
	 * packets from both the BSD stack and the flowswitch are directly
	 * enqueued to the classq via ifnet_enqueue().
	 *
	 * Otherwise, it's the uncommon case where a user channel is
	 * opened directly to the netif.  Here we either intercept
	 * or restore the DLIL output handler.
	 */
	if (activate) {
		if (__improbable(!NA_KERNEL_ONLY(na))) {
			return;
		}
		/*
		 * For native drivers only, intercept if_output();
		 * for compat, leave it alone since we don't need
		 * to perform any mbuf-pkt conversion.
		 */
		if (na->na_type == NA_NETIF_HOST) {
			err = ifnet_set_output_handler(ifp,
			    nx_netif_host_is_gso_needed(na) ?
			    netif_gso_dispatch : nx_netif_host_output);
			VERIFY(err == 0);
		}
	} else {
		if (__improbable(!NA_KERNEL_ONLY(na))) {
			return;
		}
		/*
		 * Restore original if_output() for native drivers.
		 */
		if (na->na_type == NA_NETIF_HOST) {
			ifnet_reset_output_handler(ifp);
		}
	}
}

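/*
 * Determine the address family of an outbound mbuf by locating the mbuf
 * that contains m_pkthdr.pkt_hdr (the L3 header) and inspecting its IP
 * version nibble; returns AF_UNSPEC if the header can't be found.
 * Used below to feed the right protocol family to pktap.
 */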
static int
get_af_from_mbuf(struct mbuf *m)
{
	uint8_t *pkt_hdr;
	uint8_t ipv;
	struct mbuf *m0;
	int af;

	pkt_hdr = m->m_pkthdr.pkt_hdr;
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if (pkt_hdr >= (uint8_t *)m0->m_data &&
		    pkt_hdr < (uint8_t *)m0->m_data + m0->m_len) {
			break;
		}
	}
	if (m0 == NULL) {
		DTRACE_SKYWALK1(bad__pkthdr, struct mbuf *, m);
		af = AF_UNSPEC;
		goto done;
	}
	ipv = IP_VHL_V(*pkt_hdr);
	if (ipv == 4) {
		af = AF_INET;
	} else if (ipv == 6) {
		af = AF_INET6;
	} else {
		af = AF_UNSPEC;
	}
done:
	DTRACE_SKYWALK2(mbuf__af, int, af, struct mbuf *, m);
	return af;
}

/*
 * if_output() callback called by dlil_output() to handle mbufs coming out
 * of the host networking stack.  The mbuf will get converted to a packet,
 * and enqueued to the classq of a Skywalk native interface.
 */
int
nx_netif_host_output(struct ifnet *ifp, struct mbuf *m)
{
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	struct nexus_adapter *hostna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST);
	struct __kern_channel_ring *kring;
	uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(hwna->na_nx)->nif_stats;
	struct __kern_packet *kpkt;
	uint64_t qset_id;
	errno_t error = ENOBUFS;
	boolean_t pkt_drop = FALSE;

	/*
	 * nx_netif_host_catch_tx() must be steering output packets here
	 * only for native interfaces; we must never get here for compat.
	 */
	ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
	ASSERT(m->m_nextpkt == NULL);
	ASSERT(hostna->na_type == NA_NETIF_HOST);
	ASSERT(sc_idx < KPKT_SC_MAX_CLASSES);

	kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
	KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_START), SK_KVA(kring));
	if (__improbable(!NA_IS_ACTIVE(hwna) || !NA_IS_ACTIVE(hostna))) {
		STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
		SK_ERR("\"%s\" (0x%llx) not in skywalk mode anymore",
		    hwna->na_name, SK_KVA(hwna));
		error = ENXIO;
		pkt_drop = TRUE;
		goto done;
	}
	/*
	 * Drop if the kring no longer accepts packets.
	 */
	if (__improbable(KR_DROP(&hostna->na_rx_rings[0]) || KR_DROP(kring))) {
		STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
		/* not a serious error, so no need to be chatty here */
		SK_DF(SK_VERB_NETIF,
		    "kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS, ifp->if_xname);
		error = ENXIO;
		pkt_drop = TRUE;
		goto done;
	}
	if (__improbable(((unsigned)m_pktlen(m) + ifp->if_tx_headroom) >
	    kring->ckr_max_pkt_len)) { /* too long for us */
		STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
		SK_ERR("\"%s\" (0x%llx) from_host, drop packet size %u > %u",
		    hwna->na_name, SK_KVA(hwna), m_pktlen(m),
		    kring->ckr_max_pkt_len);
		pkt_drop = TRUE;
		goto done;
	}
	/*
	 * Convert mbuf to packet and enqueue it.
	 */
	kpkt = nx_netif_mbuf_to_kpkt(hwna, m);
	if (__probable(kpkt != NULL)) {
		if ((m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) == 0 &&
		    pktap_total_tap_count != 0) {
			int af = get_af_from_mbuf(m);

			if (af != AF_UNSPEC) {
				nx_netif_pktap_output(ifp, af, kpkt);
			}
		}
		if (NX_LLINK_PROV(nif->nif_nx) &&
		    ifp->if_traffic_rule_count > 0 &&
		    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
		    kpkt, &qset_id) == 0) {
			struct netif_qset *qset;

			/*
			 * This always returns a qset because if the qset id
			 * is invalid the default qset is returned.
			 */
			qset = nx_netif_find_qset(nif, qset_id);
			ASSERT(qset != NULL);
			kpkt->pkt_qset_idx = qset->nqs_idx;
			error = ifnet_enqueue_ifcq_pkt(ifp, qset->nqs_ifcq, kpkt,
			    false, &pkt_drop);
			nx_netif_qset_release(&qset);
		} else {
			/* callee consumes packet */
			error = ifnet_enqueue_pkt(ifp, kpkt, false, &pkt_drop);
		}
		netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
		if (pkt_drop) {
			STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
		}
	} else {
		error = ENOBUFS;
		pkt_drop = TRUE;
	}
done:
	/* always free mbuf (even in the success case) */
	m_freem(m);
	if (__improbable(pkt_drop)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}

	KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_END), SK_KVA(kring),
	    error);

	return error;
}

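/*
 * Compute the link-layer header length as the offset of
 * m_pkthdr.pkt_hdr (the L3 header) from the start of the mbuf chain's
 * data; fails with EINVAL if pkt_hdr doesn't point into the chain or
 * the offset doesn't fit in a uint8_t.
 */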
static inline int
get_l2_hlen(struct mbuf *m, uint8_t *l2len)
{
	char *pkt_hdr;
	struct mbuf *m0;
	uint64_t len = 0;

	pkt_hdr = m->m_pkthdr.pkt_hdr;
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if (pkt_hdr >= m_mtod_current(m0) && pkt_hdr < m_mtod_current(m0) + m0->m_len) {
			break;
		}
		len += m0->m_len;
	}
	if (m0 == NULL) {
		DTRACE_SKYWALK2(bad__pkthdr, struct mbuf *, m, char *, pkt_hdr);
		return EINVAL;
	}
	len += (pkt_hdr - m_mtod_current(m0));
	if (len > UINT8_MAX) {
		DTRACE_SKYWALK2(bad__l2len, struct mbuf *, m, uint64_t, len);
		return EINVAL;
	}
	*l2len = (uint8_t)len;
	return 0;
}

#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_mbuf_to_kpkt_log(struct __kern_packet *kpkt, uint32_t len,
    uint32_t poff)
{
	uint8_t *baddr;
	MD_BUFLET_ADDR_ABS(kpkt, baddr);
	SK_DF(SK_VERB_HOST | SK_VERB_TX, "mlen %u dplen %u"
	    " hr %u l2 %u poff %u", len, kpkt->pkt_length,
	    kpkt->pkt_headroom, kpkt->pkt_l2_len, poff);
	SK_DF(SK_VERB_HOST | SK_VERB_TX | SK_VERB_DUMP, "%s",
	    sk_dump("buf", baddr, kpkt->pkt_length, 128, NULL, 0));
}
#endif /* SK_LOG */

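/*
 * Convert a host-stack mbuf into a __kern_packet: allocate a packet from
 * the device adapter's TX pbufpool, copy the mbuf payload at the
 * interface's TX headroom offset (folding the partial checksum during
 * the copy when CSUM_PARTIAL is requested), record headroom and L2
 * header length, then finalize the packet.  Returns NULL if allocation
 * fails.
 */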
static inline struct __kern_packet *
nx_netif_mbuf_to_kpkt(struct nexus_adapter *na, struct mbuf *m)
{
	struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
	struct nexus_netif_adapter *nifna = NIFNA(na);
	struct nx_netif *nif = nifna->nifna_netif;
	uint16_t poff = na->na_ifp->if_tx_headroom;
	uint32_t len;
	struct kern_pbufpool *pp;
	struct __kern_packet *kpkt;
	kern_packet_t ph;
	boolean_t copysum;
	uint8_t l2hlen;
	int err;

	pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
	ASSERT((pp != NULL) && (pp->pp_md_type == NEXUS_META_TYPE_PACKET) &&
	    (pp->pp_md_subtype == NEXUS_META_SUBTYPE_RAW));
	ASSERT(!PP_HAS_TRUNCATED_BUF(pp));

	len = m_pktlen(m);
	VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pp) * pp->pp_max_frags));

	/* alloc packet */
	ph = pp_alloc_packet_by_size(pp, poff + len, SKMEM_NOSLEEP);
	if (__improbable(ph == 0)) {
		STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
		SK_DF(SK_VERB_MEM,
		    "%s(%d) pp \"%s\" (0x%llx) has no more "
		    "packet for %s", sk_proc_name_address(current_proc()),
		    sk_proc_pid(current_proc()), pp->pp_name, SK_KVA(pp),
		    if_name(na->na_ifp));
		return NULL;
	}

	copysum = ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID |
	    CSUM_PARTIAL)) == (CSUM_DATA_VALID | CSUM_PARTIAL));

	STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
	if (copysum) {
		STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM);
	}

	kpkt = SK_PTR_ADDR_KPKT(ph);
	kpkt->pkt_link_flags = 0;
	nif->nif_pkt_copy_from_mbuf(NR_TX, ph, poff, m, 0, len,
	    copysum, m->m_pkthdr.csum_tx_start);

	kpkt->pkt_headroom = (uint8_t)poff;
	if ((err = get_l2_hlen(m, &l2hlen)) == 0) {
		kpkt->pkt_l2_len = l2hlen;
	} else {
		kpkt->pkt_l2_len = 0;
	}
	/* finalize the packet */
	METADATA_ADJUST_LEN(kpkt, 0, poff);
	err = __packet_finalize(ph);
	VERIFY(err == 0);

#if SK_LOG
	if (__improbable((sk_verbose & SK_VERB_HOST) != 0) && kpkt != NULL) {
		nx_netif_mbuf_to_kpkt_log(kpkt, len, poff);
	}
#endif /* SK_LOG */

	return kpkt;
}