xref: /xnu-8019.80.24/bsd/skywalk/nexus/netif/nx_netif_gso.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  *   1. Redistributions of source code must retain the above copyright
37  *      notice, this list of conditions and the following disclaimer.
38  *   2. Redistributions in binary form must reproduce the above copyright
39  *      notice, this list of conditions and the following disclaimer in the
40  *      documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  */
54 
55 #include <sys/param.h>
56 #include <sys/kernel.h>
57 #include <sys/types.h>
58 #include <sys/systm.h>
59 #include <sys/mbuf.h>
60 #include <sys/socket.h>
61 #include <sys/sysctl.h>
62 #include <sys/malloc.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/ip.h>
67 #include <netinet/tcp.h>
68 #include <netinet/tcpip.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/ethernet.h>
75 #include <net/pktap.h>
76 #include <skywalk/os_skywalk_private.h>
77 #include <skywalk/nexus/netif/nx_netif.h>
78 
/*
 * The GSO type of an mbuf is carried in two bits of its csum_flags word.
 * CSUM_TO_GSO() isolates those bits and shifts them down so the result can
 * be used directly as an index (see enum netif_gso_type).
 *
 * The macro argument is fully parenthesized so that compound expressions
 * such as CSUM_TO_GSO(a | b) are masked as a whole.
 */
#define CSUM_GSO_MASK    0x00300000
#define CSUM_GSO_OFFSET  20
#define CSUM_TO_GSO(x) (((x) & CSUM_GSO_MASK) >> CSUM_GSO_OFFSET)
82 
/*
 * GSO handler selector; the values are used as indices into
 * netif_gso_functions[].
 */
enum netif_gso_type {
	GSO_NONE        = 0,    /* no segmentation requested */
	GSO_TCP4        = 1,    /* TCP over IPv4 */
	GSO_TCP6        = 2,    /* TCP over IPv6 */
	GSO_END_OF_TYPE = 3     /* number of valid types; not a type itself */
};
89 
/*
 * Non-zero allows segmented packets to be enqueued as a single chain
 * (see netif_chain_enqueue_enabled()); zero forces one enqueue per packet.
 * Exposed as a read/write sysctl on DEVELOPMENT and DEBUG kernels only.
 */
uint32_t netif_chain_enqueue = 1;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, chain_enqueue,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &netif_chain_enqueue, 0, "netif chain enqueue");
#endif /* (DEVELOPMENT || DEBUG) */
96 
97 /*
98  * Array of function pointers that execute GSO depending on packet type
99  */
100 int (*netif_gso_functions[GSO_END_OF_TYPE]) (struct ifnet*, struct mbuf*);
101 
/*
 * State carried from one segment to the next while a single TCP packet is
 * being segmented.
 */
struct netif_gso_ip_tcp_state {
	/* re-point hdr/tcp at the headers inside a segment's own buffer */
	void (*update)(struct netif_gso_ip_tcp_state*,
	    struct __kern_packet *pkt, uint8_t *baddr);
	/* finalize the segment's IP/TCP headers and advance tcp_seq */
	void (*internal)(struct netif_gso_ip_tcp_state*, uint32_t partial,
	    uint16_t payload_len);
	/* IP header currently being worked on; the member used follows af */
	union {
		struct ip *ip;
		struct ip6_hdr *ip6;
	} hdr;
	int af;                         /* AF_INET or AF_INET6 */
	struct tcphdr *tcp;             /* TCP header currently being worked on */
	struct kern_pbufpool *pp;       /* not referenced by the code in this file */
	uint32_t psuedo_hdr_csum;       /* pseudo-header sum over src/dst addresses */
	uint32_t tcp_seq;               /* host-order sequence number of next segment */
	uint16_t hlen;                  /* mac_hlen + ip_hlen + tcp_hlen */
	uint16_t mss;                   /* payload bytes per segment (tso_segsz) */
	uint16_t ip_id;                 /* host-order IPv4 id of next segment */
	uint8_t mac_hlen;               /* link-layer header length */
	uint8_t ip_hlen;                /* IP header length */
	uint8_t tcp_hlen;               /* TCP header length (th_off << 2) */
};
126 
127 static inline uint8_t
netif_gso_get_frame_header_len(struct mbuf * m,uint8_t * hlen)128 netif_gso_get_frame_header_len(struct mbuf *m, uint8_t *hlen)
129 {
130 	uint64_t len;
131 	char *ph = m->m_pkthdr.pkt_hdr;
132 
133 	if (__improbable(m_pktlen(m) == 0 || ph == NULL ||
134 	    ph < (char *)m->m_data)) {
135 		return ERANGE;
136 	}
137 	len = (ph - m->m_data);
138 	if (__improbable(len > UINT8_MAX)) {
139 		return ERANGE;
140 	}
141 	*hlen = (uint8_t)len;
142 	return 0;
143 }
144 
145 static inline int
netif_gso_check_netif_active(struct ifnet * ifp,struct mbuf * m,struct kern_pbufpool ** pp)146 netif_gso_check_netif_active(struct ifnet *ifp, struct mbuf *m,
147     struct kern_pbufpool **pp)
148 {
149 	struct __kern_channel_ring *kring;
150 	struct nx_netif *nif = NA(ifp)->nifna_netif;
151 	struct netif_stats *nifs = &nif->nif_stats;
152 	struct kern_nexus *nx = nif->nif_nx;
153 	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
154 	uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));
155 
156 	if (__improbable(!NA_IS_ACTIVE(hwna))) {
157 		STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
158 		SK_DF(SK_VERB_NETIF,
159 		    "\"%s\" (0x%llx) not in skywalk mode anymore",
160 		    hwna->na_name, SK_KVA(hwna));
161 		return ENXIO;
162 	}
163 
164 	VERIFY(sc_idx < KPKT_SC_MAX_CLASSES);
165 	kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
166 	if (__improbable(KR_DROP(kring))) {
167 		STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
168 		SK_DF(SK_VERB_NETIF,
169 		    "kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
170 		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
171 		    CKRF_BITS, ifp->if_xname);
172 		return ENXIO;
173 	}
174 	*pp = kring->ckr_pp;
175 	return 0;
176 }
177 
178 static inline boolean_t
netif_chain_enqueue_enabled(struct ifnet * ifp)179 netif_chain_enqueue_enabled(struct ifnet *ifp)
180 {
181 	return netif_chain_enqueue != 0 && ifp->if_output_netem == NULL &&
182 	       (ifp->if_eflags & IFEF_ENQUEUE_MULTI) == 0;
183 }
184 
185 static inline int
netif_gso_send(struct ifnet * ifp,struct __kern_packet * head,struct __kern_packet * tail,uint32_t count,uint32_t bytes)186 netif_gso_send(struct ifnet *ifp, struct __kern_packet *head,
187     struct __kern_packet *tail, uint32_t count, uint32_t bytes)
188 {
189 	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
190 	int error = 0;
191 	boolean_t dropped;
192 
193 	if (netif_chain_enqueue_enabled(ifp)) {
194 		dropped = false;
195 		error = ifnet_enqueue_pkt_chain(ifp, head, tail, count, bytes,
196 		    false, &dropped);
197 		if (__improbable(dropped)) {
198 			STATS_ADD(nifs, NETIF_STATS_TX_DROP_ENQ_AQM, count);
199 			STATS_ADD(nifs, NETIF_STATS_DROP, count);
200 		}
201 	} else {
202 		struct __kern_packet *pkt = head, *next;
203 		uint32_t c = 0, b = 0;
204 
205 		while (pkt != NULL) {
206 			int err;
207 
208 			next = pkt->pkt_nextpkt;
209 			pkt->pkt_nextpkt = NULL;
210 			c++;
211 			b += pkt->pkt_length;
212 
213 			dropped = false;
214 			err = ifnet_enqueue_pkt(ifp, pkt, false, &dropped);
215 			if (error == 0 && __improbable(err != 0)) {
216 				error = err;
217 			}
218 			if (__improbable(dropped)) {
219 				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
220 				STATS_INC(nifs, NETIF_STATS_DROP);
221 			}
222 			pkt = next;
223 		}
224 		ASSERT(c == count);
225 		ASSERT(b == bytes);
226 	}
227 	netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
228 	return error;
229 }
230 
231 /*
232  * Segment and transmit a queue of packets which fit the given mss + hdr_len.
233  * m points to mbuf chain to be segmented.
234  * This function splits the payload (m-> m_pkthdr.len - hdr_len)
235  * into segments of length MSS bytes and then copy the first hdr_len bytes
236  * from m at the top of each segment.
237  */
238 static inline int
netif_gso_tcp_segment_mbuf(struct mbuf * m,struct ifnet * ifp,struct netif_gso_ip_tcp_state * state,struct kern_pbufpool * pp)239 netif_gso_tcp_segment_mbuf(struct mbuf *m, struct ifnet *ifp,
240     struct netif_gso_ip_tcp_state *state, struct kern_pbufpool *pp)
241 {
242 	uuid_t euuid;
243 	struct pktq pktq_alloc, pktq_seg;
244 	uint64_t timestamp = 0;
245 	uint64_t pflags;
246 	int error = 0;
247 	uint32_t policy_id;
248 	uint32_t svc_class;
249 	uint32_t n, n_pkts, n_bytes;
250 	int32_t off = 0, total_len = m->m_pkthdr.len;
251 	uint8_t tx_headroom = (uint8_t)ifp->if_tx_headroom;
252 	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
253 	struct __kern_packet *pkt_chain_head, *pkt_chain_tail;
254 	uint16_t mss = state->mss;
255 	bool skip_pktap;
256 
257 	VERIFY(total_len > state->hlen);
258 	VERIFY(((tx_headroom + state->mac_hlen) & 0x1) == 0);
259 	VERIFY((tx_headroom + state->hlen + mss) <= pp->pp_buflet_size);
260 
261 	KPKTQ_INIT(&pktq_alloc);
262 	KPKTQ_INIT(&pktq_seg);
263 	/* batch allocate enough packets */
264 	n_pkts = (uint32_t)(SK_ROUNDUP((total_len - state->hlen), mss) / mss);
265 	error = pp_alloc_pktq(pp, 1, &pktq_alloc, n_pkts, NULL,
266 	    NULL, SKMEM_NOSLEEP);
267 	if (__improbable(error != 0)) {
268 		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
269 		SK_ERR("failed to alloc %u pkts", n_pkts);
270 		pp_free_pktq(&pktq_alloc);
271 		error = ENOBUFS;
272 		goto done;
273 	}
274 
275 	ASSERT(m->m_pkthdr.pkt_proto == IPPROTO_TCP);
276 	ASSERT((m->m_flags & M_BCAST) == 0);
277 	ASSERT((m->m_flags & M_MCAST) == 0);
278 	ASSERT(((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) == 0));
279 	pflags = m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK;
280 	pflags |= PKTF_START_SEQ;
281 	(void) mbuf_get_timestamp(m, &timestamp, NULL);
282 	necp_get_app_uuid_from_packet(m, euuid);
283 	policy_id = necp_get_policy_id_from_packet(m);
284 	svc_class = m_get_service_class(m);
285 	skip_pktap = (m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) != 0 ||
286 	    pktap_total_tap_count == 0;
287 
288 	for (n = 1, off = state->hlen; off < total_len; off += mss, n++) {
289 		uint8_t *baddr, *baddr0;
290 		uint32_t partial;
291 		struct __kern_packet *pkt;
292 
293 		KPKTQ_DEQUEUE(&pktq_alloc, pkt);
294 		ASSERT(pkt != NULL);
295 
296 		/* get buffer address from packet */
297 		MD_BUFLET_ADDR_ABS(pkt, baddr0);
298 		baddr = baddr0;
299 		baddr += tx_headroom;
300 
301 		/*
302 		 * Copy the link-layer, IP and TCP header from the
303 		 * original packet.
304 		 */
305 		m_copydata(m, 0, state->hlen, baddr);
306 		baddr += state->hlen;
307 
308 		/*
309 		 * Copy the payload from original packet and
310 		 * compute partial checksum on the payload.
311 		 */
312 		if (off + mss > total_len) {
313 			/* if last segment is less than mss */
314 			mss = (uint16_t)(total_len - off);
315 		}
316 		partial = m_copydata_sum(m, off, mss, baddr, 0, NULL);
317 
318 		/*
319 		 * update packet metadata
320 		 */
321 		pkt->pkt_headroom = tx_headroom;
322 		pkt->pkt_l2_len = state->mac_hlen;
323 		pkt->pkt_link_flags = 0;
324 		pkt->pkt_csum_flags = 0;
325 		pkt->pkt_csum_tx_start_off = 0;
326 		pkt->pkt_csum_tx_stuff_off = 0;
327 		uuid_copy(pkt->pkt_policy_euuid, euuid);
328 		pkt->pkt_policy_id = policy_id;
329 		pkt->pkt_timestamp = timestamp;
330 		pkt->pkt_svc_class = svc_class;
331 		pkt->pkt_pflags |= pflags;
332 		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
333 		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
334 		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
335 		pkt->pkt_flow_ip_proto = IPPROTO_TCP;
336 		pkt->pkt_transport_protocol = IPPROTO_TCP;
337 		pkt->pkt_flow_tcp_seq = htonl(state->tcp_seq);
338 
339 		state->update(state, pkt, baddr0);
340 		/*
341 		 * FIN or PUSH flags if present will be set only on the last
342 		 * segment.
343 		 */
344 		if (n != n_pkts) {
345 			state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
346 		}
347 		/*
348 		 * CWR flag if present is set only on the first segment
349 		 * and cleared on the subsequent segments.
350 		 */
351 		if (n != 1) {
352 			state->tcp->th_flags &= ~TH_CWR;
353 			state->tcp->th_seq = htonl(state->tcp_seq);
354 		}
355 		ASSERT(state->tcp->th_seq == pkt->pkt_flow_tcp_seq);
356 		state->internal(state, partial, mss);
357 		METADATA_ADJUST_LEN(pkt, state->hlen + mss, tx_headroom);
358 		VERIFY(__packet_finalize(SK_PKT2PH(pkt)) == 0);
359 		KPKTQ_ENQUEUE(&pktq_seg, pkt);
360 		if (!skip_pktap) {
361 			nx_netif_pktap_output(ifp, state->af, pkt);
362 		}
363 	}
364 	ASSERT(off == total_len);
365 	STATS_ADD(nifs, NETIF_STATS_GSO_SEG, n_pkts);
366 
367 	/* ifnet_enqueue_pkt_chain() consumes the packet chain */
368 	pkt_chain_head = KPKTQ_FIRST(&pktq_seg);
369 	pkt_chain_tail = KPKTQ_LAST(&pktq_seg);
370 	KPKTQ_INIT(&pktq_seg);
371 	n_bytes = total_len + (state->hlen * (n_pkts - 1));
372 
373 	error = netif_gso_send(ifp, pkt_chain_head, pkt_chain_tail,
374 	    n_pkts, n_bytes);
375 
376 done:
377 	KPKTQ_FINI(&pktq_alloc);
378 	return error;
379 }
380 
381 /*
382  * Update the pointers to TCP and IPv4 headers
383  */
384 static void
netif_gso_ipv4_tcp_update(struct netif_gso_ip_tcp_state * state,struct __kern_packet * pkt,uint8_t * baddr)385 netif_gso_ipv4_tcp_update(struct netif_gso_ip_tcp_state *state,
386     struct __kern_packet *pkt, uint8_t *baddr)
387 {
388 	state->hdr.ip = (struct ip *)(void *)(baddr + pkt->pkt_headroom +
389 	    pkt->pkt_l2_len);
390 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) +
391 	    state->ip_hlen);
392 }
393 
394 /*
395  * Finalize the TCP and IPv4 headers
396  */
397 static void
netif_gso_ipv4_tcp_internal(struct netif_gso_ip_tcp_state * state,uint32_t partial,uint16_t payload_len)398 netif_gso_ipv4_tcp_internal(struct netif_gso_ip_tcp_state *state,
399     uint32_t partial, uint16_t payload_len)
400 {
401 	/*
402 	 * Update IP header
403 	 */
404 	state->hdr.ip->ip_id = htons((state->ip_id)++);
405 	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
406 	    payload_len);
407 	/*
408 	 * IP header checksum
409 	 */
410 	state->hdr.ip->ip_sum = 0;
411 	state->hdr.ip->ip_sum = inet_cksum_buffer(state->hdr.ip, 0, 0,
412 	    state->ip_hlen);
413 	/*
414 	 * TCP Checksum
415 	 */
416 	state->tcp->th_sum = 0;
417 	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
418 	partial += htons(state->tcp_hlen + IPPROTO_TCP + payload_len);
419 	partial += state->psuedo_hdr_csum;
420 	ADDCARRY(partial);
421 	state->tcp->th_sum = ~(uint16_t)partial;
422 	/*
423 	 * Update tcp sequence number in gso state
424 	 */
425 	state->tcp_seq += payload_len;
426 }
427 
428 /*
429  * Updates the pointers to TCP and IPv6 headers
430  */
431 static void
netif_gso_ipv6_tcp_update(struct netif_gso_ip_tcp_state * state,struct __kern_packet * pkt,uint8_t * baddr)432 netif_gso_ipv6_tcp_update(struct netif_gso_ip_tcp_state *state,
433     struct __kern_packet *pkt, uint8_t *baddr)
434 {
435 	state->hdr.ip6 = (struct ip6_hdr *)(baddr + pkt->pkt_headroom +
436 	    pkt->pkt_l2_len);
437 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) +
438 	    state->ip_hlen);
439 }
440 
441 /*
442  * Finalize the TCP and IPv6 headers
443  */
444 static void
netif_gso_ipv6_tcp_internal(struct netif_gso_ip_tcp_state * state,uint32_t partial,uint16_t payload_len)445 netif_gso_ipv6_tcp_internal(struct netif_gso_ip_tcp_state *state,
446     uint32_t partial, uint16_t payload_len)
447 {
448 	/*
449 	 * Update IP header
450 	 */
451 	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);
452 	/*
453 	 * TCP Checksum
454 	 */
455 	state->tcp->th_sum = 0;
456 	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
457 	partial += htonl(state->tcp_hlen + IPPROTO_TCP + payload_len);
458 	partial += state->psuedo_hdr_csum;
459 	ADDCARRY(partial);
460 	state->tcp->th_sum = ~(uint16_t)partial;
461 	/*
462 	 * Update tcp sequence number
463 	 */
464 	state->tcp_seq += payload_len;
465 }
466 
467 /*
468  * Init the state during the TCP segmentation
469  */
470 static inline void
netif_gso_ip_tcp_init_state(struct netif_gso_ip_tcp_state * state,struct mbuf * m,uint8_t mac_hlen,uint8_t ip_hlen,bool isipv6)471 netif_gso_ip_tcp_init_state(struct netif_gso_ip_tcp_state *state,
472     struct mbuf *m, uint8_t mac_hlen, uint8_t ip_hlen, bool isipv6)
473 {
474 	if (isipv6) {
475 		state->af = AF_INET6;
476 		state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) +
477 		    mac_hlen);
478 		/* should be atleast 16 bit aligned */
479 		VERIFY(((uintptr_t)state->hdr.ip6 & (uintptr_t)0x1) == 0);
480 		state->tcp = (struct tcphdr *)(void *)((caddr_t)
481 		    (state->hdr.ip6) + ip_hlen);
482 		state->update = netif_gso_ipv6_tcp_update;
483 		state->internal = netif_gso_ipv6_tcp_internal;
484 		state->psuedo_hdr_csum = in6_pseudo(&state->hdr.ip6->ip6_src,
485 		    &state->hdr.ip6->ip6_dst, 0);
486 	} else {
487 		struct in_addr ip_src, ip_dst;
488 
489 		state->af = AF_INET;
490 		state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) +
491 		    mac_hlen);
492 		/* should be atleast 16 bit aligned */
493 		VERIFY(((uintptr_t)state->hdr.ip & (uintptr_t)0x1) == 0);
494 		state->ip_id = ntohs(state->hdr.ip->ip_id);
495 		state->tcp = (struct tcphdr *)(void *)((caddr_t)
496 		    (state->hdr.ip) + ip_hlen);
497 		state->update = netif_gso_ipv4_tcp_update;
498 		state->internal = netif_gso_ipv4_tcp_internal;
499 		bcopy(&state->hdr.ip->ip_src, &ip_src, sizeof(ip_src));
500 		bcopy(&state->hdr.ip->ip_dst, &ip_dst, sizeof(ip_dst));
501 		state->psuedo_hdr_csum = in_pseudo(ip_src.s_addr,
502 		    ip_dst.s_addr, 0);
503 	}
504 
505 	state->mac_hlen = mac_hlen;
506 	state->ip_hlen = ip_hlen;
507 	state->tcp_hlen = (uint8_t)(state->tcp->th_off << 2);
508 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
509 	VERIFY(m->m_pkthdr.tso_segsz != 0);
510 	state->mss = (uint16_t)m->m_pkthdr.tso_segsz;
511 	state->tcp_seq = ntohl(state->tcp->th_seq);
512 }
513 
514 /*
515  * GSO on TCP/IPv4
516  */
517 static int
netif_gso_ipv4_tcp(struct ifnet * ifp,struct mbuf * m)518 netif_gso_ipv4_tcp(struct ifnet *ifp, struct mbuf *m)
519 {
520 	struct ip *ip;
521 	struct kern_pbufpool *pp = NULL;
522 	struct netif_gso_ip_tcp_state state;
523 	uint16_t hlen;
524 	uint8_t ip_hlen;
525 	uint8_t mac_hlen;
526 	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
527 	boolean_t pkt_dropped = false;
528 	int error;
529 
530 	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
531 	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
532 		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
533 		error = ENOTSUP;
534 		pkt_dropped = true;
535 		goto done;
536 	}
537 
538 	error = netif_gso_check_netif_active(ifp, m, &pp);
539 	if (__improbable(error != 0)) {
540 		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
541 		error = ENXIO;
542 		pkt_dropped = true;
543 		goto done;
544 	}
545 
546 	error = netif_gso_get_frame_header_len(m, &mac_hlen);
547 	if (__improbable(error != 0)) {
548 		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
549 		pkt_dropped = true;
550 		goto done;
551 	}
552 
553 	hlen = mac_hlen + sizeof(struct ip);
554 	if (__improbable(m->m_len < hlen)) {
555 		m = m_pullup(m, hlen);
556 		if (m == NULL) {
557 			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
558 			error = ENOBUFS;
559 			pkt_dropped = true;
560 			goto done;
561 		}
562 	}
563 	ip = (struct ip *)(void *)(mtod(m, uint8_t *) + mac_hlen);
564 	ip_hlen = (uint8_t)(ip->ip_hl << 2);
565 	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
566 	if (__improbable(m->m_len < hlen)) {
567 		m = m_pullup(m, hlen);
568 		if (m == NULL) {
569 			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
570 			error = ENOBUFS;
571 			pkt_dropped = true;
572 			goto done;
573 		}
574 	}
575 	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, false);
576 	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
577 done:
578 	m_freem(m);
579 	if (__improbable(pkt_dropped)) {
580 		STATS_INC(nifs, NETIF_STATS_DROP);
581 	}
582 	return error;
583 }
584 
585 /*
586  * GSO on TCP/IPv6
587  */
588 static int
netif_gso_ipv6_tcp(struct ifnet * ifp,struct mbuf * m)589 netif_gso_ipv6_tcp(struct ifnet *ifp, struct mbuf *m)
590 {
591 	struct ip6_hdr *ip6;
592 	struct kern_pbufpool *pp = NULL;
593 	struct netif_gso_ip_tcp_state state;
594 	int lasthdr_off;
595 	uint16_t hlen;
596 	uint8_t ip_hlen;
597 	uint8_t mac_hlen;
598 	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
599 	boolean_t pkt_dropped = false;
600 	int error;
601 
602 	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
603 	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
604 		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
605 		error = ENOTSUP;
606 		pkt_dropped = true;
607 		goto done;
608 	}
609 
610 	error = netif_gso_check_netif_active(ifp, m, &pp);
611 	if (__improbable(error != 0)) {
612 		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
613 		error = ENXIO;
614 		pkt_dropped = true;
615 		goto done;
616 	}
617 
618 	error = netif_gso_get_frame_header_len(m, &mac_hlen);
619 	if (__improbable(error != 0)) {
620 		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
621 		pkt_dropped = true;
622 		goto done;
623 	}
624 
625 	hlen = mac_hlen + sizeof(struct ip6_hdr);
626 	if (__improbable(m->m_len < hlen)) {
627 		m = m_pullup(m, hlen);
628 		if (m == NULL) {
629 			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
630 			error = ENOBUFS;
631 			pkt_dropped = true;
632 			goto done;
633 		}
634 	}
635 	ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + mac_hlen);
636 	lasthdr_off = ip6_lasthdr(m, mac_hlen, IPPROTO_IPV6, NULL) - mac_hlen;
637 	VERIFY(lasthdr_off <= UINT8_MAX);
638 	ip_hlen = (uint8_t)lasthdr_off;
639 	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
640 	if (__improbable(m->m_len < hlen)) {
641 		m = m_pullup(m, hlen);
642 		if (m == NULL) {
643 			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
644 			error = ENOBUFS;
645 			pkt_dropped = true;
646 			goto done;
647 		}
648 	}
649 	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, true);
650 	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
651 done:
652 	m_freem(m);
653 	if (__improbable(pkt_dropped)) {
654 		STATS_INC(nifs, NETIF_STATS_DROP);
655 	}
656 	return error;
657 }
658 
659 int
netif_gso_dispatch(struct ifnet * ifp,struct mbuf * m)660 netif_gso_dispatch(struct ifnet *ifp, struct mbuf *m)
661 {
662 	int gso_flags;
663 
664 	ASSERT(m->m_nextpkt == NULL);
665 	gso_flags = CSUM_TO_GSO(m->m_pkthdr.csum_flags);
666 	VERIFY(gso_flags < GSO_END_OF_TYPE);
667 	return netif_gso_functions[gso_flags](ifp, m);
668 }
669 
670 void
netif_gso_init(void)671 netif_gso_init(void)
672 {
673 	_CASSERT(CSUM_TO_GSO(~(CSUM_TSO_IPV4 | CSUM_TSO_IPV6)) == GSO_NONE);
674 	_CASSERT(CSUM_TO_GSO(CSUM_TSO_IPV4) == GSO_TCP4);
675 	_CASSERT(CSUM_TO_GSO(CSUM_TSO_IPV6) == GSO_TCP6);
676 	netif_gso_functions[GSO_NONE] = nx_netif_host_output;
677 	netif_gso_functions[GSO_TCP4] = netif_gso_ipv4_tcp;
678 	netif_gso_functions[GSO_TCP6] = netif_gso_ipv6_tcp;
679 }
680 
681 void
netif_gso_fini(void)682 netif_gso_fini(void)
683 {
684 	netif_gso_functions[GSO_NONE] = NULL;
685 	netif_gso_functions[GSO_TCP4] = NULL;
686 	netif_gso_functions[GSO_TCP6] = NULL;
687 }
688