xref: /xnu-12377.1.9/bsd/skywalk/nexus/netif/nx_netif_gso.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2020-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  *   1. Redistributions of source code must retain the above copyright
37  *      notice, this list of conditions and the following disclaimer.
38  *   2. Redistributions in binary form must reproduce the above copyright
39  *      notice, this list of conditions and the following disclaimer in the
40  *      documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  */
54 
55 #include <sys/param.h>
56 #include <sys/kernel.h>
57 #include <sys/types.h>
58 #include <sys/systm.h>
59 #include <sys/mbuf.h>
60 #include <sys/socket.h>
61 #include <sys/sysctl.h>
62 #include <sys/malloc.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/ip.h>
67 #include <netinet/tcp.h>
68 #include <netinet/tcpip.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 
72 #include <net/if.h>
73 #include <net/if_var.h>
74 #include <net/ethernet.h>
75 #include <net/pktap.h>
76 #include <skywalk/os_skywalk_private.h>
77 #include <skywalk/nexus/netif/nx_netif.h>
78 
/*
 * The GSO type is carried in bits 20-21 of the mbuf csum_flags.
 * CSUM_TO_GSO() extracts it as an enum netif_gso_type index.
 * The macro argument is parenthesized so that callers may pass
 * compound expressions (e.g. an OR of flag bits) safely.
 */
#define CSUM_GSO_MASK    0x00300000
#define CSUM_GSO_OFFSET  20
#define CSUM_TO_GSO(x) (((x) & CSUM_GSO_MASK) >> CSUM_GSO_OFFSET)
82 
/*
 * GSO dispatch indices; must match the values produced by CSUM_TO_GSO()
 * (see the static_asserts in netif_gso_init()).
 */
enum netif_gso_type {
	GSO_NONE,       /* no segmentation offload requested */
	GSO_TCP4,       /* TCP over IPv4 (CSUM_TSO_IPV4) */
	GSO_TCP6,       /* TCP over IPv6 (CSUM_TSO_IPV6) */
	GSO_END_OF_TYPE /* array bound for netif_gso_functions[] */
};
89 
/*
 * Non-zero (default) allows GSO segments to be enqueued to AQM as one
 * packet chain; see netif_chain_enqueue_enabled().  Tunable via sysctl
 * on DEVELOPMENT/DEBUG kernels only.
 */
uint32_t netif_chain_enqueue = 1;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, chain_enqueue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_chain_enqueue, 0,
    "netif chain enqueue");
#endif /* (DEVELOPMENT || DEBUG) */
96 
/*
 * Array of function pointers that execute GSO depending on packet type.
 * Indexed by enum netif_gso_type; populated by netif_gso_init() and
 * cleared by netif_gso_fini().
 */
int (*netif_gso_functions[GSO_END_OF_TYPE]) (struct ifnet*, struct mbuf*);
101 
/*
 * Structure that contains the state during the TCP segmentation.
 * Filled in once by netif_gso_ip_tcp_init_state() and then updated
 * per-segment by the `update' and `internal' callbacks.
 */
struct netif_gso_ip_tcp_state {
	/* re-point hdr/tcp at the headers copied into a segment buffer */
	void (*update)(struct netif_gso_ip_tcp_state*,
	    struct __kern_packet *pkt, uint8_t *__bidi_indexable baddr);
	/* finalize IP/TCP headers of one segment (lengths, checksums) */
	void (*internal)(struct netif_gso_ip_tcp_state*, uint32_t partial,
	    uint16_t payload_len, uint32_t *csum_flags);
	union {
		struct ip *ip;          /* valid when af == AF_INET */
		struct ip6_hdr *ip6;    /* valid when af == AF_INET6 */
	} hdr;
	int af;                         /* AF_INET or AF_INET6 */
	struct tcphdr *tcp;             /* TCP header within current segment */
	struct kern_pbufpool *pp;       /* pool the segments come from */
	uint32_t psuedo_hdr_csum;       /* pseudo-header checksum (addrs only) */
	uint32_t tcp_seq;               /* running TCP sequence number */
	uint16_t hlen;                  /* mac + ip + tcp header length */
	uint16_t mss;                   /* payload bytes per segment */
	uint16_t ip_id;                 /* running IPv4 identification */
	uint8_t mac_hlen;               /* link-layer header length */
	uint8_t ip_hlen;                /* IP header length */
	uint8_t tcp_hlen;               /* TCP header length */
	boolean_t copy_data_sum;        /* compute TCP csum in software */
};
127 
128 static inline uint8_t
netif_gso_get_frame_header_len(struct mbuf * m,uint8_t * hlen)129 netif_gso_get_frame_header_len(struct mbuf *m, uint8_t *hlen)
130 {
131 	uint64_t len;
132 	char *__single ph = m->m_pkthdr.pkt_hdr;
133 
134 	if (__improbable(m_pktlen(m) == 0 || ph == NULL ||
135 	    ph < (char *)m->m_data)) {
136 		return ERANGE;
137 	}
138 	len = (ph - m_mtod_current(m));
139 	if (__improbable(len > UINT8_MAX)) {
140 		return ERANGE;
141 	}
142 	*hlen = (uint8_t)len;
143 	return 0;
144 }
145 
/*
 * Verify that the netif attached to ifp is still in skywalk mode and
 * that the TX ring serving m's service class is accepting packets.
 * On success returns 0 and sets *pp to that ring's packet buffer pool;
 * otherwise returns ENXIO (stats updated accordingly).
 */
static inline int
netif_gso_check_netif_active(struct ifnet *ifp, struct mbuf *m,
    struct kern_pbufpool **pp)
{
	struct __kern_channel_ring *kring;
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));

	/* the device adapter must still be active (skywalk mode) */
	if (__improbable(!NA_IS_ACTIVE(hwna))) {
		STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
		SK_DF(SK_VERB_NETIF,
		    "\"%s\" (%p) not in skywalk mode anymore",
		    hwna->na_name, SK_KVA(hwna));
		return ENXIO;
	}

	/* map the mbuf's service class to its TX ring */
	VERIFY(sc_idx < KPKT_SC_MAX_CLASSES);
	kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
	if (__improbable(KR_DROP(kring))) {
		STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
		SK_DF(SK_VERB_NETIF,
		    "kr \"%s\" (%p) krflags 0x%x or %s in drop mode",
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    ifp->if_xname);
		return ENXIO;
	}
	*pp = kring->ckr_pp;
	return 0;
}
178 
179 boolean_t
netif_chain_enqueue_enabled(struct ifnet * ifp)180 netif_chain_enqueue_enabled(struct ifnet *ifp)
181 {
182 	return netif_chain_enqueue != 0 && ifp->if_output_netem == NULL &&
183 	       (ifp->if_eflags & IFEF_ENQUEUE_MULTI) == 0;
184 }
185 
/*
 * Hand a chain of GSO segments to the interface's AQM stage and kick
 * the transmit path.  The packet chain is consumed regardless of the
 * outcome; drops are accounted in the netif stats.  Returns the first
 * enqueue error encountered, or 0.
 */
static inline int
netif_gso_send(struct ifnet *ifp, struct __kern_packet *head,
    struct __kern_packet *tail, uint32_t count, uint32_t bytes)
{
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_qset *__single qset = NULL;
	int error = 0;
	boolean_t dropped;

	/* steer to a specific queue set if the packet selects one */
	qset = nx_netif_find_qset_with_pkt(ifp, head);
	if (netif_chain_enqueue_enabled(ifp)) {
		/* fast path: enqueue the entire chain in one call */
		dropped = false;
		if (qset != NULL) {
			head->pkt_qset_idx = qset->nqs_idx;
			error = ifnet_enqueue_pkt_chain(ifp, qset->nqs_ifcq,
			    head, tail, count, bytes, false, &dropped);
		} else {
			error = ifnet_enqueue_pkt_chain(ifp, ifp->if_snd, head, tail,
			    count, bytes, false, &dropped);
		}
		if (__improbable(dropped)) {
			STATS_ADD(nifs, NETIF_STATS_TX_DROP_ENQ_AQM, count);
			STATS_ADD(nifs, NETIF_STATS_DROP, count);
		}
	} else {
		/* slow path: enqueue one packet at a time; keep first error */
		struct __kern_packet *pkt = head, *next;
		uint32_t c = 0, b = 0;

		while (pkt != NULL) {
			int err;

			next = pkt->pkt_nextpkt;
			pkt->pkt_nextpkt = NULL;
			c++;
			b += pkt->pkt_length;

			dropped = false;
			if (qset != NULL) {
				pkt->pkt_qset_idx = qset->nqs_idx;
				err = ifnet_enqueue_pkt(ifp, qset->nqs_ifcq,
				    pkt, false, &dropped);
			} else {
				err = ifnet_enqueue_pkt(ifp, ifp->if_snd, pkt, false, &dropped);
			}
			if (error == 0 && __improbable(err != 0)) {
				error = err;
			}
			if (__improbable(dropped)) {
				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
			pkt = next;
		}
		/* caller-supplied totals must match what we walked */
		ASSERT(c == count);
		ASSERT(b == bytes);
	}
	if (qset != NULL) {
		nx_netif_qset_release(&qset);
	}
	/* poke the driver to start draining the queues */
	netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
	return error;
}
249 
/*
 * Segment and transmit a queue of packets which fit the given mss + hdr_len.
 * m points to mbuf chain to be segmented.
 * This function splits the payload (m->m_pkthdr.len - hdr_len)
 * into segments of length MSS bytes and then copies the first hdr_len bytes
 * from m at the top of each segment.  The resulting __kern_packet chain is
 * handed to netif_gso_send(); the caller still owns and frees m.
 */
static inline int
netif_gso_tcp_segment_mbuf(struct mbuf *m, struct ifnet *ifp,
    struct netif_gso_ip_tcp_state *state, struct kern_pbufpool *pp)
{
	uuid_t euuid;
	struct pktq pktq_alloc, pktq_seg;
	uint64_t timestamp = 0, m_tx_timestamp = 0;
	uint64_t pflags;
	int error = 0;
	uint32_t policy_id;
	uint32_t skip_policy_id;
	uint32_t svc_class;
	uint32_t n, n_pkts, n_bytes;
	int32_t off = 0, total_len = m->m_pkthdr.len;
	uint8_t tx_headroom = (uint8_t)ifp->if_tx_headroom;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	struct __kern_packet *pkt_chain_head, *pkt_chain_tail;
	struct m_tag *ts_tag = NULL;
	uint16_t mss = state->mss;
	bool skip_pktap;

	VERIFY(total_len > state->hlen);
	/* IP header must start 16-bit aligned within the segment buffer */
	VERIFY(((tx_headroom + state->mac_hlen) & 0x1) == 0);
	/* one full segment (headroom + headers + mss) must fit a buflet */
	VERIFY((tx_headroom + state->hlen + mss) <= PP_BUF_SIZE_DEF(pp));

	KPKTQ_INIT(&pktq_alloc);
	KPKTQ_INIT(&pktq_seg);
	/* batch allocate enough packets */
	n_pkts = (uint32_t)(SK_ROUNDUP((total_len - state->hlen), mss) / mss);
	error = pp_alloc_pktq(pp, 1, &pktq_alloc, n_pkts, NULL,
	    NULL, SKMEM_NOSLEEP);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
		SK_ERR("failed to alloc %u pkts", n_pkts);
		pp_free_pktq(&pktq_alloc);
		error = ENOBUFS;
		goto done;
	}

	ASSERT(m->m_pkthdr.pkt_proto == IPPROTO_TCP);
	ASSERT((m->m_flags & M_BCAST) == 0);
	ASSERT((m->m_flags & M_MCAST) == 0);
	ASSERT(((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) == 0));
	/* capture per-flow metadata once; it is replicated to each segment */
	pflags = m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK;
	pflags |= PKTF_START_SEQ;
	pflags |= (m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) ? PKT_F_L4S : 0;
	(void) mbuf_get_timestamp(m, &timestamp, NULL);
	necp_get_app_uuid_from_packet(m, euuid);
	policy_id = necp_get_policy_id_from_packet(m);
	skip_policy_id = necp_get_skip_policy_id_from_packet(m);
	svc_class = m_get_service_class(m);
	skip_pktap = (m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) != 0 ||
	    pktap_total_tap_count == 0;

	/* propagate any AQM transmit timestamp tag to the segments */
	ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
	if (ts_tag != NULL) {
		m_tx_timestamp = *(uint64_t *)(ts_tag->m_tag_data);
	}

	/* carve the payload into mss-sized segments */
	for (n = 1, off = state->hlen; off < total_len; off += mss, n++) {
		uint8_t *baddr, *baddr0;
		uint32_t partial = 0;
		struct __kern_packet *pkt;

		KPKTQ_DEQUEUE(&pktq_alloc, pkt);
		ASSERT(pkt != NULL);

		/* get buffer address from packet */
		MD_BUFLET_ADDR_ABS(pkt, baddr0);
		baddr = baddr0;
		baddr += tx_headroom;

		/*
		 * Copy the link-layer, IP and TCP header from the
		 * original packet.
		 */
		m_copydata(m, 0, state->hlen, baddr);
		baddr += state->hlen;

		/*
		 * Copy the payload from original packet and
		 * compute partial checksum on the payload.
		 */
		if (off + mss > total_len) {
			/* if last segment is less than mss */
			mss = (uint16_t)(total_len - off);
		}
		if (state->copy_data_sum) {
			partial = m_copydata_sum(m, off, mss, baddr, 0, NULL);
		} else {
			m_copydata(m, off, mss, baddr);
		}

		/*
		 * update packet metadata
		 */
		pkt->pkt_headroom = tx_headroom;
		pkt->pkt_l2_len = state->mac_hlen;
		pkt->pkt_link_flags = 0;
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;
		uuid_copy(pkt->pkt_policy_euuid, euuid);
		pkt->pkt_policy_id = policy_id;
		pkt->pkt_skip_policy_id = skip_policy_id;
		pkt->pkt_timestamp = timestamp;
		pkt->pkt_svc_class = svc_class;
		pkt->pkt_pflags |= pflags;
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_flow_ip_proto = IPPROTO_TCP;
		pkt->pkt_transport_protocol = IPPROTO_TCP;
		pkt->pkt_flow_tcp_seq = htonl(state->tcp_seq);
		__packet_set_tx_timestamp(SK_PKT2PH(pkt), m_tx_timestamp);

		/* point state->hdr/state->tcp at this segment's headers */
		state->update(state, pkt, baddr0);
		/*
		 * FIN or PUSH flags if present will be set only on the last
		 * segment.
		 */
		if (n != n_pkts) {
			state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
		}
		/*
		 * CWR flag if present is set only on the first segment
		 * and cleared on the subsequent segments.
		 */
		if (n != 1) {
			state->tcp->th_flags &= ~TH_CWR;
			state->tcp->th_seq = htonl(state->tcp_seq);
		}
		ASSERT(state->tcp->th_seq == pkt->pkt_flow_tcp_seq);
		/* finalize lengths/checksums and advance tcp_seq */
		state->internal(state, partial, mss, &pkt->pkt_csum_flags);
		METADATA_ADJUST_LEN(pkt, state->hlen + mss, tx_headroom);
		VERIFY(__packet_finalize(SK_PKT2PH(pkt)) == 0);
		KPKTQ_ENQUEUE(&pktq_seg, pkt);
		if (!skip_pktap) {
			nx_netif_pktap_output(ifp, state->af, pkt);
		}
	}
	ASSERT(off == total_len);
	STATS_ADD(nifs, NETIF_STATS_GSO_SEG, n_pkts);

	/* ifnet_enqueue_pkt_chain() consumes the packet chain */
	pkt_chain_head = KPKTQ_FIRST(&pktq_seg);
	pkt_chain_tail = KPKTQ_LAST(&pktq_seg);
	KPKTQ_INIT(&pktq_seg);
	/* total wire bytes: payload plus a replicated header per segment */
	n_bytes = total_len + (state->hlen * (n_pkts - 1));

	if (m->m_pkthdr.pkt_ext_flags & PKTF_EXT_QSET_ID_VALID) {
		pkt_chain_head->pkt_pflags |= PKT_F_PRIV_HAS_QSET_ID;
		pkt_chain_head->pkt_priv =
		    __unsafe_forge_single(void *, m->m_pkthdr.pkt_mpriv_qsetid);
	}

	error = netif_gso_send(ifp, pkt_chain_head, pkt_chain_tail,
	    n_pkts, n_bytes);

done:
	/* frees any packets left over from a failed/short run */
	KPKTQ_FINI(&pktq_alloc);
	return error;
}
420 
421 /*
422  * Update the pointers to TCP and IPv4 headers
423  */
424 static void
netif_gso_ipv4_tcp_update(struct netif_gso_ip_tcp_state * state,struct __kern_packet * pkt,uint8_t * __bidi_indexable baddr)425 netif_gso_ipv4_tcp_update(struct netif_gso_ip_tcp_state *state,
426     struct __kern_packet *pkt, uint8_t *__bidi_indexable baddr)
427 {
428 	state->hdr.ip = (struct ip *)(void *)(baddr + pkt->pkt_headroom +
429 	    pkt->pkt_l2_len);
430 	state->tcp = (struct tcphdr *)(void *)(baddr + pkt->pkt_headroom +
431 	    pkt->pkt_l2_len +  state->ip_hlen);
432 }
433 
/*
 * Finalize the TCP and IPv4 headers of one segment when checksums are
 * computed in software: refresh ip_id/ip_len, recompute the IP header
 * checksum, and fold the payload's partial sum with the TCP header and
 * pseudo-header into th_sum.
 */
static void
netif_gso_ipv4_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len, uint32_t *csum_flags __unused)
{
	int hlen;
	uint8_t *__sized_by(hlen) buffer;

	/*
	 * Update IP header
	 */
	state->hdr.ip->ip_id = htons((state->ip_id)++);
	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
	    payload_len);
	/*
	 * IP header checksum
	 */
	state->hdr.ip->ip_sum = 0;
	buffer = (uint8_t *__bidi_indexable)(struct ip *__bidi_indexable)
	    state->hdr.ip;
	hlen = state->ip_hlen;
	state->hdr.ip->ip_sum = inet_cksum_buffer(buffer, 0, 0, hlen);
	/*
	 * TCP Checksum: extend the payload partial sum over the TCP header,
	 * then add the pseudo-header (proto + length in network order, plus
	 * the precomputed address sum) and fold the carries.
	 */
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
	partial += htons(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->psuedo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update tcp sequence number in gso state
	 */
	state->tcp_seq += payload_len;
}
472 
473 static void
netif_gso_ipv4_tcp_internal_nosum(struct netif_gso_ip_tcp_state * state,uint32_t partial __unused,uint16_t payload_len __unused,uint32_t * csum_flags)474 netif_gso_ipv4_tcp_internal_nosum(struct netif_gso_ip_tcp_state *state,
475     uint32_t partial __unused, uint16_t payload_len __unused,
476     uint32_t *csum_flags)
477 {
478 	/*
479 	 * Update IP header
480 	 */
481 	state->hdr.ip->ip_id = htons((state->ip_id)++);
482 	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
483 	    payload_len);
484 	/*
485 	 * Update tcp sequence number in gso state
486 	 */
487 	state->tcp_seq += payload_len;
488 
489 	/* offload csum to hardware */
490 	*csum_flags |= PACKET_CSUM_IP | PACKET_CSUM_TCP;
491 }
492 
493 /*
494  * Updates the pointers to TCP and IPv6 headers
495  */
496 static void
netif_gso_ipv6_tcp_update(struct netif_gso_ip_tcp_state * state,struct __kern_packet * pkt,uint8_t * __bidi_indexable baddr)497 netif_gso_ipv6_tcp_update(struct netif_gso_ip_tcp_state *state,
498     struct __kern_packet *pkt, uint8_t *__bidi_indexable baddr)
499 {
500 	state->hdr.ip6 = (struct ip6_hdr *)(baddr + pkt->pkt_headroom +
501 	    pkt->pkt_l2_len);
502 	state->tcp = (struct tcphdr *)(void *)(baddr + pkt->pkt_headroom +
503 	    pkt->pkt_l2_len + state->ip_hlen);
504 }
505 
506 /*
507  * Finalize the TCP and IPv6 headers
508  */
509 static void
netif_gso_ipv6_tcp_internal_nosum(struct netif_gso_ip_tcp_state * state,uint32_t partial __unused,uint16_t payload_len __unused,uint32_t * csum_flags)510 netif_gso_ipv6_tcp_internal_nosum(struct netif_gso_ip_tcp_state *state,
511     uint32_t partial __unused, uint16_t payload_len __unused,
512     uint32_t *csum_flags)
513 {
514 	/*
515 	 * Update IP header
516 	 */
517 	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);
518 
519 	/*
520 	 * Update tcp sequence number
521 	 */
522 	state->tcp_seq += payload_len;
523 
524 	/* offload csum to hardware */
525 	*csum_flags |= PACKET_CSUM_TCPIPV6;
526 }
527 
/*
 * Finalize the TCP and IPv6 headers of one segment when the TCP checksum
 * is computed in software: refresh ip6_plen and fold the payload's partial
 * sum with the TCP header and IPv6 pseudo-header into th_sum.
 */
static void
netif_gso_ipv6_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len, uint32_t *csum_flags __unused)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);
	/*
	 * TCP Checksum: the IPv6 pseudo-header carries a 32-bit length
	 * field, hence htonl() here (vs. htons() in the IPv4 variant).
	 */
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
	partial += htonl(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->psuedo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update tcp sequence number
	 */
	state->tcp_seq += payload_len;
}
553 
/*
 * Init the state during the TCP segmentation.  Caches header pointers
 * and lengths from the original mbuf, selects per-family update/finalize
 * callbacks, and decides (from ifp->if_hwassist) whether checksums are
 * computed in software or offloaded to hardware.
 */
static inline void
netif_gso_ip_tcp_init_state(struct netif_gso_ip_tcp_state *state,
    struct mbuf *m, uint8_t mac_hlen, uint8_t ip_hlen, bool isipv6, ifnet_t ifp)
{
	if (isipv6) {
		state->af = AF_INET6;
		state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be atleast 16 bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip6 & (uintptr_t)0x1) == 0);
		state->tcp = (struct tcphdr *)(void *)(m_mtod_current(m) +
		    mac_hlen + ip_hlen);
		state->update = netif_gso_ipv6_tcp_update;
		/* hardware TCP/IPv6 csum support selects the no-sum path */
		if (ifp->if_hwassist & IFNET_CSUM_TCPIPV6) {
			state->internal = netif_gso_ipv6_tcp_internal_nosum;
			state->copy_data_sum = false;
		} else {
			state->internal = netif_gso_ipv6_tcp_internal;
			state->copy_data_sum = true;
		}
		/* pseudo-header address sum; length/proto added per segment */
		state->psuedo_hdr_csum = in6_pseudo(&state->hdr.ip6->ip6_src,
		    &state->hdr.ip6->ip6_dst, 0);
	} else {
		struct in_addr ip_src, ip_dst;

		state->af = AF_INET;
		state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be atleast 16 bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip & (uintptr_t)0x1) == 0);
		state->ip_id = ntohs(state->hdr.ip->ip_id);
		state->tcp = (struct tcphdr *)(void *)(m_mtod_current(m) +
		    mac_hlen + ip_hlen);
		state->update = netif_gso_ipv4_tcp_update;
		/* need both IP and TCP hw csum to take the no-sum path */
		if ((ifp->if_hwassist & (IFNET_CSUM_IP | IFNET_CSUM_TCP)) ==
		    (IFNET_CSUM_IP | IFNET_CSUM_TCP)) {
			state->internal = netif_gso_ipv4_tcp_internal_nosum;
			state->copy_data_sum = false;
		} else {
			state->internal = netif_gso_ipv4_tcp_internal;
			state->copy_data_sum = true;
		}
		/* bcopy avoids misaligned 32-bit loads from the header */
		bcopy(&state->hdr.ip->ip_src, &ip_src, sizeof(ip_src));
		bcopy(&state->hdr.ip->ip_dst, &ip_dst, sizeof(ip_dst));
		state->psuedo_hdr_csum = in_pseudo(ip_src.s_addr,
		    ip_dst.s_addr, 0);
	}

	state->mac_hlen = mac_hlen;
	state->ip_hlen = ip_hlen;
	/* th_off is in 32-bit words; convert to bytes */
	state->tcp_hlen = (uint8_t)(state->tcp->th_off << 2);
	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
	VERIFY(m->m_pkthdr.tso_segsz != 0);
	state->mss = (uint16_t)m->m_pkthdr.tso_segsz;
	state->tcp_seq = ntohl(state->tcp->th_seq);
}
613 
/*
 * GSO on TCP/IPv4: validate the mbuf, pull up the link/IP/TCP headers,
 * then segment and transmit.  Consumes m on all paths; returns 0 or an
 * errno-style error.
 */
static int
netif_gso_ipv4_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip *ip;
	struct kern_pbufpool *__single pp = NULL;
	struct netif_gso_ip_tcp_state state;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	/* also obtains the TX ring's packet pool for segment allocation */
	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

	/* make the fixed IP header contiguous before reading ip_hl */
	hlen = mac_hlen + sizeof(struct ip);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	ip = (struct ip *)(void *)(mtod(m, uint8_t *) + mac_hlen);
	/* ip_hl is in 32-bit words; includes IP options if any */
	ip_hlen = (uint8_t)(ip->ip_hl << 2);
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, false, ifp);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}
684 
/*
 * GSO on TCP/IPv6: validate the mbuf, pull up the link/IPv6/TCP headers
 * (walking any extension headers), then segment and transmit.  Consumes
 * m on all paths; returns 0 or an errno-style error.
 */
static int
netif_gso_ipv6_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip6_hdr *ip6;
	struct kern_pbufpool *__single pp = NULL;
	struct netif_gso_ip_tcp_state state;
	int lasthdr_off;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	/* also obtains the TX ring's packet pool for segment allocation */
	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

	/* make the fixed IPv6 header contiguous first */
	hlen = mac_hlen + sizeof(struct ip6_hdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + mac_hlen);
	/* skip extension headers; ip_hlen spans fixed + extension headers */
	lasthdr_off = ip6_lasthdr(m, mac_hlen, IPPROTO_IPV6, NULL) - mac_hlen;
	VERIFY(lasthdr_off <= UINT8_MAX);
	ip_hlen = (uint8_t)lasthdr_off;
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, true, ifp);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}
758 
759 int
netif_gso_dispatch(struct ifnet * ifp,struct mbuf * m)760 netif_gso_dispatch(struct ifnet *ifp, struct mbuf *m)
761 {
762 	int gso_flags;
763 
764 	ASSERT(m->m_nextpkt == NULL);
765 	gso_flags = CSUM_TO_GSO(m->m_pkthdr.csum_flags);
766 	VERIFY(gso_flags < GSO_END_OF_TYPE);
767 	return netif_gso_functions[gso_flags](ifp, m);
768 }
769 
void
netif_gso_init(void)
{
	/*
	 * The CSUM_TO_GSO() mapping must agree with enum netif_gso_type;
	 * verify at compile time before wiring up the dispatch table.
	 */
	static_assert(CSUM_TO_GSO(~(CSUM_TSO_IPV4 | CSUM_TSO_IPV6)) == GSO_NONE);
	static_assert(CSUM_TO_GSO(CSUM_TSO_IPV4) == GSO_TCP4);
	static_assert(CSUM_TO_GSO(CSUM_TSO_IPV6) == GSO_TCP6);
	/* non-TSO packets fall through to the regular host output path */
	netif_gso_functions[GSO_NONE] = nx_netif_host_output;
	netif_gso_functions[GSO_TCP4] = netif_gso_ipv4_tcp;
	netif_gso_functions[GSO_TCP6] = netif_gso_ipv6_tcp;
}
780 
781 void
netif_gso_fini(void)782 netif_gso_fini(void)
783 {
784 	netif_gso_functions[GSO_NONE] = NULL;
785 	netif_gso_functions[GSO_TCP4] = NULL;
786 	netif_gso_functions[GSO_TCP6] = NULL;
787 }
788