/*
 * Copyright (c) 2020-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>

#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/pktap.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>

#define CSUM_GSO_MASK   0x00300000
#define CSUM_GSO_OFFSET 20
#define CSUM_TO_GSO(x)  ((x & CSUM_GSO_MASK) >> CSUM_GSO_OFFSET)
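
/*
 * CSUM_TO_GSO() maps the CSUM_TSO_IPV4/CSUM_TSO_IPV6 bits of
 * m_pkthdr.csum_flags (bits 20-21, covered by CSUM_GSO_MASK) down to a
 * netif_gso_type index; netif_gso_init() below static-asserts this
 * correspondence.
 */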

enum netif_gso_type {
	GSO_NONE,
	GSO_TCP4,
	GSO_TCP6,
	GSO_END_OF_TYPE
};

uint32_t netif_chain_enqueue = 1;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, chain_enqueue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_chain_enqueue, 0,
    "netif chain enqueue");
#endif /* (DEVELOPMENT || DEBUG) */

/*
 * Array of function pointers that execute GSO depending on packet type
 */
int (*netif_gso_functions[GSO_END_OF_TYPE]) (struct ifnet*, struct mbuf*);

/*
 * State maintained across segments during TCP segmentation
 */
struct netif_gso_ip_tcp_state {
	void (*update)(struct netif_gso_ip_tcp_state*,
	    struct __kern_packet *pkt, uint8_t *baddr);
	void (*internal)(struct netif_gso_ip_tcp_state*, uint32_t partial,
	    uint16_t payload_len);
	union {
		struct ip *ip;
		struct ip6_hdr *ip6;
	} hdr;
	int af;
	struct tcphdr *tcp;
	struct kern_pbufpool *pp;
	uint32_t pseudo_hdr_csum;
	uint32_t tcp_seq;
	uint16_t hlen;
	uint16_t mss;
	uint16_t ip_id;
	uint8_t mac_hlen;
	uint8_t ip_hlen;
	uint8_t tcp_hlen;
};

static inline int
netif_gso_get_frame_header_len(struct mbuf *m, uint8_t *hlen)
{
	uint64_t len;
	char *ph = m->m_pkthdr.pkt_hdr;

	if (__improbable(m_pktlen(m) == 0 || ph == NULL ||
	    ph < (char *)m->m_data)) {
		return ERANGE;
	}
	len = (ph - m->m_data);
	if (__improbable(len > UINT8_MAX)) {
		return ERANGE;
	}
	*hlen = (uint8_t)len;
	return 0;
}

static inline int
netif_gso_check_netif_active(struct ifnet *ifp, struct mbuf *m,
    struct kern_pbufpool **pp)
{
	struct __kern_channel_ring *kring;
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));

	if (__improbable(!NA_IS_ACTIVE(hwna))) {
		STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
		SK_DF(SK_VERB_NETIF,
		    "\"%s\" (0x%llx) not in skywalk mode anymore",
		    hwna->na_name, SK_KVA(hwna));
		return ENXIO;
	}

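	/*
	 * Look up the TX ring serving this packet's service class via the
	 * adapter's service-class LUT and make sure it can still accept
	 * traffic.
	 */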
	VERIFY(sc_idx < KPKT_SC_MAX_CLASSES);
	kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
	if (__improbable(KR_DROP(kring))) {
		STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
		SK_DF(SK_VERB_NETIF,
		    "kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS, ifp->if_xname);
		return ENXIO;
	}
	*pp = kring->ckr_pp;
	return 0;
}

static inline boolean_t
netif_chain_enqueue_enabled(struct ifnet *ifp)
{
	return netif_chain_enqueue != 0 && ifp->if_output_netem == NULL &&
	    (ifp->if_eflags & IFEF_ENQUEUE_MULTI) == 0;
}

static inline int
netif_gso_send(struct ifnet *ifp, struct __kern_packet *head,
    struct __kern_packet *tail, uint32_t count, uint32_t bytes)
{
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_qset *qset = NULL;
	uint64_t qset_id = 0;
	int error = 0;
	boolean_t dropped;

	if (NX_LLINK_PROV(nif->nif_nx) &&
	    ifp->if_traffic_rule_count > 0 &&
	    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
	    head, &qset_id) == 0) {
		qset = nx_netif_find_qset(nif, qset_id);
		ASSERT(qset != NULL);
	}
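	/*
	 * With chain enqueue enabled, the whole segment chain is handed to
	 * AQM in a single call; otherwise each segment is unlinked and
	 * enqueued individually, and the first enqueue error is reported.
	 */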
	if (netif_chain_enqueue_enabled(ifp)) {
		dropped = false;
		if (qset != NULL) {
			head->pkt_qset_idx = qset->nqs_idx;
			error = ifnet_enqueue_ifcq_pkt_chain(ifp, qset->nqs_ifcq,
			    head, tail, count, bytes, false, &dropped);
		} else {
			error = ifnet_enqueue_pkt_chain(ifp, head, tail,
			    count, bytes, false, &dropped);
		}
		if (__improbable(dropped)) {
			STATS_ADD(nifs, NETIF_STATS_TX_DROP_ENQ_AQM, count);
			STATS_ADD(nifs, NETIF_STATS_DROP, count);
		}
	} else {
		struct __kern_packet *pkt = head, *next;
		uint32_t c = 0, b = 0;

		while (pkt != NULL) {
			int err;

			next = pkt->pkt_nextpkt;
			pkt->pkt_nextpkt = NULL;
			c++;
			b += pkt->pkt_length;

			dropped = false;
			if (qset != NULL) {
				pkt->pkt_qset_idx = qset->nqs_idx;
				err = ifnet_enqueue_ifcq_pkt(ifp, qset->nqs_ifcq,
				    pkt, false, &dropped);
			} else {
				err = ifnet_enqueue_pkt(ifp, pkt, false, &dropped);
			}
			if (error == 0 && __improbable(err != 0)) {
				error = err;
			}
			if (__improbable(dropped)) {
				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
			pkt = next;
		}
		ASSERT(c == count);
		ASSERT(b == bytes);
	}
	if (qset != NULL) {
		nx_netif_qset_release(&qset);
	}
	netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
	return error;
}

/*
 * Segment and transmit a queue of packets that fit the given mss + hdr_len.
 * m points to the mbuf chain to be segmented.
 * This function splits the payload (m->m_pkthdr.len - hdr_len) into
 * segments of at most MSS bytes each and copies the first hdr_len bytes
 * of m to the front of each segment.
 */
static inline int
netif_gso_tcp_segment_mbuf(struct mbuf *m, struct ifnet *ifp,
    struct netif_gso_ip_tcp_state *state, struct kern_pbufpool *pp)
{
	uuid_t euuid;
	struct pktq pktq_alloc, pktq_seg;
	uint64_t timestamp = 0;
	uint64_t pflags;
	int error = 0;
	uint32_t policy_id;
	uint32_t svc_class;
	uint32_t n, n_pkts, n_bytes;
	int32_t off = 0, total_len = m->m_pkthdr.len;
	uint8_t tx_headroom = (uint8_t)ifp->if_tx_headroom;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	struct __kern_packet *pkt_chain_head, *pkt_chain_tail;
	uint16_t mss = state->mss;
	bool skip_pktap;

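	/*
	 * Sanity: the payload must extend past the headers, the IP header
	 * must land 16-bit aligned after the frame header, and headroom +
	 * headers + one full MSS must fit in a single buflet of the pool.
	 */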
	VERIFY(total_len > state->hlen);
	VERIFY(((tx_headroom + state->mac_hlen) & 0x1) == 0);
	VERIFY((tx_headroom + state->hlen + mss) <= PP_BUF_SIZE_DEF(pp));

	KPKTQ_INIT(&pktq_alloc);
	KPKTQ_INIT(&pktq_seg);
	/* batch-allocate one packet per segment: ceil(payload / mss) */
	n_pkts = (uint32_t)(SK_ROUNDUP((total_len - state->hlen), mss) / mss);
	error = pp_alloc_pktq(pp, 1, &pktq_alloc, n_pkts, NULL,
	    NULL, SKMEM_NOSLEEP);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
		SK_ERR("failed to alloc %u pkts", n_pkts);
		pp_free_pktq(&pktq_alloc);
		error = ENOBUFS;
		goto done;
	}

	ASSERT(m->m_pkthdr.pkt_proto == IPPROTO_TCP);
	ASSERT((m->m_flags & M_BCAST) == 0);
	ASSERT((m->m_flags & M_MCAST) == 0);
	ASSERT(((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) == 0));
	pflags = m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK;
	pflags |= PKTF_START_SEQ;
	(void) mbuf_get_timestamp(m, &timestamp, NULL);
	necp_get_app_uuid_from_packet(m, euuid);
	policy_id = necp_get_policy_id_from_packet(m);
	svc_class = m_get_service_class(m);
	skip_pktap = (m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) != 0 ||
	    pktap_total_tap_count == 0;

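	/*
	 * Per-segment loop: dequeue a preallocated packet, copy the headers
	 * and up to mss bytes of payload (accumulating a partial checksum
	 * over the payload), then let the address-family callbacks patch
	 * and checksum the IP/TCP headers.
	 */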
	for (n = 1, off = state->hlen; off < total_len; off += mss, n++) {
		uint8_t *baddr, *baddr0;
		uint32_t partial;
		struct __kern_packet *pkt;

		KPKTQ_DEQUEUE(&pktq_alloc, pkt);
		ASSERT(pkt != NULL);

		/* get buffer address from packet */
		MD_BUFLET_ADDR_ABS(pkt, baddr0);
		baddr = baddr0;
		baddr += tx_headroom;

		/*
		 * Copy the link-layer, IP and TCP header from the
		 * original packet.
		 */
		m_copydata(m, 0, state->hlen, baddr);
		baddr += state->hlen;

		/*
		 * Copy the payload from the original packet and
		 * compute the partial checksum on the payload.
		 */
		if (off + mss > total_len) {
			/* last segment is shorter than mss */
			mss = (uint16_t)(total_len - off);
		}
		partial = m_copydata_sum(m, off, mss, baddr, 0, NULL);

		/*
		 * Update packet metadata
		 */
		pkt->pkt_headroom = tx_headroom;
		pkt->pkt_l2_len = state->mac_hlen;
		pkt->pkt_link_flags = 0;
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;
		uuid_copy(pkt->pkt_policy_euuid, euuid);
		pkt->pkt_policy_id = policy_id;
		pkt->pkt_timestamp = timestamp;
		pkt->pkt_svc_class = svc_class;
		pkt->pkt_pflags |= pflags;
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_flow_ip_proto = IPPROTO_TCP;
		pkt->pkt_transport_protocol = IPPROTO_TCP;
		pkt->pkt_flow_tcp_seq = htonl(state->tcp_seq);

		state->update(state, pkt, baddr0);
		/*
		 * FIN and PUSH flags, if present, are kept only on the
		 * last segment.
		 */
		if (n != n_pkts) {
			state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
		}
		/*
		 * The CWR flag, if present, is kept only on the first
		 * segment and cleared on the subsequent ones.
		 */
		if (n != 1) {
			state->tcp->th_flags &= ~TH_CWR;
			state->tcp->th_seq = htonl(state->tcp_seq);
		}
		ASSERT(state->tcp->th_seq == pkt->pkt_flow_tcp_seq);
		state->internal(state, partial, mss);
		METADATA_ADJUST_LEN(pkt, state->hlen + mss, tx_headroom);
		VERIFY(__packet_finalize(SK_PKT2PH(pkt)) == 0);
		KPKTQ_ENQUEUE(&pktq_seg, pkt);
		if (!skip_pktap) {
			nx_netif_pktap_output(ifp, state->af, pkt);
		}
	}
	ASSERT(off == total_len);
	STATS_ADD(nifs, NETIF_STATS_GSO_SEG, n_pkts);

	/* ifnet_enqueue_pkt_chain() consumes the packet chain */
	pkt_chain_head = KPKTQ_FIRST(&pktq_seg);
	pkt_chain_tail = KPKTQ_LAST(&pktq_seg);
	KPKTQ_INIT(&pktq_seg);
	n_bytes = total_len + (state->hlen * (n_pkts - 1));

	error = netif_gso_send(ifp, pkt_chain_head, pkt_chain_tail,
	    n_pkts, n_bytes);

done:
	KPKTQ_FINI(&pktq_alloc);
	return error;
}

/*
 * Update the pointers to TCP and IPv4 headers
 */
static void
netif_gso_ipv4_tcp_update(struct netif_gso_ip_tcp_state *state,
    struct __kern_packet *pkt, uint8_t *baddr)
{
	state->hdr.ip = (struct ip *)(void *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len);
	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) +
	    state->ip_hlen);
}

/*
 * Finalize the TCP and IPv4 headers
 */
static void
netif_gso_ipv4_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip->ip_id = htons((state->ip_id)++);
	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
	    payload_len);
	/*
	 * IP header checksum
	 */
	state->hdr.ip->ip_sum = 0;
	state->hdr.ip->ip_sum = inet_cksum_buffer(state->hdr.ip, 0, 0,
	    state->ip_hlen);
	/*
	 * TCP Checksum
	 */
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
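	/*
	 * pseudo_hdr_csum was computed over the addresses only (length and
	 * protocol were passed as zero), so fold the per-segment length
	 * and protocol into the sum here.
	 */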
	partial += htons(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->pseudo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update the TCP sequence number in the GSO state
	 */
	state->tcp_seq += payload_len;
}

/*
 * Update the pointers to TCP and IPv6 headers
 */
static void
netif_gso_ipv6_tcp_update(struct netif_gso_ip_tcp_state *state,
    struct __kern_packet *pkt, uint8_t *baddr)
{
	state->hdr.ip6 = (struct ip6_hdr *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len);
	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) +
	    state->ip_hlen);
}

/*
 * Finalize the TCP and IPv6 headers
 */
static void
netif_gso_ipv6_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);
	/*
	 * TCP Checksum
	 */
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
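	/*
	 * As in the IPv4 case, fold the per-segment length and protocol
	 * into the cached pseudo-header sum; the IPv6 pseudo-header
	 * carries a 32-bit upper-layer length, hence htonl() here.
	 */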
	partial += htonl(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->pseudo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update the TCP sequence number
	 */
	state->tcp_seq += payload_len;
}

/*
 * Initialize the state for TCP segmentation
 */
static inline void
netif_gso_ip_tcp_init_state(struct netif_gso_ip_tcp_state *state,
    struct mbuf *m, uint8_t mac_hlen, uint8_t ip_hlen, bool isipv6)
{
	if (isipv6) {
		state->af = AF_INET6;
		state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be at least 16-bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip6 & (uintptr_t)0x1) == 0);
		state->tcp = (struct tcphdr *)(void *)((caddr_t)
		    (state->hdr.ip6) + ip_hlen);
		state->update = netif_gso_ipv6_tcp_update;
		state->internal = netif_gso_ipv6_tcp_internal;
		state->pseudo_hdr_csum = in6_pseudo(&state->hdr.ip6->ip6_src,
		    &state->hdr.ip6->ip6_dst, 0);
	} else {
		struct in_addr ip_src, ip_dst;

		state->af = AF_INET;
		state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be at least 16-bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip & (uintptr_t)0x1) == 0);
		state->ip_id = ntohs(state->hdr.ip->ip_id);
		state->tcp = (struct tcphdr *)(void *)((caddr_t)
		    (state->hdr.ip) + ip_hlen);
		state->update = netif_gso_ipv4_tcp_update;
		state->internal = netif_gso_ipv4_tcp_internal;
		bcopy(&state->hdr.ip->ip_src, &ip_src, sizeof(ip_src));
		bcopy(&state->hdr.ip->ip_dst, &ip_dst, sizeof(ip_dst));
		state->pseudo_hdr_csum = in_pseudo(ip_src.s_addr,
		    ip_dst.s_addr, 0);
	}

	state->mac_hlen = mac_hlen;
	state->ip_hlen = ip_hlen;
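	/* th_off counts 32-bit words, so the byte length is th_off << 2 */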
	state->tcp_hlen = (uint8_t)(state->tcp->th_off << 2);
	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
	VERIFY(m->m_pkthdr.tso_segsz != 0);
	state->mss = (uint16_t)m->m_pkthdr.tso_segsz;
	state->tcp_seq = ntohl(state->tcp->th_seq);
}

/*
 * GSO on TCP/IPv4
 */
static int
netif_gso_ipv4_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip *ip;
	struct kern_pbufpool *pp = NULL;
	struct netif_gso_ip_tcp_state state;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

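	/*
	 * Pull up the IPv4 header, then (once its actual length is known)
	 * the TCP header, so that both can be read from the first mbuf.
	 */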
	hlen = mac_hlen + sizeof(struct ip);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	ip = (struct ip *)(void *)(mtod(m, uint8_t *) + mac_hlen);
	ip_hlen = (uint8_t)(ip->ip_hl << 2);
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, false);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}

/*
 * GSO on TCP/IPv6
 */
static int
netif_gso_ipv6_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip6_hdr *ip6;
	struct kern_pbufpool *pp = NULL;
	struct netif_gso_ip_tcp_state state;
	int lasthdr_off;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

	hlen = mac_hlen + sizeof(struct ip6_hdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
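	/*
	 * ip6_lasthdr() walks the extension header chain, so ip_hlen
	 * covers the fixed IPv6 header plus any extension headers.
	 */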
	ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + mac_hlen);
	lasthdr_off = ip6_lasthdr(m, mac_hlen, IPPROTO_IPV6, NULL) - mac_hlen;
	VERIFY(lasthdr_off <= UINT8_MAX);
	ip_hlen = (uint8_t)lasthdr_off;
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, true);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}

int
netif_gso_dispatch(struct ifnet *ifp, struct mbuf *m)
{
	int gso_flags;

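	/*
	 * Route the mbuf by its TSO checksum flags: CSUM_TO_GSO() yields
	 * GSO_TCP4/GSO_TCP6 for TSO-marked traffic, and GSO_NONE (plain
	 * host output) for everything else.
	 */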
	ASSERT(m->m_nextpkt == NULL);
	gso_flags = CSUM_TO_GSO(m->m_pkthdr.csum_flags);
	VERIFY(gso_flags < GSO_END_OF_TYPE);
	return netif_gso_functions[gso_flags](ifp, m);
}

void
netif_gso_init(void)
{
	_CASSERT(CSUM_TO_GSO(~(CSUM_TSO_IPV4 | CSUM_TSO_IPV6)) == GSO_NONE);
	_CASSERT(CSUM_TO_GSO(CSUM_TSO_IPV4) == GSO_TCP4);
	_CASSERT(CSUM_TO_GSO(CSUM_TSO_IPV6) == GSO_TCP6);
	netif_gso_functions[GSO_NONE] = nx_netif_host_output;
	netif_gso_functions[GSO_TCP4] = netif_gso_ipv4_tcp;
	netif_gso_functions[GSO_TCP6] = netif_gso_ipv6_tcp;
}

void
netif_gso_fini(void)
{
	netif_gso_functions[GSO_NONE] = NULL;
	netif_gso_functions[GSO_TCP4] = NULL;
	netif_gso_functions[GSO_TCP6] = NULL;
}