/*
 * Copyright (c) 2020-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>

#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/pktap.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>

#define CSUM_GSO_MASK   0x00300000
#define CSUM_GSO_OFFSET 20
#define CSUM_TO_GSO(x)  ((x & CSUM_GSO_MASK) >> CSUM_GSO_OFFSET)
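/*
 * CSUM_TO_GSO() maps the TSO bits of m_pkthdr.csum_flags directly onto a
 * netif_gso_type index: e.g. CSUM_TSO_IPV4 (0x00100000 in <sys/mbuf.h>)
 * >> 20 yields 1 (GSO_TCP4), and CSUM_TSO_IPV6 (0x00200000) yields 2
 * (GSO_TCP6). The static_asserts in netif_gso_init() verify this mapping.
 */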

enum netif_gso_type {
	GSO_NONE,
	GSO_TCP4,
	GSO_TCP6,
	GSO_END_OF_TYPE
};

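/*
 * When enabled (the default), segments are handed to AQM as a single
 * packet chain rather than one at a time; see netif_gso_send() below.
 * Tunable via the chain_enqueue sysctl on DEVELOPMENT/DEBUG kernels.
 */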
uint32_t netif_chain_enqueue = 1;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, chain_enqueue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_chain_enqueue, 0,
    "netif chain enqueue");
#endif /* (DEVELOPMENT || DEBUG) */

/*
 * Dispatch table of GSO handlers, indexed by netif_gso_type and
 * populated by netif_gso_init()
 */
int (*netif_gso_functions[GSO_END_OF_TYPE]) (struct ifnet*, struct mbuf*);

/*
 * State carried across segments during TCP segmentation
 */
struct netif_gso_ip_tcp_state {
	/* refresh header pointers into the current segment's buffer */
	void (*update)(struct netif_gso_ip_tcp_state*,
	    struct __kern_packet *pkt, uint8_t *__bidi_indexable baddr);
	/* finalize headers and checksums for the current segment */
	void (*internal)(struct netif_gso_ip_tcp_state*, uint32_t partial,
	    uint16_t payload_len, uint32_t *csum_flags);
	union {
		struct ip *ip;
		struct ip6_hdr *ip6;
	} hdr;                          /* IP header of current segment */
	int af;                         /* address family, AF_INET(6) */
	struct tcphdr *tcp;             /* TCP header of current segment */
	struct kern_pbufpool *pp;       /* packet buffer pool */
	uint32_t pseudo_hdr_csum;       /* precomputed pseudo-header sum */
	uint32_t tcp_seq;               /* sequence number of next segment */
	uint16_t hlen;                  /* mac_hlen + ip_hlen + tcp_hlen */
	uint16_t mss;                   /* payload bytes per segment */
	uint16_t ip_id;                 /* running IPv4 identification */
	uint8_t mac_hlen;               /* link-layer header length */
	uint8_t ip_hlen;                /* IP header length */
	uint8_t tcp_hlen;               /* TCP header length */
	boolean_t copy_data_sum;        /* checksum payload while copying */
};

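/*
 * Compute the length of the frame header, i.e. the gap between the start
 * of the mbuf data and m_pkthdr.pkt_hdr; fails with ERANGE if pkt_hdr
 * does not point into the data or the length does not fit in a uint8_t.
 */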
static inline uint8_t
netif_gso_get_frame_header_len(struct mbuf *m, uint8_t *hlen)
{
	uint64_t len;
	char *__single ph = m->m_pkthdr.pkt_hdr;

	if (__improbable(m_pktlen(m) == 0 || ph == NULL ||
	    ph < (char *)m->m_data)) {
		return ERANGE;
	}
	len = (ph - m_mtod_current(m));
	if (__improbable(len > UINT8_MAX)) {
		return ERANGE;
	}
	*hlen = (uint8_t)len;
	return 0;
}

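/*
 * Verify that the netif adapter is still in skywalk mode and that the TX
 * ring serving this mbuf's service class is not in drop mode; on success,
 * return the packet buffer pool of that ring through *pp.
 */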
static inline int
netif_gso_check_netif_active(struct ifnet *ifp, struct mbuf *m,
    struct kern_pbufpool **pp)
{
	struct __kern_channel_ring *kring;
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));

	if (__improbable(!NA_IS_ACTIVE(hwna))) {
		STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
		SK_DF(SK_VERB_NETIF,
		    "\"%s\" (%p) not in skywalk mode anymore",
		    hwna->na_name, SK_KVA(hwna));
		return ENXIO;
	}

	VERIFY(sc_idx < KPKT_SC_MAX_CLASSES);
	kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
	if (__improbable(KR_DROP(kring))) {
		STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
		SK_DF(SK_VERB_NETIF,
		    "kr \"%s\" (%p) krflags 0x%x or %s in drop mode",
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    ifp->if_xname);
		return ENXIO;
	}
	*pp = kring->ckr_pp;
	return 0;
}

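/*
 * Chain enqueue is used unless it has been disabled via the sysctl, a
 * netem scheduler is attached, or the driver has requested per-packet
 * enqueue (IFEF_ENQUEUE_MULTI).
 */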
boolean_t
netif_chain_enqueue_enabled(struct ifnet *ifp)
{
	return netif_chain_enqueue != 0 && ifp->if_output_netem == NULL &&
	       (ifp->if_eflags & IFEF_ENQUEUE_MULTI) == 0;
}

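/*
 * Hand the segment chain to AQM, either as a single chain or packet by
 * packet, then kick off a transmit. If a qset was resolved for the chain,
 * packets are enqueued on the qset's ifclassq instead of ifp->if_snd.
 */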
static inline int
netif_gso_send(struct ifnet *ifp, struct __kern_packet *head,
    struct __kern_packet *tail, uint32_t count, uint32_t bytes)
{
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_qset *__single qset = NULL;
	int error = 0;
	boolean_t dropped;

	qset = nx_netif_find_qset_with_pkt(ifp, head);
	if (netif_chain_enqueue_enabled(ifp)) {
		dropped = false;
		if (qset != NULL) {
			head->pkt_qset_idx = qset->nqs_idx;
			error = ifnet_enqueue_pkt_chain(ifp, qset->nqs_ifcq,
			    head, tail, count, bytes, false, &dropped);
		} else {
			error = ifnet_enqueue_pkt_chain(ifp, ifp->if_snd, head, tail,
			    count, bytes, false, &dropped);
		}
		if (__improbable(dropped)) {
			STATS_ADD(nifs, NETIF_STATS_TX_DROP_ENQ_AQM, count);
			STATS_ADD(nifs, NETIF_STATS_DROP, count);
		}
	} else {
		struct __kern_packet *pkt = head, *next;
		uint32_t c = 0, b = 0;

		while (pkt != NULL) {
			int err;

			next = pkt->pkt_nextpkt;
			pkt->pkt_nextpkt = NULL;
			c++;
			b += pkt->pkt_length;

			dropped = false;
			if (qset != NULL) {
				pkt->pkt_qset_idx = qset->nqs_idx;
				err = ifnet_enqueue_pkt(ifp, qset->nqs_ifcq,
				    pkt, false, &dropped);
			} else {
				err = ifnet_enqueue_pkt(ifp, ifp->if_snd, pkt, false, &dropped);
			}
			if (error == 0 && __improbable(err != 0)) {
				error = err;
			}
			if (__improbable(dropped)) {
				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
			pkt = next;
		}
		ASSERT(c == count);
		ASSERT(b == bytes);
	}
	if (qset != NULL) {
		nx_netif_qset_release(&qset);
	}
	netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
	return error;
}

/*
 * Segment an mbuf chain and transmit the resulting packet chain.
 * This function splits the payload (m->m_pkthdr.len - hdr_len) into
 * segments of at most MSS bytes each and copies the first hdr_len
 * bytes of m to the top of each segment.
 */
static inline int
netif_gso_tcp_segment_mbuf(struct mbuf *m, struct ifnet *ifp,
    struct netif_gso_ip_tcp_state *state, struct kern_pbufpool *pp)
{
	uuid_t euuid;
	struct pktq pktq_alloc, pktq_seg;
	uint64_t timestamp = 0, m_tx_timestamp = 0;
	uint64_t pflags;
	int error = 0;
	uint32_t policy_id;
	uint32_t skip_policy_id;
	uint32_t svc_class;
	uint32_t n, n_pkts, n_bytes;
	int32_t off = 0, total_len = m->m_pkthdr.len;
	uint8_t tx_headroom = (uint8_t)ifp->if_tx_headroom;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	struct __kern_packet *pkt_chain_head, *pkt_chain_tail;
	struct m_tag *ts_tag = NULL;
	uint16_t mss = state->mss;
	bool skip_pktap;

	VERIFY(total_len > state->hlen);
	VERIFY(((tx_headroom + state->mac_hlen) & 0x1) == 0);
	VERIFY((tx_headroom + state->hlen + mss) <= PP_BUF_SIZE_DEF(pp));

	KPKTQ_INIT(&pktq_alloc);
	KPKTQ_INIT(&pktq_seg);
	/* batch-allocate one packet per segment: ceil(payload / mss) */
	n_pkts = (uint32_t)(SK_ROUNDUP((total_len - state->hlen), mss) / mss);
	error = pp_alloc_pktq(pp, 1, &pktq_alloc, n_pkts, NULL,
	    NULL, SKMEM_NOSLEEP);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
		SK_ERR("failed to alloc %u pkts", n_pkts);
		pp_free_pktq(&pktq_alloc);
		error = ENOBUFS;
		goto done;
	}

	ASSERT(m->m_pkthdr.pkt_proto == IPPROTO_TCP);
	ASSERT((m->m_flags & M_BCAST) == 0);
	ASSERT((m->m_flags & M_MCAST) == 0);
	ASSERT((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) == 0);
	pflags = m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK;
	pflags |= PKTF_START_SEQ;
	pflags |= (m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) ? PKT_F_L4S : 0;
	(void) mbuf_get_timestamp(m, &timestamp, NULL);
	necp_get_app_uuid_from_packet(m, euuid);
	policy_id = necp_get_policy_id_from_packet(m);
	skip_policy_id = necp_get_skip_policy_id_from_packet(m);
	svc_class = m_get_service_class(m);
	skip_pktap = (m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) != 0 ||
	    pktap_total_tap_count == 0;

	ts_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM);
	if (ts_tag != NULL) {
		m_tx_timestamp = *(uint64_t *)(ts_tag->m_tag_data);
	}

	for (n = 1, off = state->hlen; off < total_len; off += mss, n++) {
		uint8_t *baddr, *baddr0;
		uint32_t partial = 0;
		struct __kern_packet *pkt;

		KPKTQ_DEQUEUE(&pktq_alloc, pkt);
		ASSERT(pkt != NULL);

		/* get buffer address from packet */
		MD_BUFLET_ADDR_ABS(pkt, baddr0);
		baddr = baddr0;
		baddr += tx_headroom;

		/*
		 * Copy the link-layer, IP and TCP headers from the
		 * original packet.
		 */
		m_copydata(m, 0, state->hlen, baddr);
		baddr += state->hlen;

		/*
		 * Copy the payload from the original packet and
		 * compute a partial checksum on it.
		 */
		if (off + mss > total_len) {
			/* the last segment may be shorter than mss */
			mss = (uint16_t)(total_len - off);
		}
		if (state->copy_data_sum) {
			partial = m_copydata_sum(m, off, mss, baddr, 0, NULL);
		} else {
			m_copydata(m, off, mss, baddr);
		}

		/*
		 * update packet metadata
		 */
		pkt->pkt_headroom = tx_headroom;
		pkt->pkt_l2_len = state->mac_hlen;
		pkt->pkt_link_flags = 0;
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;
		uuid_copy(pkt->pkt_policy_euuid, euuid);
		pkt->pkt_policy_id = policy_id;
		pkt->pkt_skip_policy_id = skip_policy_id;
		pkt->pkt_timestamp = timestamp;
		pkt->pkt_svc_class = svc_class;
		pkt->pkt_pflags |= pflags;
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_flow_ip_proto = IPPROTO_TCP;
		pkt->pkt_transport_protocol = IPPROTO_TCP;
		pkt->pkt_flow_tcp_seq = htonl(state->tcp_seq);
		__packet_set_tx_timestamp(SK_PKT2PH(pkt), m_tx_timestamp);

		state->update(state, pkt, baddr0);
		/*
		 * FIN or PUSH flags, if present, are set only on the
		 * last segment.
		 */
		if (n != n_pkts) {
			state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
		}
		/*
		 * The CWR flag, if present, is set only on the first
		 * segment and cleared on the subsequent ones.
		 */
		if (n != 1) {
			state->tcp->th_flags &= ~TH_CWR;
			state->tcp->th_seq = htonl(state->tcp_seq);
		}
		ASSERT(state->tcp->th_seq == pkt->pkt_flow_tcp_seq);
		state->internal(state, partial, mss, &pkt->pkt_csum_flags);
		METADATA_ADJUST_LEN(pkt, state->hlen + mss, tx_headroom);
		VERIFY(__packet_finalize(SK_PKT2PH(pkt)) == 0);
		KPKTQ_ENQUEUE(&pktq_seg, pkt);
		if (!skip_pktap) {
			nx_netif_pktap_output(ifp, state->af, pkt);
		}
	}
	ASSERT(off == total_len);
	STATS_ADD(nifs, NETIF_STATS_GSO_SEG, n_pkts);

	/* ifnet_enqueue_pkt_chain() consumes the packet chain */
	pkt_chain_head = KPKTQ_FIRST(&pktq_seg);
	pkt_chain_tail = KPKTQ_LAST(&pktq_seg);
	KPKTQ_INIT(&pktq_seg);
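	/*
	 * Each segment carries its own copy of the hlen bytes of headers,
	 * so the chain totals the original length plus (n_pkts - 1) extra
	 * header copies.
	 */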
	n_bytes = total_len + (state->hlen * (n_pkts - 1));

	if (m->m_pkthdr.pkt_ext_flags & PKTF_EXT_QSET_ID_VALID) {
		pkt_chain_head->pkt_pflags |= PKT_F_PRIV_HAS_QSET_ID;
		pkt_chain_head->pkt_priv =
		    __unsafe_forge_single(void *, m->m_pkthdr.pkt_mpriv_qsetid);
	}

	error = netif_gso_send(ifp, pkt_chain_head, pkt_chain_tail,
	    n_pkts, n_bytes);

done:
	KPKTQ_FINI(&pktq_alloc);
	return error;
}

/*
 * Update the pointers to TCP and IPv4 headers
 */
static void
netif_gso_ipv4_tcp_update(struct netif_gso_ip_tcp_state *state,
    struct __kern_packet *pkt, uint8_t *__bidi_indexable baddr)
{
	state->hdr.ip = (struct ip *)(void *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len);
	state->tcp = (struct tcphdr *)(void *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len + state->ip_hlen);
}

/*
 * Finalize the TCP and IPv4 headers
 */
static void
netif_gso_ipv4_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len, uint32_t *csum_flags __unused)
{
	int hlen;
	uint8_t *__sized_by(hlen) buffer;

	/*
	 * Update IP header
	 */
	state->hdr.ip->ip_id = htons((state->ip_id)++);
	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
	    payload_len);
	/*
	 * IP header checksum
	 */
	state->hdr.ip->ip_sum = 0;
	buffer = (uint8_t *__bidi_indexable)(struct ip *__bidi_indexable)
	    state->hdr.ip;
	hlen = state->ip_hlen;
	state->hdr.ip->ip_sum = inet_cksum_buffer(buffer, 0, 0, hlen);
	/*
	 * TCP checksum: "partial" already holds the one's-complement sum
	 * of the payload, computed during the copy. Fold in the TCP header,
	 * then the pseudo-header: pseudo_hdr_csum carries the precomputed
	 * source/destination address sum, and the single htons() term
	 * accounts for both the protocol and the TCP length (header plus
	 * payload). ADDCARRY() folds the accumulator back into 16 bits
	 * before the final complement.
	 */
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
	partial += htons(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->pseudo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update tcp sequence number in gso state
	 */
	state->tcp_seq += payload_len;
}

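/*
 * Finalize the TCP and IPv4 headers, deferring both the IP and TCP
 * checksums to hardware
 */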
static void
netif_gso_ipv4_tcp_internal_nosum(struct netif_gso_ip_tcp_state *state,
    uint32_t partial __unused, uint16_t payload_len __unused,
    uint32_t *csum_flags)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip->ip_id = htons((state->ip_id)++);
	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
	    payload_len);
	/*
	 * Update tcp sequence number in gso state
	 */
	state->tcp_seq += payload_len;

	/* offload csum to hardware */
	*csum_flags |= PACKET_CSUM_IP | PACKET_CSUM_TCP;
}

/*
 * Update the pointers to TCP and IPv6 headers
 */
static void
netif_gso_ipv6_tcp_update(struct netif_gso_ip_tcp_state *state,
    struct __kern_packet *pkt, uint8_t *__bidi_indexable baddr)
{
	state->hdr.ip6 = (struct ip6_hdr *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len);
	state->tcp = (struct tcphdr *)(void *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len + state->ip_hlen);
}

/*
 * Finalize the TCP and IPv6 headers, deferring the TCP checksum to
 * hardware
 */
static void
netif_gso_ipv6_tcp_internal_nosum(struct netif_gso_ip_tcp_state *state,
    uint32_t partial __unused, uint16_t payload_len __unused,
    uint32_t *csum_flags)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);

	/*
	 * Update tcp sequence number
	 */
	state->tcp_seq += payload_len;

	/* offload csum to hardware */
	*csum_flags |= PACKET_CSUM_TCPIPV6;
}

/*
 * Finalize the TCP and IPv6 headers, computing the TCP checksum in
 * software
 */
static void
netif_gso_ipv6_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len, uint32_t *csum_flags __unused)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);
	/*
	 * TCP checksum; htonl() here because the IPv6 pseudo-header
	 * carries the upper-layer length as a 32-bit field
	 */
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
	partial += htonl(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->pseudo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update tcp sequence number
	 */
	state->tcp_seq += payload_len;
}

/*
 * Initialize the state used during TCP segmentation; checksums are
 * deferred to hardware when if_hwassist advertises the offload,
 * otherwise they are computed in software while copying the payload.
 */
static inline void
netif_gso_ip_tcp_init_state(struct netif_gso_ip_tcp_state *state,
    struct mbuf *m, uint8_t mac_hlen, uint8_t ip_hlen, bool isipv6, ifnet_t ifp)
{
	if (isipv6) {
		state->af = AF_INET6;
		state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be at least 16-bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip6 & (uintptr_t)0x1) == 0);
		state->tcp = (struct tcphdr *)(void *)(m_mtod_current(m) +
		    mac_hlen + ip_hlen);
		state->update = netif_gso_ipv6_tcp_update;
		if (ifp->if_hwassist & IFNET_CSUM_TCPIPV6) {
			state->internal = netif_gso_ipv6_tcp_internal_nosum;
			state->copy_data_sum = false;
		} else {
			state->internal = netif_gso_ipv6_tcp_internal;
			state->copy_data_sum = true;
		}
		state->pseudo_hdr_csum = in6_pseudo(&state->hdr.ip6->ip6_src,
		    &state->hdr.ip6->ip6_dst, 0);
	} else {
		struct in_addr ip_src, ip_dst;

		state->af = AF_INET;
		state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be at least 16-bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip & (uintptr_t)0x1) == 0);
		state->ip_id = ntohs(state->hdr.ip->ip_id);
		state->tcp = (struct tcphdr *)(void *)(m_mtod_current(m) +
		    mac_hlen + ip_hlen);
		state->update = netif_gso_ipv4_tcp_update;
		if ((ifp->if_hwassist & (IFNET_CSUM_IP | IFNET_CSUM_TCP)) ==
		    (IFNET_CSUM_IP | IFNET_CSUM_TCP)) {
			state->internal = netif_gso_ipv4_tcp_internal_nosum;
			state->copy_data_sum = false;
		} else {
			state->internal = netif_gso_ipv4_tcp_internal;
			state->copy_data_sum = true;
		}
		bcopy(&state->hdr.ip->ip_src, &ip_src, sizeof(ip_src));
		bcopy(&state->hdr.ip->ip_dst, &ip_dst, sizeof(ip_dst));
		state->pseudo_hdr_csum = in_pseudo(ip_src.s_addr,
		    ip_dst.s_addr, 0);
	}

	state->mac_hlen = mac_hlen;
	state->ip_hlen = ip_hlen;
	state->tcp_hlen = (uint8_t)(state->tcp->th_off << 2);
	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
	VERIFY(m->m_pkthdr.tso_segsz != 0);
	state->mss = (uint16_t)m->m_pkthdr.tso_segsz;
	state->tcp_seq = ntohl(state->tcp->th_seq);
}

/*
 * GSO on TCP/IPv4
 */
static int
netif_gso_ipv4_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip *ip;
	struct kern_pbufpool *__single pp = NULL;
	struct netif_gso_ip_tcp_state state;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

	hlen = mac_hlen + sizeof(struct ip);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	ip = (struct ip *)(void *)(mtod(m, uint8_t *) + mac_hlen);
	ip_hlen = (uint8_t)(ip->ip_hl << 2);
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, false, ifp);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}

/*
 * GSO on TCP/IPv6
 */
static int
netif_gso_ipv6_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip6_hdr *ip6;
	struct kern_pbufpool *__single pp = NULL;
	struct netif_gso_ip_tcp_state state;
	int lasthdr_off;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

	hlen = mac_hlen + sizeof(struct ip6_hdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + mac_hlen);
	lasthdr_off = ip6_lasthdr(m, mac_hlen, IPPROTO_IPV6, NULL) - mac_hlen;
	VERIFY(lasthdr_off <= UINT8_MAX);
	ip_hlen = (uint8_t)lasthdr_off;
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, true, ifp);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}

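/*
 * GSO entry point: select the handler from the TSO bits in csum_flags;
 * packets without TSO flags fall through to nx_netif_host_output() via
 * GSO_NONE.
 */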
int
netif_gso_dispatch(struct ifnet *ifp, struct mbuf *m)
{
	int gso_flags;

	ASSERT(m->m_nextpkt == NULL);
	gso_flags = CSUM_TO_GSO(m->m_pkthdr.csum_flags);
	VERIFY(gso_flags < GSO_END_OF_TYPE);
	return netif_gso_functions[gso_flags](ifp, m);
}

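/*
 * Populate the GSO dispatch table; the static_asserts verify that
 * CSUM_TO_GSO() maps the TSO csum_flags bits onto the enum values.
 */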
void
netif_gso_init(void)
{
	static_assert(CSUM_TO_GSO(~(CSUM_TSO_IPV4 | CSUM_TSO_IPV6)) == GSO_NONE);
	static_assert(CSUM_TO_GSO(CSUM_TSO_IPV4) == GSO_TCP4);
	static_assert(CSUM_TO_GSO(CSUM_TSO_IPV6) == GSO_TCP6);
	netif_gso_functions[GSO_NONE] = nx_netif_host_output;
	netif_gso_functions[GSO_TCP4] = netif_gso_ipv4_tcp;
	netif_gso_functions[GSO_TCP6] = netif_gso_ipv6_tcp;
}

void
netif_gso_fini(void)
{
	netif_gso_functions[GSO_NONE] = NULL;
	netif_gso_functions[GSO_TCP4] = NULL;
	netif_gso_functions[GSO_TCP6] = NULL;
}