xref: /xnu-12377.41.6/bsd/net/dlil_output.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 1999-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <net/if_var.h>
30 #include <net/dlil_var_private.h>
31 #include <net/dlil.h>
32 #include <net/dlil_sysctl.h>
33 
34 
35 static void dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls);
36 
37 static int dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
38     protocol_family_t protocol_family);
39 
40 static void dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
41     protocol_family_t pf);
42 
43 #if CONFIG_DTRACE
44 static void dlil_output_dtrace(ifnet_t ifp, protocol_family_t proto_family, mbuf_t  m);
45 #endif /* CONFIG_DTRACE */
46 
47 /*
48  * dlil_output
49  *
50  * Caller should have a lock on the protocol domain if the protocol
51  * doesn't support finer grained locking. In most cases, the lock
52  * will be held from the socket layer and won't be released until
53  * we return back to the socket layer.
54  *
55  * This does mean that we must take a protocol lock before we take
56  * an interface lock if we're going to take both. This makes sense
57  * because a protocol is likely to interact with an ifp while it
58  * is under the protocol lock.
59  *
60  * An advisory code will be returned if adv is not null. This
61  * can be used to provide feedback about interface queues to the
62  * application.
63  */
64 errno_t
dlil_output(ifnet_t ifp,protocol_family_t proto_family,mbuf_t packetlist,void * route,const struct sockaddr * dest,int flags,struct flowadv * adv)65 dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
66     void *route, const struct sockaddr *dest, int flags, struct flowadv *adv)
67 {
68 	char *frame_type = NULL;
69 	char *dst_linkaddr = NULL;
70 	int retval = 0;
71 	char frame_type_buffer[IFNET_MAX_FRAME_TYPE_BUFFER_SIZE];
72 	char dst_linkaddr_buffer[IFNET_MAX_LINKADDR_BUFFER_SIZE];
73 	if_proto_ref_t proto = NULL;
74 	mbuf_ref_t m = NULL;
75 	mbuf_ref_t send_head = NULL;
76 	mbuf_ref_ptr_t send_tail = &send_head;
77 	int iorefcnt = 0;
78 	u_int32_t pre = 0, post = 0;
79 	u_int32_t fpkts = 0, fbytes = 0;
80 	int32_t flen = 0;
81 	struct timespec now;
82 	u_int64_t now_nsec;
83 	boolean_t did_clat46 = FALSE;
84 	protocol_family_t old_proto_family = proto_family;
85 	struct sockaddr_in6 dest6;
86 	rtentry_ref_t rt = NULL;
87 	u_int16_t m_loop_set = 0;
88 	bool raw = (flags & DLIL_OUTPUT_FLAGS_RAW) != 0;
89 	uint64_t qset_id;
90 	uint8_t qset_id_valid_flag;
91 
92 	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
93 
94 	/*
95 	 * Get an io refcnt if the interface is attached to prevent ifnet_detach
96 	 * from happening while this operation is in progress
97 	 */
98 	if (!ifnet_datamov_begin(ifp)) {
99 		retval = ENXIO;
100 		goto cleanup;
101 	}
102 	iorefcnt = 1;
103 
104 	VERIFY(ifp->if_output_dlil != NULL);
105 
106 	/* update the driver's multicast filter, if needed */
107 	if (ifp->if_updatemcasts > 0) {
108 		if_mcasts_update_async(ifp);
109 		ifp->if_updatemcasts = 0;
110 	}
111 
112 	frame_type = frame_type_buffer;
113 	dst_linkaddr = dst_linkaddr_buffer;
114 
115 	if (flags == DLIL_OUTPUT_FLAGS_NONE) {
116 		ifnet_lock_shared(ifp);
117 		/* callee holds a proto refcnt upon success */
118 		proto = find_attached_proto(ifp, proto_family);
119 		if (proto == NULL) {
120 			ifnet_lock_done(ifp);
121 			retval = ENXIO;
122 			goto cleanup;
123 		}
124 		ifnet_lock_done(ifp);
125 	}
126 
127 preout_again:
128 	if (packetlist == NULL) {
129 		goto cleanup;
130 	}
131 
132 	m = packetlist;
133 	packetlist = packetlist->m_nextpkt;
134 	m->m_nextpkt = NULL;
135 
136 	m_add_crumb(m, PKT_CRUMB_DLIL_OUTPUT);
137 
138 	/*
139 	 * Perform address family translation for the first
140 	 * packet outside the loop in order to perform address
141 	 * lookup for the translated proto family.
142 	 */
143 	if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
144 	    (ifp->if_type == IFT_CELLULAR ||
145 	    dlil_is_clat_needed(proto_family, m))) {
146 		retval = dlil_clat46(ifp, &proto_family, &m);
147 		/*
148 		 * Go to the next packet if translation fails
149 		 */
150 		if (retval != 0) {
151 			m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_CLAT64, NULL, 0);
152 			m = NULL;
153 			ip6stat.ip6s_clat464_out_drop++;
154 			/* Make sure that the proto family is PF_INET */
155 			ASSERT(proto_family == PF_INET);
156 			goto preout_again;
157 		}
158 		/*
159 		 * Free the old one and make it point to the IPv6 proto structure.
160 		 *
161 		 * Change proto for the first time we have successfully
162 		 * performed address family translation.
163 		 */
164 		if (!did_clat46 && proto_family == PF_INET6) {
165 			did_clat46 = TRUE;
166 
167 			if (proto != NULL) {
168 				if_proto_free(proto);
169 			}
170 			ifnet_lock_shared(ifp);
171 			/* callee holds a proto refcnt upon success */
172 			proto = find_attached_proto(ifp, proto_family);
173 			if (proto == NULL) {
174 				ifnet_lock_done(ifp);
175 				retval = ENXIO;
176 				m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_CLAT64, NULL, 0);
177 				m = NULL;
178 				goto cleanup;
179 			}
180 			ifnet_lock_done(ifp);
181 			if (ifp->if_type == IFT_ETHER) {
182 				/* Update the dest to translated v6 address */
183 				dest6.sin6_len = sizeof(struct sockaddr_in6);
184 				dest6.sin6_family = AF_INET6;
185 				dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
186 				dest = SA(&dest6);
187 
188 				/*
189 				 * Lookup route to the translated destination
190 				 * Free this route ref during cleanup
191 				 */
192 				rt = rtalloc1_scoped(SA(&dest6),
193 				    0, 0, ifp->if_index);
194 
195 				route = rt;
196 			}
197 		}
198 	}
199 
200 	/*
201 	 * This path gets packet chain going to the same destination.
202 	 * The pre output routine is used to either trigger resolution of
203 	 * the next hop or retrieve the next hop's link layer addressing.
204 	 * For ex: ether_inet(6)_pre_output routine.
205 	 *
206 	 * If the routine returns EJUSTRETURN, it implies that packet has
207 	 * been queued, and therefore we have to call preout_again for the
208 	 * following packet in the chain.
209 	 *
210 	 * For errors other than EJUSTRETURN, the current packet is freed
211 	 * and the rest of the chain (pointed by packetlist is freed as
212 	 * part of clean up.
213 	 *
214 	 * Else if there is no error the retrieved information is used for
215 	 * all the packets in the chain.
216 	 */
217 	if (flags == DLIL_OUTPUT_FLAGS_NONE) {
218 		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
219 		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
220 		retval = 0;
221 		if (preoutp != NULL) {
222 			retval = preoutp(ifp, proto_family, &m, dest, route,
223 			    frame_type, dst_linkaddr);
224 
225 			if (retval != 0) {
226 				if (retval == EJUSTRETURN) {
227 					goto preout_again;
228 				}
229 				m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_PRE_OUTPUT, NULL, 0);
230 				m = NULL;
231 				goto cleanup;
232 			}
233 		}
234 	}
235 
236 	nanouptime(&now);
237 	net_timernsec(&now, &now_nsec);
238 
239 	qset_id = m->m_pkthdr.pkt_mpriv_qsetid;
240 	qset_id_valid_flag = (m->m_pkthdr.pkt_ext_flags & PKTF_EXT_QSET_ID_VALID)
241 	    ? PKTF_EXT_QSET_ID_VALID : 0;
242 
243 	do {
244 		m_add_hdr_crumb_interface_output(m, ifp->if_index, false);
245 		/*
246 		 * pkt_hdr is set here to point to m_data prior to
247 		 * calling into the framer. This value of pkt_hdr is
248 		 * used by the netif gso logic to retrieve the ip header
249 		 * for the TCP packets, offloaded for TSO processing.
250 		 */
251 		if (raw && (ifp->if_family == IFNET_FAMILY_ETHERNET)) {
252 			uint8_t vlan_encap_len = 0;
253 
254 			if ((m->m_pkthdr.csum_flags & CSUM_VLAN_ENCAP_PRESENT) != 0) {
255 				vlan_encap_len = ETHER_VLAN_ENCAP_LEN;
256 			}
257 			m->m_pkthdr.pkt_hdr = mtod(m, char *) + ETHER_HDR_LEN + vlan_encap_len;
258 		} else {
259 			m->m_pkthdr.pkt_hdr = mtod(m, void *);
260 		}
261 
262 		/*
263 		 * Perform address family translation if needed.
264 		 * For now we only support stateless 4 to 6 translation
265 		 * on the out path.
266 		 *
267 		 * The routine below translates IP header, updates protocol
268 		 * checksum and also translates ICMP.
269 		 *
270 		 * We skip the first packet as it is already translated and
271 		 * the proto family is set to PF_INET6.
272 		 */
273 		if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
274 		    (ifp->if_type == IFT_CELLULAR ||
275 		    dlil_is_clat_needed(proto_family, m))) {
276 			retval = dlil_clat46(ifp, &proto_family, &m);
277 			/* Goto the next packet if the translation fails */
278 			if (retval != 0) {
279 				m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_CLAT64, NULL, 0);
280 				m = NULL;
281 				ip6stat.ip6s_clat464_out_drop++;
282 				goto next;
283 			}
284 		}
285 
286 #if CONFIG_DTRACE
287 		if (flags == DLIL_OUTPUT_FLAGS_NONE) {
288 			dlil_output_dtrace(ifp, proto_family, m);
289 		}
290 #endif /* CONFIG_DTRACE */
291 
292 		if (flags == DLIL_OUTPUT_FLAGS_NONE && ifp->if_framer != NULL) {
293 			int rcvif_set = 0;
294 
295 			/*
296 			 * If this is a broadcast packet that needs to be
297 			 * looped back into the system, set the inbound ifp
298 			 * to that of the outbound ifp.  This will allow
299 			 * us to determine that it is a legitimate packet
300 			 * for the system.  Only set the ifp if it's not
301 			 * already set, just to be safe.
302 			 */
303 			if ((m->m_flags & (M_BCAST | M_LOOP)) &&
304 			    m->m_pkthdr.rcvif == NULL) {
305 				m->m_pkthdr.rcvif = ifp;
306 				rcvif_set = 1;
307 			}
308 			m_loop_set = m->m_flags & M_LOOP;
309 			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
310 			    frame_type, &pre, &post);
311 			if (retval != 0) {
312 				if (retval != EJUSTRETURN) {
313 					m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_IF_FRAMER, NULL, 0);
314 				}
315 				goto next;
316 			}
317 
318 			/*
319 			 * For partial checksum offload, adjust the start
320 			 * and stuff offsets based on the prepended header.
321 			 */
322 			if ((m->m_pkthdr.csum_flags &
323 			    (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
324 			    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
325 				m->m_pkthdr.csum_tx_stuff += pre;
326 				m->m_pkthdr.csum_tx_start += pre;
327 			}
328 
329 			if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) {
330 				dlil_output_cksum_dbg(ifp, m, pre,
331 				    proto_family);
332 			}
333 
334 			/*
335 			 * Clear the ifp if it was set above, and to be
336 			 * safe, only if it is still the same as the
337 			 * outbound ifp we have in context.  If it was
338 			 * looped back, then a copy of it was sent to the
339 			 * loopback interface with the rcvif set, and we
340 			 * are clearing the one that will go down to the
341 			 * layer below.
342 			 */
343 			if (rcvif_set && m->m_pkthdr.rcvif == ifp) {
344 				m->m_pkthdr.rcvif = NULL;
345 			}
346 		}
347 
348 		/*
349 		 * Let interface filters (if any) do their thing ...
350 		 */
351 		if ((flags & DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS) == 0) {
352 			retval = dlil_interface_filters_output(ifp, &m, proto_family);
353 			if (retval != 0) {
354 				if (retval != EJUSTRETURN) {
355 					m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_IF_FILTER, NULL, 0);
356 				}
357 				goto next;
358 			}
359 		}
360 		/*
361 		 * Strip away M_PROTO1 bit prior to sending packet
362 		 * to the driver as this field may be used by the driver
363 		 */
364 		m->m_flags &= ~M_PROTO1;
365 
366 		/*
367 		 * If the underlying interface is not capable of handling a
368 		 * packet whose data portion spans across physically disjoint
369 		 * pages, we need to "normalize" the packet so that we pass
370 		 * down a chain of mbufs where each mbuf points to a span that
371 		 * resides in the system page boundary.  If the packet does
372 		 * not cross page(s), the following is a no-op.
373 		 */
374 		if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
375 			if ((m = m_normalize(m)) == NULL) {
376 				goto next;
377 			}
378 		}
379 
380 		/*
381 		 * If this is a TSO packet, make sure the interface still
382 		 * advertise TSO capability.
383 		 */
384 		if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
385 			retval = EMSGSIZE;
386 			m_drop_if(m, ifp, DROPTAP_FLAG_DIR_OUT, DROP_REASON_DLIL_TSO_NOT_OK, NULL, 0);
387 			goto cleanup;
388 		}
389 
390 		ifp_inc_traffic_class_out(ifp, m);
391 
392 #if SKYWALK
393 		/*
394 		 * For native skywalk devices, packets will be passed to pktap
395 		 * after GSO or after the mbuf to packet conversion.
396 		 * This is done for IPv4/IPv6 packets only because there is no
397 		 * space in the mbuf to pass down the proto family.
398 		 */
399 		if (dlil_is_native_netif_nexus(ifp)) {
400 			if (raw || m->m_pkthdr.pkt_proto == 0) {
401 				pktap_output(ifp, proto_family, m, pre, post);
402 				m->m_pkthdr.pkt_flags |= PKTF_SKIP_PKTAP;
403 			}
404 		} else {
405 			pktap_output(ifp, proto_family, m, pre, post);
406 		}
407 #else /* SKYWALK */
408 		pktap_output(ifp, proto_family, m, pre, post);
409 #endif /* SKYWALK */
410 
411 		/*
412 		 * Count the number of elements in the mbuf chain
413 		 */
414 		if (tx_chain_len_count) {
415 			dlil_count_chain_len(m, &tx_chain_len_stats);
416 		}
417 
418 		/*
419 		 * Discard partial sum information if this packet originated
420 		 * from another interface; the packet would already have the
421 		 * final checksum and we shouldn't recompute it.
422 		 */
423 		if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
424 		    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
425 		    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
426 			m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
427 			m->m_pkthdr.csum_data = 0;
428 		}
429 
430 		/*
431 		 * Finally, call the driver.
432 		 */
433 		if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
434 			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
435 				flen += (m_pktlen(m) - (pre + post));
436 				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
437 			}
438 			(void) mbuf_set_timestamp(m, now_nsec, TRUE);
439 
440 			*send_tail = m;
441 			send_tail = &m->m_nextpkt;
442 		} else {
443 			/*
444 			 * Record timestamp; ifnet_enqueue() will use this info
445 			 * rather than redoing the work.
446 			 */
447 			nanouptime(&now);
448 			net_timernsec(&now, &now_nsec);
449 			(void) mbuf_set_timestamp(m, now_nsec, TRUE);
450 
451 			if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
452 				flen = (m_pktlen(m) - (pre + post));
453 				m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
454 			} else {
455 				flen = 0;
456 			}
457 			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
458 			    0, 0, 0, 0, 0);
459 			retval = (*ifp->if_output_dlil)(ifp, m);
460 			if (retval == EQFULL || retval == EQSUSPENDED) {
461 				if (adv != NULL && adv->code == FADV_SUCCESS) {
462 					adv->code = (retval == EQFULL ?
463 					    FADV_FLOW_CONTROLLED :
464 					    FADV_SUSPENDED);
465 				}
466 				retval = 0;
467 			}
468 			if (retval == EQCONGESTED) {
469 				if (adv != NULL && adv->code == FADV_SUCCESS) {
470 					adv->code = FADV_CONGESTED;
471 				}
472 				retval = 0;
473 			}
474 			if (retval == 0 && flen > 0) {
475 				fbytes += flen;
476 				fpkts++;
477 			}
478 			if (retval != 0 && dlil_verbose) {
479 				DLIL_PRINTF("%s: output error on %s retval = %d\n",
480 				    __func__, if_name(ifp),
481 				    retval);
482 			}
483 			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
484 			    0, 0, 0, 0, 0);
485 		}
486 		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
487 
488 next:
489 		m = packetlist;
490 		if (m != NULL) {
491 			m->m_flags |= m_loop_set;
492 			m->m_pkthdr.pkt_ext_flags |= qset_id_valid_flag;
493 			m->m_pkthdr.pkt_mpriv_qsetid = qset_id;
494 			packetlist = packetlist->m_nextpkt;
495 			m->m_nextpkt = NULL;
496 		}
497 		/* Reset the proto family to old proto family for CLAT */
498 		if (did_clat46) {
499 			proto_family = old_proto_family;
500 		}
501 	} while (m != NULL);
502 
503 	if (send_head != NULL) {
504 		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
505 		    0, 0, 0, 0, 0);
506 		if (ifp->if_eflags & IFEF_SENDLIST) {
507 			retval = (*ifp->if_output_dlil)(ifp, send_head);
508 			if (retval == EQFULL || retval == EQSUSPENDED) {
509 				if (adv != NULL && adv->code != FADV_CONGESTED) {
510 					adv->code = (retval == EQFULL ?
511 					    FADV_FLOW_CONTROLLED :
512 					    FADV_SUSPENDED);
513 				}
514 				retval = 0;
515 			}
516 			if (retval == EQCONGESTED && adv != NULL) {
517 				adv->code = FADV_CONGESTED;
518 				retval = 0;
519 			}
520 			if (retval == 0 && flen > 0) {
521 				fbytes += flen;
522 				fpkts++;
523 			}
524 			if (retval != 0 && dlil_verbose) {
525 				DLIL_PRINTF("%s: output error on %s retval = %d\n",
526 				    __func__, if_name(ifp), retval);
527 			}
528 		} else {
529 			struct mbuf *send_m;
530 			int enq_cnt = 0;
531 			VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
532 			while (send_head != NULL) {
533 				send_m = send_head;
534 				send_head = send_m->m_nextpkt;
535 				send_m->m_nextpkt = NULL;
536 				retval = (*ifp->if_output_dlil)(ifp, send_m);
537 				if (retval == EQFULL || retval == EQSUSPENDED) {
538 					if (adv != NULL && adv->code != FADV_CONGESTED) {
539 						adv->code = (retval == EQFULL ?
540 						    FADV_FLOW_CONTROLLED :
541 						    FADV_SUSPENDED);
542 					}
543 					retval = 0;
544 				}
545 				if (retval == EQCONGESTED && adv != NULL) {
546 					adv->code = FADV_CONGESTED;
547 					retval = 0;
548 				}
549 				if (retval == 0) {
550 					enq_cnt++;
551 					if (flen > 0) {
552 						fpkts++;
553 					}
554 				}
555 				if (retval != 0 && dlil_verbose) {
556 					DLIL_PRINTF("%s: output error on %s "
557 					    "retval = %d\n",
558 					    __func__, if_name(ifp), retval);
559 				}
560 			}
561 			if (enq_cnt > 0) {
562 				fbytes += flen;
563 				ifnet_start(ifp);
564 			}
565 		}
566 		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
567 	}
568 
569 	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
570 
571 cleanup:
572 	if (fbytes > 0) {
573 		ifp->if_fbytes += fbytes;
574 	}
575 	if (fpkts > 0) {
576 		ifp->if_fpackets += fpkts;
577 	}
578 	if (proto != NULL) {
579 		if_proto_free(proto);
580 	}
581 	if (packetlist) { /* if any packets are left, clean up */
582 		mbuf_freem_list(packetlist);
583 	}
584 	if (retval == EJUSTRETURN) {
585 		retval = 0;
586 	}
587 	if (iorefcnt == 1) {
588 		ifnet_datamov_end(ifp);
589 	}
590 	if (rt != NULL) {
591 		rtfree(rt);
592 		rt = NULL;
593 	}
594 
595 	return retval;
596 }
597 
598 
599 /*
600  * Static function implementations.
601  */
602 static void
dlil_count_chain_len(mbuf_t m,struct chain_len_stats * cls)603 dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
604 {
605 	mbuf_t  n = m;
606 	int chainlen = 0;
607 
608 	while (n != NULL) {
609 		chainlen++;
610 		n = n->m_next;
611 	}
612 	switch (chainlen) {
613 	case 0:
614 		break;
615 	case 1:
616 		os_atomic_inc(&cls->cls_one, relaxed);
617 		break;
618 	case 2:
619 		os_atomic_inc(&cls->cls_two, relaxed);
620 		break;
621 	case 3:
622 		os_atomic_inc(&cls->cls_three, relaxed);
623 		break;
624 	case 4:
625 		os_atomic_inc(&cls->cls_four, relaxed);
626 		break;
627 	case 5:
628 	default:
629 		os_atomic_inc(&cls->cls_five_or_more, relaxed);
630 		break;
631 	}
632 }
633 
634 
635 __attribute__((noinline))
636 static int
dlil_interface_filters_output(struct ifnet * ifp,struct mbuf ** m_p,protocol_family_t protocol_family)637 dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
638     protocol_family_t protocol_family)
639 {
640 	boolean_t               is_vlan_packet;
641 	struct ifnet_filter     *filter;
642 	struct mbuf             *m = *m_p;
643 
644 	if (TAILQ_EMPTY(&ifp->if_flt_head)) {
645 		return 0;
646 	}
647 	is_vlan_packet = packet_has_vlan_tag(m);
648 
649 	/*
650 	 * Pass the outbound packet to the interface filters
651 	 */
652 	lck_mtx_lock_spin(&ifp->if_flt_lock);
653 	/* prevent filter list from changing in case we drop the lock */
654 	if_flt_monitor_busy(ifp);
655 	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
656 		int result;
657 
658 		/* exclude VLAN packets from external filters PR-3586856 */
659 		if (is_vlan_packet &&
660 		    (filter->filt_flags & DLIL_IFF_INTERNAL) == 0) {
661 			continue;
662 		}
663 
664 		if (!filter->filt_skip && filter->filt_output != NULL &&
665 		    (filter->filt_protocol == 0 ||
666 		    filter->filt_protocol == protocol_family)) {
667 			lck_mtx_unlock(&ifp->if_flt_lock);
668 
669 			result = filter->filt_output(filter->filt_cookie, ifp,
670 			    protocol_family, m_p);
671 
672 			lck_mtx_lock_spin(&ifp->if_flt_lock);
673 			if (result != 0) {
674 				/* we're done with the filter list */
675 				if_flt_monitor_unbusy(ifp);
676 				lck_mtx_unlock(&ifp->if_flt_lock);
677 				return result;
678 			}
679 		}
680 	}
681 	/* we're done with the filter list */
682 	if_flt_monitor_unbusy(ifp);
683 	lck_mtx_unlock(&ifp->if_flt_lock);
684 
685 	return 0;
686 }
687 
688 __attribute__((noinline))
689 static void
dlil_output_cksum_dbg(struct ifnet * ifp,struct mbuf * m,uint32_t hoff,protocol_family_t pf)690 dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
691     protocol_family_t pf)
692 {
693 #pragma unused(ifp)
694 	uint32_t did_sw;
695 
696 	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
697 	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4 | CSUM_TSO_IPV6))) {
698 		return;
699 	}
700 
701 	switch (pf) {
702 	case PF_INET:
703 		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
704 		if (did_sw & CSUM_DELAY_IP) {
705 			hwcksum_dbg_finalized_hdr++;
706 		}
707 		if (did_sw & CSUM_DELAY_DATA) {
708 			hwcksum_dbg_finalized_data++;
709 		}
710 		break;
711 	case PF_INET6:
712 		/*
713 		 * Checksum offload should not have been enabled when
714 		 * extension headers exist; that also means that we
715 		 * cannot force-finalize packets with extension headers.
716 		 * Indicate to the callee should it skip such case by
717 		 * setting optlen to -1.
718 		 */
719 		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
720 		    m->m_pkthdr.csum_flags);
721 		if (did_sw & CSUM_DELAY_IPV6_DATA) {
722 			hwcksum_dbg_finalized_data++;
723 		}
724 		break;
725 	default:
726 		return;
727 	}
728 }
729 
#if CONFIG_DTRACE
/*
 * Fire the DTrace "send" probe for an outbound IPv4 or IPv6 packet.
 * The start of the mbuf data is taken as the IP header; packets of any
 * other protocol family do not trigger the probe.
 */
__attribute__((noinline))
static void
dlil_output_dtrace(ifnet_t ifp, protocol_family_t proto_family, mbuf_t  m)
{
	switch (proto_family) {
	case PF_INET: {
		struct ip *ip4 = mtod(m, struct ip *);

		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
		    struct ip *, ip4, struct ifnet *, ifp,
		    struct ip *, ip4, struct ip6_hdr *, NULL);
		break;
	}
	case PF_INET6: {
		struct ip6_hdr *hdr6 = mtod(m, struct ip6_hdr *);

		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
		    struct ip6_hdr *, hdr6, struct ifnet *, ifp,
		    struct ip *, NULL, struct ip6_hdr *, hdr6);
		break;
	}
	default:
		/* no probe for other protocol families */
		break;
	}
}
#endif /* CONFIG_DTRACE */
748