xref: /xnu-12377.1.9/bsd/netinet/udp_usrreq.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
61  */
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kernel.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/sysctl.h>
73 #include <sys/syslog.h>
74 #include <sys/mcache.h>
75 #include <net/ntstat.h>
76 
77 #include <kern/uipc_socket.h>
78 #include <kern/zalloc.h>
79 #include <mach/boolean.h>
80 #include <pexpert/pexpert.h>
81 
82 #include <net/if.h>
83 #include <net/if_types.h>
84 #include <net/route.h>
85 #include <net/dlil.h>
86 #include <net/droptap.h>
87 #include <net/net_api_stats.h>
88 
89 #include <netinet/in.h>
90 #include <netinet/in_systm.h>
91 #include <netinet/in_tclass.h>
92 #include <netinet/ip.h>
93 #include <netinet/ip6.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/in_var.h>
96 #include <netinet/ip_var.h>
97 #include <netinet6/in6_pcb.h>
98 #include <netinet6/ip6_var.h>
99 #include <netinet6/udp6_var.h>
100 #include <netinet/ip_icmp.h>
101 #include <netinet/icmp_var.h>
102 #include <netinet/udp.h>
103 #include <netinet/udp_var.h>
104 #include <netinet/udp_log.h>
105 #include <sys/kdebug.h>
106 
107 #if IPSEC
108 #include <netinet6/ipsec.h>
109 #include <netinet6/esp.h>
110 #include <netkey/key.h>
111 extern int ipsec_bypass;
112 extern int esp_udp_encap_port;
113 #endif /* IPSEC */
114 
115 #if NECP
116 #include <net/necp.h>
117 #endif /* NECP */
118 
119 #if FLOW_DIVERT
120 #include <netinet/flow_divert.h>
121 #endif /* FLOW_DIVERT */
122 
123 #if CONTENT_FILTER
124 #include <net/content_filter.h>
125 #endif /* CONTENT_FILTER */
126 
127 #if SKYWALK
128 #include <skywalk/core/skywalk_var.h>
129 #endif /* SKYWALK */
130 
131 #include <net/sockaddr_utils.h>
132 
/*
 * kdebug trace codes in the DBG_NETUDP class.  Within this file the
 * DBG_LAYER_IN_* and DBG_FNC_UDP_INPUT codes are passed to KERNEL_DEBUG()
 * by udp_input(); the *_OUT_* / UDP_OUTPUT codes are presumably used by the
 * output path (not confirmed from the input-side code).
 */
#define DBG_LAYER_IN_BEG        NETDBG_CODE(DBG_NETUDP, 0)
#define DBG_LAYER_IN_END        NETDBG_CODE(DBG_NETUDP, 2)
#define DBG_LAYER_OUT_BEG       NETDBG_CODE(DBG_NETUDP, 1)
#define DBG_LAYER_OUT_END       NETDBG_CODE(DBG_NETUDP, 3)
#define DBG_FNC_UDP_INPUT       NETDBG_CODE(DBG_NETUDP, (5 << 8))
#define DBG_FNC_UDP_OUTPUT      NETDBG_CODE(DBG_NETUDP, (6 << 8) | 1)
139 
/*
 * UDP protocol implementation.
 * Per RFC 768, August, 1980.
 */
/* Checksum knob, exported read-write as "checksum"; defaults to 0 only under COMPAT_42. */
#ifndef COMPAT_42
static int udpcksum = 1;
#else
static int udpcksum = 0;                /* XXX */
#endif
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum,
    CTLFLAG_RW | CTLFLAG_LOCKED, &udpcksum, 0, "");

/*
 * Consulted by udp_input() when no PCB matches a unicast datagram:
 * non-zero values below 3 log every such datagram; values of 3 or more log
 * only datagrams that are not link-level broadcast/multicast and whose
 * source and destination addresses differ.
 */
int udp_log_in_vain = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW | CTLFLAG_LOCKED,
    &udp_log_in_vain, 0, "Log all incoming UDP packets");

/*
 * When non-zero, udp_input() drops unmatched datagrams received on a
 * non-loopback interface instead of answering with ICMP port unreachable.
 */
static int blackhole = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW | CTLFLAG_LOCKED,
    &blackhole, 0, "Do not send port unreachables for refused connects");

/* Allocation zone for struct inpcb; installed as udbinfo.ipi_zone by udp_init(). */
static KALLOC_TYPE_DEFINE(inpcbzone, struct inpcb, NET_KT_DEFAULT);

/* List of UDP PCBs; walked by udp_input() for multicast/broadcast delivery. */
struct inpcbhead udb;           /* from udp_var.h */
#define udb6    udb  /* for KAME src sync over BSD*'s */
/* PCB bookkeeping for UDP (list head, hash tables, lock); set up in udp_init(). */
struct inpcbinfo udbinfo;

/* Bucket count requested for both PCB hash tables in udp_init(). */
#ifndef UDBHASHSIZE
#define UDBHASHSIZE 16
#endif

/* Garbage collection performed during most recent udp_gc() run */
static boolean_t udp_gc_done = FALSE;

/*
 * Expands to a braced block calling log with the parenthesised argument
 * list `a`; the call site in udp_input() therefore passes double
 * parentheses and omits the trailing semicolon.
 */
#define log_in_vain_log(a) { log a; }

static int udp_getstat SYSCTL_HANDLER_ARGS;
/* Global UDP counters; served read-only through the udp_getstat handler. */
struct  udpstat udpstat;        /* from udp_var.h */
SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, udp_getstat, "S,udpstat",
    "UDP statistics (struct udpstat, netinet/udp_var.h)");

SYSCTL_UINT(_net_inet_udp, OID_AUTO, pcbcount,
    CTLFLAG_RD | CTLFLAG_LOCKED, &udbinfo.ipi_count, 0,
    "Number of active PCBs");

__private_extern__ int udp_use_randomport = 1;
SYSCTL_INT(_net_inet_udp, OID_AUTO, randomize_ports,
    CTLFLAG_RW | CTLFLAG_LOCKED, &udp_use_randomport, 0,
    "Randomize UDP port numbers");
190 
/*
 * Lazily-filled IPv6 view of the IPv4 source address of a datagram.
 * udp_append() fills uin6_sin with in6_sin_2_v4mapsin6() the first time an
 * INP_IPV6 PCB needs it and then sets uin6_init_done so later PCBs in the
 * same delivery loop reuse the result.
 */
struct udp_in6 {
	struct sockaddr_in6     uin6_sin;
	u_char                  uin6_init_done : 1;
};
/*
 * Lazily-filled IPv6 header derived from the IPv4 header of a datagram.
 * udp_append() fills uip6_ip6 with ip_2_ip6_hdr() on first use and then
 * sets uip6_init_done.
 */
struct udp_ip6 {
	struct ip6_hdr          uip6_ip6;
	u_char                  uip6_init_done : 1;
};
199 
/*
 * Forward declarations of the file-local UDP routines.  Most of them are
 * installed in the udp_usrreqs dispatch table below; udp_append(),
 * udp_input_checksum() and ip_2_ip6_hdr() are helpers of udp_input(), and
 * udp_gc() is registered as udbinfo.ipi_gc by udp_init().
 */
static int udp_abort(struct socket *);
static int udp_attach(struct socket *, int, struct proc *);
static int udp_bind(struct socket *, struct sockaddr *, struct proc *);
static int udp_connect(struct socket *, struct sockaddr *, struct proc *);
static int udp_connectx(struct socket *, struct sockaddr *,
    struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
    sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
static int udp_detach(struct socket *);
static int udp_disconnect(struct socket *);
static int udp_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
static int udp_send(struct socket *, int, struct mbuf *, struct sockaddr *,
    struct mbuf *, struct proc *);
static void udp_append(struct inpcb *, struct ip *, struct mbuf *, int,
    struct sockaddr_in *, struct udp_in6 *, struct udp_ip6 *, struct ifnet *);
static int udp_input_checksum(struct mbuf *, struct udphdr *, int, int);
static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
    struct mbuf *, struct proc *);
static void ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip);
static void udp_gc(struct inpcbinfo *);
static int udp_defunct(struct socket *);
220 
/*
 * User-request dispatch table for UDP sockets.  The udp_* entries with
 * prototypes above are file-local; the remaining handlers (in_control,
 * in_getpeeraddr, in_getsockaddr, sosend, soreceive, udp_shutdown) are
 * declared outside this file.
 */
struct pr_usrreqs udp_usrreqs = {
	.pru_abort =            udp_abort,
	.pru_attach =           udp_attach,
	.pru_bind =             udp_bind,
	.pru_connect =          udp_connect,
	.pru_connectx =         udp_connectx,
	.pru_control =          in_control,
	.pru_detach =           udp_detach,
	.pru_disconnect =       udp_disconnect,
	.pru_disconnectx =      udp_disconnectx,
	.pru_peeraddr =         in_getpeeraddr,
	.pru_send =             udp_send,
	.pru_shutdown =         udp_shutdown,
	.pru_sockaddr =         in_getsockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
	.pru_defunct =          udp_defunct,
};
239 
/*
 * Memory-accounting handle for UDP; registered under the name "UDP" by
 * udp_init() and stored in the protosw's pr_mem_acct field.
 */
struct mem_acct *udp_memacct;
241 
242 void
udp_init(struct protosw * pp,struct domain * dp)243 udp_init(struct protosw *pp, struct domain *dp)
244 {
245 #pragma unused(dp)
246 	static int udp_initialized = 0;
247 	struct inpcbinfo        *pcbinfo;
248 	uint32_t pool_size = 0;
249 
250 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
251 
252 	if (udp_memacct == NULL) {
253 		udp_memacct = mem_acct_register("UDP", 0, 0);
254 		if (udp_memacct == NULL) {
255 			panic("mem_acct_register returned NULL");
256 		}
257 	}
258 	pp->pr_mem_acct = udp_memacct;
259 
260 	if (!os_atomic_cmpxchg(&udp_initialized, 0, 1, relaxed)) {
261 		return;
262 	}
263 
264 	pool_size = (nmbclusters << MCLSHIFT) >> MBSHIFT;
265 	if (pool_size >= 96) {
266 		/* Improves 10GbE UDP performance. */
267 		udp_recvspace = 786896;
268 	}
269 
270 	if (PE_parse_boot_argn("udp_log", &udp_log_enable_flags, sizeof(udp_log_enable_flags))) {
271 		os_log(OS_LOG_DEFAULT, "udp_init: set udp_log_enable_flags to 0x%x", udp_log_enable_flags);
272 	}
273 
274 	LIST_INIT(&udb);
275 	udbinfo.ipi_listhead = &udb;
276 	hashinit_counted_by(UDBHASHSIZE, udbinfo.ipi_hashbase,
277 	    udbinfo.ipi_hashbase_count);
278 	udbinfo.ipi_hashmask = udbinfo.ipi_hashbase_count - 1;
279 	hashinit_counted_by(UDBHASHSIZE, udbinfo.ipi_porthashbase,
280 	    udbinfo.ipi_porthashbase_count);
281 	udbinfo.ipi_porthashmask = udbinfo.ipi_porthashbase_count - 1;
282 	udbinfo.ipi_zone = inpcbzone;
283 
284 	pcbinfo = &udbinfo;
285 	/*
286 	 * allocate lock group and attribute for udp pcb mutexes
287 	 */
288 	pcbinfo->ipi_lock_grp = lck_grp_alloc_init("udppcb",
289 	    LCK_GRP_ATTR_NULL);
290 	lck_attr_setdefault(&pcbinfo->ipi_lock_attr);
291 	lck_rw_init(&pcbinfo->ipi_lock, pcbinfo->ipi_lock_grp,
292 	    &pcbinfo->ipi_lock_attr);
293 
294 	udbinfo.ipi_gc = udp_gc;
295 	in_pcbinfo_attach(&udbinfo);
296 }
297 
298 void
udp_input(struct mbuf * m,int iphlen)299 udp_input(struct mbuf *m, int iphlen)
300 {
301 	struct ip *ip;
302 	struct udphdr *uh;
303 	struct inpcb *inp;
304 	mbuf_ref_t opts = NULL;
305 	int len, isbroadcast;
306 	struct ip save_ip;
307 	struct sockaddr *append_sa = NULL;
308 	struct sockaddr *append_da = NULL;
309 	struct inpcbinfo *pcbinfo = &udbinfo;
310 	struct sockaddr_in udp_in;
311 	struct sockaddr_in udp_dst;
312 	struct ip_moptions *imo = NULL;
313 	int foundmembership = 0, ret = 0;
314 	struct udp_in6 udp_in6;
315 	struct udp_in6 udp_dst6;
316 	struct udp_ip6 udp_ip6;
317 	struct ifnet *ifp = m->m_pkthdr.rcvif;
318 	u_int16_t pf_tag = 0;
319 	boolean_t is_wake_pkt = false;
320 	boolean_t check_cfil = cfil_filter_present();
321 	drop_reason_t drop_reason = DROP_REASON_UNSPECIFIED;
322 
323 	SOCKADDR_ZERO(&udp_in, sizeof(udp_in));
324 	udp_in.sin_len = sizeof(struct sockaddr_in);
325 	udp_in.sin_family = AF_INET;
326 	bzero(&udp_in6, sizeof(udp_in6));
327 	udp_in6.uin6_sin.sin6_len = sizeof(struct sockaddr_in6);
328 	udp_in6.uin6_sin.sin6_family = AF_INET6;
329 
330 	if (m->m_flags & M_PKTHDR) {
331 		pf_tag = m_pftag(m)->pftag_tag;
332 		if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
333 			is_wake_pkt = true;
334 		}
335 	}
336 
337 	udpstat.udps_ipackets++;
338 
339 	KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
340 
341 	/* Expect 32-bit aligned data pointer on strict-align platforms */
342 	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
343 
344 	m_add_crumb(m, PKT_CRUMB_UDP_INPUT);
345 
346 	/*
347 	 * Strip IP options, if any; should skip this,
348 	 * make available to user, and use on returned packets,
349 	 * but we don't yet have a way to check the checksum
350 	 * with options still present.
351 	 */
352 	if (iphlen > sizeof(struct ip)) {
353 		ip_stripoptions(m);
354 		iphlen = sizeof(struct ip);
355 	}
356 
357 	/*
358 	 * Get IP and UDP header together in first mbuf.
359 	 */
360 	ip = mtod(m, struct ip *);
361 	if (m->m_len < iphlen + sizeof(struct udphdr)) {
362 		m = m_pullup(m, iphlen + sizeof(struct udphdr));
363 		if (m == NULL) {
364 			udpstat.udps_hdrops++;
365 			KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
366 			    0, 0, 0, 0, 0);
367 			return;
368 		}
369 		ip = mtod(m, struct ip *);
370 	}
371 	uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
372 
373 	/* destination port of 0 is illegal, based on RFC768. */
374 	if (uh->uh_dport == 0) {
375 		drop_reason = DROP_REASON_UDP_DST_PORT_ZERO;
376 		IF_UDP_STATINC(ifp, port0);
377 		goto bad;
378 	}
379 
380 	KERNEL_DEBUG(DBG_LAYER_IN_BEG, uh->uh_dport, uh->uh_sport,
381 	    ip->ip_src.s_addr, ip->ip_dst.s_addr, uh->uh_ulen);
382 
383 	/*
384 	 * Make mbuf data length reflect UDP length.
385 	 * If not enough data to reflect UDP length, drop.
386 	 */
387 	len = ntohs((u_short)uh->uh_ulen);
388 	if (ip->ip_len != len) {
389 		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
390 			udpstat.udps_badlen++;
391 			IF_UDP_STATINC(ifp, badlength);
392 			drop_reason = DROP_REASON_UDP_BAD_LENGTH;
393 			goto bad;
394 		}
395 		m_adj(m, len - ip->ip_len);
396 		/* ip->ip_len = len; */
397 	}
398 	/*
399 	 * Save a copy of the IP header in case we want restore it
400 	 * for sending an ICMP error message in response.
401 	 */
402 	save_ip = *ip;
403 
404 	/*
405 	 * Checksum extended UDP header and data.
406 	 */
407 	if (udp_input_checksum(m, uh, iphlen, len)) {
408 		drop_reason = DROP_REASON_UDP_BAD_CHECKSUM;
409 		goto bad;
410 	}
411 
412 	isbroadcast = in_broadcast(ip->ip_dst, ifp);
413 
414 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || isbroadcast) {
415 		int reuse_sock = 0, mcast_delivered = 0;
416 
417 		lck_rw_lock_shared(&pcbinfo->ipi_lock);
418 		/*
419 		 * Deliver a multicast or broadcast datagram to *all* sockets
420 		 * for which the local and remote addresses and ports match
421 		 * those of the incoming datagram.  This allows more than
422 		 * one process to receive multi/broadcasts on the same port.
423 		 * (This really ought to be done for unicast datagrams as
424 		 * well, but that would cause problems with existing
425 		 * applications that open both address-specific sockets and
426 		 * a wildcard socket listening to the same port -- they would
427 		 * end up receiving duplicates of every unicast datagram.
428 		 * Those applications open the multiple sockets to overcome an
429 		 * inadequacy of the UDP socket interface, but for backwards
430 		 * compatibility we avoid the problem here rather than
431 		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
432 		 */
433 
434 		/*
435 		 * Construct sockaddr format source address.
436 		 */
437 		udp_in.sin_port = uh->uh_sport;
438 		udp_in.sin_addr = ip->ip_src;
439 		/*
440 		 * Locate pcb(s) for datagram.
441 		 * (Algorithm copied from raw_intr().)
442 		 */
443 		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
444 		LIST_FOREACH(inp, &udb, inp_list) {
445 #if IPSEC
446 			int skipit;
447 #endif /* IPSEC */
448 
449 			if (inp->inp_socket == NULL) {
450 				continue;
451 			}
452 			if (inp != sotoinpcb(inp->inp_socket)) {
453 				panic("%s: bad so back ptr inp=%p",
454 				    __func__, inp);
455 				/* NOTREACHED */
456 			}
457 			if ((inp->inp_vflag & INP_IPV4) == 0) {
458 				continue;
459 			}
460 			if (inp_restricted_recv(inp, ifp)) {
461 				continue;
462 			}
463 
464 			if ((inp->inp_moptions == NULL) &&
465 			    (ntohl(ip->ip_dst.s_addr) !=
466 			    INADDR_ALLHOSTS_GROUP) && (isbroadcast == 0)) {
467 				continue;
468 			}
469 			/*
470 			 * Skip unbound sockets before taking the lock on the socket as
471 			 * the test with the destination port in the header will fail
472 			 */
473 			if (inp->inp_lport == 0) {
474 				continue;
475 			}
476 
477 			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) ==
478 			    WNT_STOPUSING) {
479 				continue;
480 			}
481 
482 			udp_lock(inp->inp_socket, 1, 0);
483 
484 			if (in_pcb_checkstate(inp, WNT_RELEASE, 1) ==
485 			    WNT_STOPUSING) {
486 				udp_unlock(inp->inp_socket, 1, 0);
487 				continue;
488 			}
489 
490 			if (inp->inp_lport != uh->uh_dport) {
491 				udp_unlock(inp->inp_socket, 1, 0);
492 				continue;
493 			}
494 			if (inp->inp_laddr.s_addr != INADDR_ANY) {
495 				if (inp->inp_laddr.s_addr !=
496 				    ip->ip_dst.s_addr) {
497 					udp_unlock(inp->inp_socket, 1, 0);
498 					continue;
499 				}
500 			}
501 			if (inp->inp_faddr.s_addr != INADDR_ANY) {
502 				if (inp->inp_faddr.s_addr !=
503 				    ip->ip_src.s_addr ||
504 				    inp->inp_fport != uh->uh_sport) {
505 					udp_unlock(inp->inp_socket, 1, 0);
506 					continue;
507 				}
508 			}
509 
510 			if (isbroadcast == 0 && (ntohl(ip->ip_dst.s_addr) !=
511 			    INADDR_ALLHOSTS_GROUP)) {
512 				struct sockaddr_in group;
513 				int blocked;
514 
515 				if ((imo = inp->inp_moptions) == NULL) {
516 					udp_unlock(inp->inp_socket, 1, 0);
517 					continue;
518 				}
519 				IMO_LOCK(imo);
520 
521 				SOCKADDR_ZERO(&group, sizeof(struct sockaddr_in));
522 				group.sin_len = sizeof(struct sockaddr_in);
523 				group.sin_family = AF_INET;
524 				group.sin_addr = ip->ip_dst;
525 
526 				blocked = imo_multi_filter(imo, ifp,
527 				    &group, &udp_in);
528 				if (blocked == MCAST_PASS) {
529 					foundmembership = 1;
530 				}
531 
532 				IMO_UNLOCK(imo);
533 				if (!foundmembership) {
534 					udp_unlock(inp->inp_socket, 1, 0);
535 					if (blocked == MCAST_NOTSMEMBER ||
536 					    blocked == MCAST_MUTED) {
537 						udpstat.udps_filtermcast++;
538 					}
539 					continue;
540 				}
541 				foundmembership = 0;
542 			}
543 
544 			reuse_sock = (inp->inp_socket->so_options &
545 			    (SO_REUSEPORT | SO_REUSEADDR));
546 
547 #if NECP
548 			skipit = 0;
549 			if (!necp_socket_is_allowed_to_send_recv_v4(inp,
550 			    uh->uh_dport, uh->uh_sport, &ip->ip_dst,
551 			    &ip->ip_src, ifp, pf_tag, NULL, NULL, NULL, NULL)) {
552 				/* do not inject data to pcb */
553 				skipit = 1;
554 				UDP_LOG_DROP_NECP(ip, uh, inp, false);
555 			}
556 			if (skipit == 0)
557 #endif /* NECP */
558 			{
559 				mbuf_ref_t n = NULL;
560 
561 				if (reuse_sock) {
562 					n = m_copy(m, 0, M_COPYALL);
563 				}
564 				udp_append(inp, ip, m,
565 				    iphlen + sizeof(struct udphdr),
566 				    &udp_in, &udp_in6, &udp_ip6, ifp);
567 				mcast_delivered++;
568 
569 				m = n;
570 			}
571 			if (is_wake_pkt) {
572 				soevent(inp->inp_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_WAKE_PKT);
573 			}
574 
575 			udp_unlock(inp->inp_socket, 1, 0);
576 
577 
578 			/*
579 			 * Don't look for additional matches if this one does
580 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
581 			 * socket options set.  This heuristic avoids searching
582 			 * through all pcbs in the common case of a non-shared
583 			 * port.  It assumes that an application will never
584 			 * clear these options after setting them.
585 			 */
586 			if (reuse_sock == 0 || m == NULL) {
587 				break;
588 			}
589 
590 			/*
591 			 * Expect 32-bit aligned data pointer on strict-align
592 			 * platforms.
593 			 */
594 			MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
595 			/*
596 			 * Recompute IP and UDP header pointers for new mbuf
597 			 */
598 			ip = mtod(m, struct ip *);
599 			uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
600 		}
601 		lck_rw_done(&pcbinfo->ipi_lock);
602 
603 		if (mcast_delivered == 0) {
604 			/*
605 			 * No matching pcb found; discard datagram.
606 			 * (No need to send an ICMP Port Unreachable
607 			 * for a broadcast or multicast datgram.)
608 			 */
609 			udpstat.udps_noportbcast++;
610 			IF_UDP_STATINC(ifp, port_unreach);
611 			drop_reason = DROP_REASON_UDP_PORT_UNREACHEABLE;
612 			goto bad;
613 		}
614 
615 		/* free the extra copy of mbuf or skipped by IPsec */
616 		if (m != NULL) {
617 			m_freem(m);
618 		}
619 		KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
620 		return;
621 	}
622 
623 #if IPSEC
624 	/*
625 	 * UDP to port 4500 with a payload where the first four bytes are
626 	 * not zero is a UDP encapsulated IPsec packet. Packets where
627 	 * the payload is one byte and that byte is 0xFF are NAT keepalive
628 	 * packets. Decapsulate the ESP packet and carry on with IPsec input
629 	 * or discard the NAT keep-alive.
630 	 */
631 	if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 &&
632 	    (uh->uh_dport == ntohs((u_short)esp_udp_encap_port) ||
633 	    uh->uh_sport == ntohs((u_short)esp_udp_encap_port))) {
634 		/*
635 		 * Check if ESP or keepalive:
636 		 *      1. If the destination port of the incoming packet is 4500.
637 		 *      2. If the source port of the incoming packet is 4500,
638 		 *         then check the SADB to match IP address and port.
639 		 */
640 		bool check_esp = true;
641 		if (uh->uh_dport != ntohs((u_short)esp_udp_encap_port)) {
642 			union sockaddr_in_4_6 src = {};
643 			union sockaddr_in_4_6 dst = {};
644 
645 			ipsec_fill_ip_sockaddr_4_6(&src, ip->ip_src, uh->uh_sport);
646 			ipsec_fill_ip_sockaddr_4_6(&dst, ip->ip_dst, uh->uh_dport);
647 
648 			check_esp = key_checksa_present(&dst, &src);
649 		}
650 
651 		if (check_esp) {
652 			int payload_len = len - sizeof(struct udphdr) > 4 ? 4 :
653 			    len - sizeof(struct udphdr);
654 
655 			if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) {
656 				if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) +
657 				    payload_len)) == NULL) {
658 					udpstat.udps_hdrops++;
659 					KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
660 					    0, 0, 0, 0, 0);
661 					return;
662 				}
663 				/*
664 				 * Expect 32-bit aligned data pointer on strict-align
665 				 * platforms.
666 				 */
667 				MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
668 
669 				ip = mtod(m, struct ip *);
670 				uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen);
671 			}
672 			/* Check for NAT keepalive packet */
673 			if (payload_len == 1 && *(u_int8_t *)
674 			    ((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) {
675 				m_freem(m);
676 				KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
677 				    0, 0, 0, 0, 0);
678 				return;
679 			} else if (payload_len == 4 && *(u_int32_t *)(void *)
680 			    ((caddr_t)uh + sizeof(struct udphdr)) != 0) {
681 				/* UDP encapsulated IPsec packet to pass through NAT */
682 				KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END,
683 				    0, 0, 0, 0, 0);
684 				/* preserve the udp header */
685 				esp4_input(m, iphlen + sizeof(struct udphdr));
686 				return;
687 			}
688 		}
689 	}
690 #endif /* IPSEC */
691 
692 	/*
693 	 * Locate pcb for datagram.
694 	 */
695 	inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
696 	    ip->ip_dst, uh->uh_dport, 1, ifp);
697 	if (inp == NULL) {
698 		IF_UDP_STATINC(ifp, port_unreach);
699 
700 		if (udp_log_in_vain) {
701 			char buf[MAX_IPv4_STR_LEN];
702 			char buf2[MAX_IPv4_STR_LEN];
703 
704 			/* check src and dst address */
705 			if (udp_log_in_vain < 3) {
706 				log(LOG_INFO, "Connection attempt to "
707 				    "UDP %s:%d from %s:%d\n", inet_ntop(AF_INET,
708 				    &ip->ip_dst, buf, sizeof(buf)),
709 				    ntohs(uh->uh_dport), inet_ntop(AF_INET,
710 				    &ip->ip_src, buf2, sizeof(buf2)),
711 				    ntohs(uh->uh_sport));
712 			} else if (!(m->m_flags & (M_BCAST | M_MCAST)) &&
713 			    ip->ip_dst.s_addr != ip->ip_src.s_addr) {
714 				log_in_vain_log((LOG_INFO,
715 				    "Stealth Mode connection attempt to "
716 				    "UDP %s:%d from %s:%d\n", inet_ntop(AF_INET,
717 				    &ip->ip_dst, buf, sizeof(buf)),
718 				    ntohs(uh->uh_dport), inet_ntop(AF_INET,
719 				    &ip->ip_src, buf2, sizeof(buf2)),
720 				    ntohs(uh->uh_sport)))
721 			}
722 		}
723 		udpstat.udps_noport++;
724 		if (m->m_flags & (M_BCAST | M_MCAST)) {
725 			udpstat.udps_noportbcast++;
726 			drop_reason = DROP_REASON_UDP_PORT_UNREACHEABLE;
727 			goto bad;
728 		}
729 		if (blackhole) {
730 			if (ifp && ifp->if_type != IFT_LOOP) {
731 				drop_reason = DROP_REASON_UDP_PORT_UNREACHEABLE;
732 				goto bad;
733 			}
734 		}
735 		if (if_link_heuristics_enabled(ifp)) {
736 			drop_reason = DROP_REASON_UDP_PORT_UNREACHEABLE;
737 			IF_UDP_STATINC(ifp, linkheur_stealthdrop);
738 			goto bad;
739 		}
740 		*ip = save_ip;
741 		ip->ip_len += iphlen;
742 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
743 		KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
744 		return;
745 	}
746 	udp_lock(inp->inp_socket, 1, 0);
747 
748 	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
749 		udp_unlock(inp->inp_socket, 1, 0);
750 		IF_UDP_STATINC(ifp, cleanup);
751 		drop_reason = DROP_REASON_UDP_SOCKET_CLOSING;
752 		goto bad;
753 	}
754 #if NECP
755 	if (!necp_socket_is_allowed_to_send_recv_v4(inp, uh->uh_dport,
756 	    uh->uh_sport, &ip->ip_dst, &ip->ip_src, ifp, pf_tag, NULL, NULL, NULL, NULL)) {
757 		udp_unlock(inp->inp_socket, 1, 0);
758 		IF_UDP_STATINC(ifp, badipsec);
759 		drop_reason = DROP_REASON_UDP_NECP;
760 		goto bad;
761 	}
762 #endif /* NECP */
763 
764 	/*
765 	 * Construct sockaddr format source address.
766 	 * Stuff source address and datagram in user buffer.
767 	 */
768 	udp_in.sin_port = uh->uh_sport;
769 	udp_in.sin_addr = ip->ip_src;
770 	if ((inp->inp_flags & INP_CONTROLOPTS) != 0 ||
771 	    SOFLOW_ENABLED(inp->inp_socket) ||
772 	    SO_RECV_CONTROL_OPTS(inp->inp_socket)) {
773 		if (inp->inp_vflag & INP_IPV6 || inp->inp_vflag & INP_V4MAPPEDV6) {
774 			int savedflags;
775 
776 			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
777 			savedflags = inp->inp_flags;
778 			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
779 			ret = ip6_savecontrol(inp, m, &opts);
780 			inp->inp_flags = savedflags;
781 		} else {
782 			ret = ip_savecontrol(inp, &opts, ip, m);
783 		}
784 		if (ret != 0) {
785 			udp_unlock(inp->inp_socket, 1, 0);
786 			drop_reason = DROP_REASON_UDP_CANNOT_SAVE_CONTROL;
787 			goto bad;
788 		}
789 	}
790 	m_adj(m, iphlen + sizeof(struct udphdr));
791 
792 	KERNEL_DEBUG(DBG_LAYER_IN_END, uh->uh_dport, uh->uh_sport,
793 	    save_ip.ip_src.s_addr, save_ip.ip_dst.s_addr, uh->uh_ulen);
794 
795 	if (inp->inp_vflag & INP_IPV6) {
796 		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
797 		append_sa = SA(&udp_in6.uin6_sin);
798 	} else {
799 		append_sa = SA(&udp_in);
800 	}
801 	if (nstat_collect) {
802 		stats_functional_type ifnet_count_type = IFNET_COUNT_TYPE(ifp);
803 		INP_ADD_RXSTAT(inp, ifnet_count_type, 1, m->m_pkthdr.len);
804 	}
805 #if CONTENT_FILTER && NECP
806 	if (check_cfil && inp != NULL && inp->inp_policyresult.results.filter_control_unit == 0) {
807 		if (inp->inp_vflag & INP_IPV6) {
808 			bzero(&udp_dst6, sizeof(udp_dst6));
809 			udp_dst6.uin6_sin.sin6_len = sizeof(struct sockaddr_in6);
810 			udp_dst6.uin6_sin.sin6_family = AF_INET6;
811 			in6_sin_2_v4mapsin6(&udp_dst, &udp_dst6.uin6_sin);
812 			append_da = SA(&udp_dst6.uin6_sin);
813 		} else {
814 			SOCKADDR_ZERO(&udp_dst, sizeof(udp_dst));
815 			udp_dst.sin_len = sizeof(struct sockaddr_in);
816 			udp_dst.sin_family = AF_INET;
817 			udp_dst.sin_port = uh->uh_dport;
818 			udp_dst.sin_addr = ip->ip_dst;
819 			append_da = SA(&udp_dst);
820 		}
821 		// Override the dst input here so NECP can pick up the policy
822 		// and CFIL can find an existing control socket.
823 		necp_socket_find_policy_match(inp, append_da, append_sa, 0);
824 	}
825 #endif /* CONTENT_FILTER and NECP */
826 	so_recv_data_stat(inp->inp_socket, m, 0);
827 	if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa,
828 	    m, opts, NULL) == 0) {
829 		udpstat.udps_fullsock++;
830 	} else {
831 		sorwakeup(inp->inp_socket);
832 	}
833 	if (is_wake_pkt) {
834 		soevent(inp->inp_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_WAKE_PKT);
835 	}
836 	udp_unlock(inp->inp_socket, 1, 0);
837 	KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
838 	return;
839 bad:
840 	m_drop(m, DROPTAP_FLAG_DIR_IN | DROPTAP_FLAG_L2_MISSING, drop_reason, NULL, 0);
841 	if (opts) {
842 		m_freem(opts);
843 	}
844 	KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
845 }
846 
847 static void
ip_2_ip6_hdr(struct ip6_hdr * ip6,struct ip * ip)848 ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
849 {
850 	bzero(ip6, sizeof(*ip6));
851 
852 	ip6->ip6_vfc = IPV6_VERSION;
853 	ip6->ip6_plen = ip->ip_len;
854 	ip6->ip6_nxt = ip->ip_p;
855 	ip6->ip6_hlim = ip->ip_ttl;
856 	if (ip->ip_src.s_addr) {
857 		ip6->ip6_src.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
858 		ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;
859 	}
860 	if (ip->ip_dst.s_addr) {
861 		ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
862 		ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
863 	}
864 }
865 
866 /*
867  * subroutine of udp_input(), mainly for source code readability.
868  */
869 static void
udp_append(struct inpcb * last,struct ip * ip,struct mbuf * n,int off,struct sockaddr_in * pudp_in,struct udp_in6 * pudp_in6,struct udp_ip6 * pudp_ip6,struct ifnet * ifp)870 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
871     struct sockaddr_in *pudp_in, struct udp_in6 *pudp_in6,
872     struct udp_ip6 *pudp_ip6, struct ifnet *ifp)
873 {
874 	struct sockaddr *append_sa;
875 	mbuf_ref_t opts = NULL;
876 	int ret = 0;
877 
878 	if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
879 	    SOFLOW_ENABLED(last->inp_socket) ||
880 	    SO_RECV_CONTROL_OPTS(last->inp_socket)) {
881 		if (last->inp_vflag & INP_IPV6 || last->inp_vflag & INP_V4MAPPEDV6) {
882 			int savedflags;
883 
884 			if (pudp_ip6->uip6_init_done == 0) {
885 				ip_2_ip6_hdr(&pudp_ip6->uip6_ip6, ip);
886 				pudp_ip6->uip6_init_done = 1;
887 			}
888 			savedflags = last->inp_flags;
889 			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
890 			ret = ip6_savecontrol(last, n, &opts);
891 			if (ret != 0) {
892 				last->inp_flags = savedflags;
893 				UDP_LOG(last, "ip6_savecontrol error %d", ret);
894 				goto error;
895 			}
896 			last->inp_flags = savedflags;
897 		} else {
898 			ret = ip_savecontrol(last, &opts, ip, n);
899 			if (ret != 0) {
900 				UDP_LOG(last, "ip_savecontrol error %d", ret);
901 				goto error;
902 			}
903 		}
904 	}
905 	if (last->inp_vflag & INP_IPV6) {
906 		if (pudp_in6->uin6_init_done == 0) {
907 			in6_sin_2_v4mapsin6(pudp_in, &pudp_in6->uin6_sin);
908 			pudp_in6->uin6_init_done = 1;
909 		}
910 		append_sa = SA(&pudp_in6->uin6_sin);
911 	} else {
912 		append_sa = SA(pudp_in);
913 	}
914 	if (nstat_collect) {
915 		stats_functional_type ifnet_count_type = IFNET_COUNT_TYPE(ifp);
916 		INP_ADD_RXSTAT(last, ifnet_count_type, 1, n->m_pkthdr.len);
917 	}
918 	so_recv_data_stat(last->inp_socket, n, 0);
919 	m_adj(n, off);
920 	if (sbappendaddr(&last->inp_socket->so_rcv, append_sa,
921 	    n, opts, NULL) == 0) {
922 		udpstat.udps_fullsock++;
923 		UDP_LOG(last, "sbappendaddr full receive socket buffer");
924 	} else {
925 		sorwakeup(last->inp_socket);
926 	}
927 	return;
928 error:
929 	m_freem(n);
930 	m_freem(opts);
931 }
932 
933 /*
934  * Notify a udp user of an asynchronous error;
935  * just wake up so that he can collect error status.
936  */
937 void
udp_notify(struct inpcb * inp,int errno)938 udp_notify(struct inpcb *inp, int errno)
939 {
940 	inp->inp_socket->so_error = (u_short)errno;
941 	sorwakeup(inp->inp_socket);
942 	sowwakeup(inp->inp_socket);
943 }
944 
945 void
udp_ctlinput(int cmd,struct sockaddr * sa,void * vip,__unused struct ifnet * ifp)946 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip, __unused struct ifnet * ifp)
947 {
948 	struct ipctlparam *__single ctl_param = vip;
949 	struct ip *ip = NULL;
950 	mbuf_ref_t m = NULL;
951 	void (*notify)(struct inpcb *, int) = udp_notify;
952 	struct in_addr faddr;
953 	struct inpcb *inp = NULL;
954 	struct icmp *icp = NULL;
955 	size_t off;
956 
957 	if (ctl_param != NULL) {
958 		ip = ctl_param->ipc_icmp_ip;
959 		icp = ctl_param->ipc_icmp;
960 		m = ctl_param->ipc_m;
961 		off = ctl_param->ipc_off;
962 	} else {
963 		ip = NULL;
964 		icp = NULL;
965 		m = NULL;
966 		off = 0;
967 	}
968 
969 	faddr = SIN(sa)->sin_addr;
970 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) {
971 		return;
972 	}
973 
974 	if (PRC_IS_REDIRECT(cmd)) {
975 		ip = 0;
976 		notify = in_rtchange;
977 	} else if (cmd == PRC_HOSTDEAD) {
978 		ip = 0;
979 	} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
980 		return;
981 	}
982 	if (ip) {
983 		struct udphdr uh;
984 
985 		/* Check if we can safely get the ports from the UDP header */
986 		if (m == NULL ||
987 		    (m->m_len < off + sizeof(uh))) {
988 			/* Insufficient length */
989 			return;
990 		}
991 
992 		bcopy(m_mtod_current(m) + off, &uh, sizeof(uh));
993 		inp = in_pcblookup_hash(&udbinfo, faddr, uh.uh_dport,
994 		    ip->ip_src, uh.uh_sport, 0, NULL);
995 
996 		if (inp != NULL && inp->inp_socket != NULL) {
997 			udp_lock(inp->inp_socket, 1, 0);
998 			if (in_pcb_checkstate(inp, WNT_RELEASE, 1) ==
999 			    WNT_STOPUSING) {
1000 				udp_unlock(inp->inp_socket, 1, 0);
1001 				return;
1002 			}
1003 			if (cmd == PRC_MSGSIZE && !uuid_is_null(inp->necp_client_uuid)) {
1004 				uuid_t null_uuid;
1005 				uuid_clear(null_uuid);
1006 				necp_update_flow_protoctl_event(null_uuid, inp->necp_client_uuid,
1007 				    PRC_MSGSIZE, ntohs(icp->icmp_nextmtu), 0);
1008 				/*
1009 				 * Avoid calling udp_notify() to set so_error
1010 				 * when using Network.framework since the notification
1011 				 * of PRC_MSGSIZE has been delivered through NECP.
1012 				 */
1013 			} else {
1014 				(*notify)(inp, inetctlerrmap[cmd]);
1015 			}
1016 			udp_unlock(inp->inp_socket, 1, 0);
1017 		}
1018 #if SKYWALK
1019 		else {
1020 			union sockaddr_in_4_6 sock_laddr;
1021 			struct protoctl_ev_val prctl_ev_val;
1022 			bzero(&prctl_ev_val, sizeof(prctl_ev_val));
1023 			bzero(&sock_laddr, sizeof(sock_laddr));
1024 
1025 			if (cmd == PRC_MSGSIZE) {
1026 				prctl_ev_val.val = ntohs(icp->icmp_nextmtu);
1027 			}
1028 
1029 			sock_laddr.sin.sin_family = AF_INET;
1030 			sock_laddr.sin.sin_len = sizeof(sock_laddr.sin);
1031 			sock_laddr.sin.sin_addr = ip->ip_src;
1032 
1033 			protoctl_event_enqueue_nwk_wq_entry(ifp,
1034 			    SA(&sock_laddr), sa,
1035 			    uh.uh_sport, uh.uh_dport, IPPROTO_UDP,
1036 			    cmd, &prctl_ev_val);
1037 		}
1038 #endif /* SKYWALK */
1039 	} else {
1040 		in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
1041 	}
1042 }
1043 
1044 int
udp_ctloutput(struct socket * so,struct sockopt * sopt)1045 udp_ctloutput(struct socket *so, struct sockopt *sopt)
1046 {
1047 	int     error = 0, optval = 0;
1048 	struct  inpcb *inp;
1049 
1050 	/* Allow <SOL_SOCKET,SO_FLUSH/SO_BINDTODEVICE> at this level */
1051 	if (sopt->sopt_level == SOL_SOCKET) {
1052 		if (sopt->sopt_name == SO_BINDTODEVICE) {
1053 			if (SOCK_CHECK_DOM(so, PF_INET6)) {
1054 				error = ip6_ctloutput(so, sopt);
1055 			} else {
1056 				error = ip_ctloutput(so, sopt);
1057 			}
1058 			return error;
1059 		} else if (sopt->sopt_name != SO_FLUSH) {
1060 			return EINVAL;
1061 		}
1062 	}
1063 	if (sopt->sopt_level != IPPROTO_UDP) {
1064 		if (SOCK_CHECK_DOM(so, PF_INET6)) {
1065 			error = ip6_ctloutput(so, sopt);
1066 		} else {
1067 			error = ip_ctloutput(so, sopt);
1068 		}
1069 		return error;
1070 	}
1071 
1072 	inp = sotoinpcb(so);
1073 
1074 	switch (sopt->sopt_dir) {
1075 	case SOPT_SET:
1076 		switch (sopt->sopt_name) {
1077 		case UDP_NOCKSUM:
1078 			/* This option is settable only for UDP over IPv4 */
1079 			if (!(inp->inp_vflag & INP_IPV4)) {
1080 				error = EINVAL;
1081 				break;
1082 			}
1083 
1084 			if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
1085 			    sizeof(optval))) != 0) {
1086 				break;
1087 			}
1088 
1089 			if (optval != 0) {
1090 				inp->inp_flags |= INP_UDP_NOCKSUM;
1091 			} else {
1092 				inp->inp_flags &= ~INP_UDP_NOCKSUM;
1093 			}
1094 			break;
1095 		case UDP_KEEPALIVE_OFFLOAD:
1096 		{
1097 			struct udp_keepalive_offload ka;
1098 			/*
1099 			 * If the socket is not connected, the stack will
1100 			 * not know the destination address to put in the
1101 			 * keepalive datagram. Return an error now instead
1102 			 * of failing later.
1103 			 */
1104 			if (!(so->so_state & SS_ISCONNECTED)) {
1105 				error = EINVAL;
1106 				break;
1107 			}
1108 			if (sopt->sopt_valsize != sizeof(ka)) {
1109 				error = EINVAL;
1110 				break;
1111 			}
1112 			if ((error = sooptcopyin(sopt, &ka, sizeof(ka),
1113 			    sizeof(ka))) != 0) {
1114 				break;
1115 			}
1116 
1117 			/* application should specify the type */
1118 			if (ka.ka_type == 0) {
1119 				return EINVAL;
1120 			}
1121 
1122 			if (ka.ka_interval == 0) {
1123 				/*
1124 				 * if interval is 0, disable the offload
1125 				 * mechanism
1126 				 */
1127 				if (inp->inp_keepalive_data != NULL) {
1128 					kfree_data_sized_by(inp->inp_keepalive_data,
1129 					    inp->inp_keepalive_datalen);
1130 				}
1131 				inp->inp_keepalive_data = NULL;
1132 				inp->inp_keepalive_datalen = 0;
1133 				inp->inp_keepalive_interval = 0;
1134 				inp->inp_keepalive_type = 0;
1135 				inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
1136 			} else {
1137 				if (inp->inp_keepalive_data != NULL) {
1138 					kfree_data_sized_by(inp->inp_keepalive_data,
1139 					    inp->inp_keepalive_datalen);
1140 				}
1141 
1142 				uint8_t datalen = (uint8_t)min(
1143 					ka.ka_data_len,
1144 					UDP_KEEPALIVE_OFFLOAD_DATA_SIZE);
1145 				if (datalen > 0) {
1146 					uint8_t *data = kalloc_data(datalen, Z_WAITOK);
1147 					if (data == NULL) {
1148 						inp->inp_keepalive_data = NULL;
1149 						inp->inp_keepalive_datalen = 0;
1150 						error = ENOMEM;
1151 						break;
1152 					} else {
1153 						inp->inp_keepalive_data = data;
1154 						inp->inp_keepalive_datalen = datalen;
1155 					}
1156 					bcopy(ka.ka_data,
1157 					    inp->inp_keepalive_data,
1158 					    inp->inp_keepalive_datalen);
1159 				} else {
1160 					inp->inp_keepalive_datalen = 0;
1161 					inp->inp_keepalive_data = NULL;
1162 				}
1163 				inp->inp_keepalive_interval = (uint8_t)
1164 				    min(UDP_KEEPALIVE_INTERVAL_MAX_SECONDS,
1165 				    ka.ka_interval);
1166 				inp->inp_keepalive_type = ka.ka_type;
1167 				inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
1168 			}
1169 			break;
1170 		}
1171 		case SO_FLUSH:
1172 			if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
1173 			    sizeof(optval))) != 0) {
1174 				break;
1175 			}
1176 
1177 			error = inp_flush(inp, optval);
1178 			break;
1179 
1180 		default:
1181 			error = ENOPROTOOPT;
1182 			break;
1183 		}
1184 		break;
1185 
1186 	case SOPT_GET:
1187 		switch (sopt->sopt_name) {
1188 		case UDP_NOCKSUM:
1189 			optval = inp->inp_flags & INP_UDP_NOCKSUM;
1190 			break;
1191 
1192 		default:
1193 			error = ENOPROTOOPT;
1194 			break;
1195 		}
1196 		if (error == 0) {
1197 			error = sooptcopyout(sopt, &optval, sizeof(optval));
1198 		}
1199 		break;
1200 	}
1201 	return error;
1202 }
1203 
1204 static int
1205 udp_pcblist SYSCTL_HANDLER_ARGS
1206 {
1207 #pragma unused(oidp, arg1, arg2)
1208 	int error, i, n, sz;
1209 	struct inpcb *inp, **inp_list;
1210 	inp_gen_t gencnt;
1211 	struct xinpgen xig;
1212 
1213 	/*
1214 	 * The process of preparing the TCB list is too time-consuming and
1215 	 * resource-intensive to repeat twice on every request.
1216 	 */
1217 	lck_rw_lock_exclusive(&udbinfo.ipi_lock);
1218 	if (req->oldptr == USER_ADDR_NULL) {
1219 		n = udbinfo.ipi_count;
1220 		req->oldidx = 2 * (sizeof(xig))
1221 		    + (n + n / 8) * sizeof(struct xinpcb);
1222 		lck_rw_done(&udbinfo.ipi_lock);
1223 		return 0;
1224 	}
1225 
1226 	if (req->newptr != USER_ADDR_NULL) {
1227 		lck_rw_done(&udbinfo.ipi_lock);
1228 		return EPERM;
1229 	}
1230 
1231 	/*
1232 	 * OK, now we're committed to doing something.
1233 	 */
1234 	gencnt = udbinfo.ipi_gencnt;
1235 	sz = n = udbinfo.ipi_count;
1236 
1237 	bzero(&xig, sizeof(xig));
1238 	xig.xig_len = sizeof(xig);
1239 	xig.xig_count = n;
1240 	xig.xig_gen = gencnt;
1241 	xig.xig_sogen = so_gencnt;
1242 	error = SYSCTL_OUT(req, &xig, sizeof(xig));
1243 	if (error) {
1244 		lck_rw_done(&udbinfo.ipi_lock);
1245 		return error;
1246 	}
1247 	/*
1248 	 * We are done if there is no pcb
1249 	 */
1250 	if (n == 0) {
1251 		lck_rw_done(&udbinfo.ipi_lock);
1252 		return 0;
1253 	}
1254 
1255 	inp_list = kalloc_type(struct inpcb *, n, Z_WAITOK);
1256 	if (inp_list == NULL) {
1257 		lck_rw_done(&udbinfo.ipi_lock);
1258 		return ENOMEM;
1259 	}
1260 
1261 	for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n;
1262 	    inp = LIST_NEXT(inp, inp_list)) {
1263 		if (inp->inp_gencnt <= gencnt &&
1264 		    inp->inp_state != INPCB_STATE_DEAD) {
1265 			inp_list[i++] = inp;
1266 		}
1267 	}
1268 	n = i;
1269 
1270 	error = 0;
1271 	for (i = 0; i < n; i++) {
1272 		struct xinpcb xi;
1273 
1274 		inp = inp_list[i];
1275 
1276 		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
1277 			continue;
1278 		}
1279 		udp_lock(inp->inp_socket, 1, 0);
1280 		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1281 			udp_unlock(inp->inp_socket, 1, 0);
1282 			continue;
1283 		}
1284 		if (inp->inp_gencnt > gencnt) {
1285 			udp_unlock(inp->inp_socket, 1, 0);
1286 			continue;
1287 		}
1288 
1289 		bzero(&xi, sizeof(xi));
1290 		xi.xi_len = sizeof(xi);
1291 		/* XXX should avoid extra copy */
1292 		inpcb_to_compat(inp, &xi.xi_inp);
1293 		if (inp->inp_socket) {
1294 			sotoxsocket(inp->inp_socket, &xi.xi_socket);
1295 		}
1296 
1297 		udp_unlock(inp->inp_socket, 1, 0);
1298 
1299 		error = SYSCTL_OUT(req, &xi, sizeof(xi));
1300 	}
1301 	if (!error) {
1302 		/*
1303 		 * Give the user an updated idea of our state.
1304 		 * If the generation differs from what we told
1305 		 * her before, she knows that something happened
1306 		 * while we were processing this request, and it
1307 		 * might be necessary to retry.
1308 		 */
1309 		bzero(&xig, sizeof(xig));
1310 		xig.xig_len = sizeof(xig);
1311 		xig.xig_gen = udbinfo.ipi_gencnt;
1312 		xig.xig_sogen = so_gencnt;
1313 		xig.xig_count = udbinfo.ipi_count;
1314 		error = SYSCTL_OUT(req, &xig, sizeof(xig));
1315 	}
1316 
1317 	lck_rw_done(&udbinfo.ipi_lock);
1318 	kfree_type(struct inpcb *, sz, inp_list);
1319 	return error;
1320 }
1321 
1322 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
1323     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist,
1324     "S,xinpcb", "List of active UDP sockets");
1325 
1326 #if XNU_TARGET_OS_OSX
1327 
1328 static int
1329 udp_pcblist64 SYSCTL_HANDLER_ARGS
1330 {
1331 #pragma unused(oidp, arg1, arg2)
1332 	int error, i, n, sz;
1333 	struct inpcb *inp, **inp_list;
1334 	inp_gen_t gencnt;
1335 	struct xinpgen xig;
1336 
1337 	/*
1338 	 * The process of preparing the TCB list is too time-consuming and
1339 	 * resource-intensive to repeat twice on every request.
1340 	 */
1341 	lck_rw_lock_shared(&udbinfo.ipi_lock);
1342 	if (req->oldptr == USER_ADDR_NULL) {
1343 		n = udbinfo.ipi_count;
1344 		req->oldidx =
1345 		    2 * (sizeof(xig)) + (n + n / 8) * sizeof(struct xinpcb64);
1346 		lck_rw_done(&udbinfo.ipi_lock);
1347 		return 0;
1348 	}
1349 
1350 	if (req->newptr != USER_ADDR_NULL) {
1351 		lck_rw_done(&udbinfo.ipi_lock);
1352 		return EPERM;
1353 	}
1354 
1355 	/*
1356 	 * OK, now we're committed to doing something.
1357 	 */
1358 	gencnt = udbinfo.ipi_gencnt;
1359 	sz = n = udbinfo.ipi_count;
1360 
1361 	bzero(&xig, sizeof(xig));
1362 	xig.xig_len = sizeof(xig);
1363 	xig.xig_count = n;
1364 	xig.xig_gen = gencnt;
1365 	xig.xig_sogen = so_gencnt;
1366 	error = SYSCTL_OUT(req, &xig, sizeof(xig));
1367 	if (error) {
1368 		lck_rw_done(&udbinfo.ipi_lock);
1369 		return error;
1370 	}
1371 	/*
1372 	 * We are done if there is no pcb
1373 	 */
1374 	if (n == 0) {
1375 		lck_rw_done(&udbinfo.ipi_lock);
1376 		return 0;
1377 	}
1378 
1379 	inp_list = kalloc_type(struct inpcb *, n, Z_WAITOK);
1380 	if (inp_list == NULL) {
1381 		lck_rw_done(&udbinfo.ipi_lock);
1382 		return ENOMEM;
1383 	}
1384 
1385 	for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n;
1386 	    inp = LIST_NEXT(inp, inp_list)) {
1387 		if (inp->inp_gencnt <= gencnt &&
1388 		    inp->inp_state != INPCB_STATE_DEAD) {
1389 			inp_list[i++] = inp;
1390 		}
1391 	}
1392 	n = i;
1393 
1394 	error = 0;
1395 	for (i = 0; i < n; i++) {
1396 		struct xinpcb64 xi;
1397 
1398 		inp = inp_list[i];
1399 
1400 		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
1401 			continue;
1402 		}
1403 		udp_lock(inp->inp_socket, 1, 0);
1404 		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1405 			udp_unlock(inp->inp_socket, 1, 0);
1406 			continue;
1407 		}
1408 		if (inp->inp_gencnt > gencnt) {
1409 			udp_unlock(inp->inp_socket, 1, 0);
1410 			continue;
1411 		}
1412 
1413 		bzero(&xi, sizeof(xi));
1414 		xi.xi_len = sizeof(xi);
1415 		inpcb_to_xinpcb64(inp, &xi);
1416 		if (inp->inp_socket) {
1417 			sotoxsocket64(inp->inp_socket, &xi.xi_socket);
1418 		}
1419 
1420 		udp_unlock(inp->inp_socket, 1, 0);
1421 
1422 		error = SYSCTL_OUT(req, &xi, sizeof(xi));
1423 	}
1424 	if (!error) {
1425 		/*
1426 		 * Give the user an updated idea of our state.
1427 		 * If the generation differs from what we told
1428 		 * her before, she knows that something happened
1429 		 * while we were processing this request, and it
1430 		 * might be necessary to retry.
1431 		 */
1432 		bzero(&xig, sizeof(xig));
1433 		xig.xig_len = sizeof(xig);
1434 		xig.xig_gen = udbinfo.ipi_gencnt;
1435 		xig.xig_sogen = so_gencnt;
1436 		xig.xig_count = udbinfo.ipi_count;
1437 		error = SYSCTL_OUT(req, &xig, sizeof(xig));
1438 	}
1439 
1440 	lck_rw_done(&udbinfo.ipi_lock);
1441 	kfree_type(struct inpcb *, sz, inp_list);
1442 	return error;
1443 }
1444 
1445 SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64,
1446     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist64,
1447     "S,xinpcb64", "List of active UDP sockets");
1448 
1449 #endif /* XNU_TARGET_OS_OSX */
1450 
1451 static int
1452 udp_pcblist_n SYSCTL_HANDLER_ARGS
1453 {
1454 #pragma unused(oidp, arg1, arg2)
1455 	return get_pcblist_n(IPPROTO_UDP, req, &udbinfo);
1456 }
1457 
1458 SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist_n,
1459     CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist_n,
1460     "S,xinpcb_n", "List of active UDP sockets");
1461 
1462 __private_extern__ void
udp_get_ports_used(ifnet_t ifp,int protocol,uint32_t flags,bitstr_t * __counted_by (bitstr_size (IP_PORTRANGE_SIZE))bitfield)1463 udp_get_ports_used(ifnet_t ifp, int protocol, uint32_t flags,
1464     bitstr_t *__counted_by(bitstr_size(IP_PORTRANGE_SIZE)) bitfield)
1465 {
1466 	inpcb_get_ports_used(ifp, protocol, flags, bitfield,
1467 	    &udbinfo);
1468 }
1469 
1470 __private_extern__ uint32_t
udp_count_opportunistic(unsigned int ifindex,u_int32_t flags)1471 udp_count_opportunistic(unsigned int ifindex, u_int32_t flags)
1472 {
1473 	return inpcb_count_opportunistic(ifindex, &udbinfo, flags);
1474 }
1475 
1476 __private_extern__ uint32_t
udp_find_anypcb_byaddr(struct ifaddr * ifa)1477 udp_find_anypcb_byaddr(struct ifaddr *ifa)
1478 {
1479 #if SKYWALK
1480 	if (netns_is_enabled()) {
1481 		return netns_find_anyres_byaddr(ifa, IPPROTO_UDP);
1482 	} else
1483 #endif /* SKYWALK */
1484 	return inpcb_find_anypcb_byaddr(ifa, &udbinfo);
1485 }
1486 
1487 static int
udp_check_pktinfo(struct mbuf * control,struct ifnet ** outif,struct in_addr * laddr)1488 udp_check_pktinfo(struct mbuf *control, struct ifnet **outif,
1489     struct in_addr *laddr)
1490 {
1491 	struct cmsghdr *cm = 0;
1492 	struct in_pktinfo *pktinfo;
1493 	struct ifnet *ifp;
1494 
1495 	*outif = NULL;
1496 
1497 	/*
1498 	 * XXX: Currently, we assume all the optional information is stored
1499 	 * in a single mbuf.
1500 	 */
1501 	if (control->m_next) {
1502 		return EINVAL;
1503 	}
1504 
1505 	if (control->m_len < CMSG_LEN(0)) {
1506 		return EINVAL;
1507 	}
1508 
1509 	for (cm = M_FIRST_CMSGHDR(control);
1510 	    is_cmsg_valid(control, cm);
1511 	    cm = M_NXT_CMSGHDR(control, cm)) {
1512 		if (cm->cmsg_level != IPPROTO_IP ||
1513 		    cm->cmsg_type != IP_PKTINFO) {
1514 			continue;
1515 		}
1516 
1517 		if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) {
1518 			return EINVAL;
1519 		}
1520 
1521 		pktinfo =  (struct in_pktinfo *)(void *)CMSG_DATA(cm);
1522 
1523 		/* Check for a valid ifindex in pktinfo */
1524 		ifnet_head_lock_shared();
1525 
1526 		if (pktinfo->ipi_ifindex > if_index) {
1527 			ifnet_head_done();
1528 			return ENXIO;
1529 		}
1530 
1531 		/*
1532 		 * If ipi_ifindex is specified it takes precedence
1533 		 * over ipi_spec_dst.
1534 		 */
1535 		if (pktinfo->ipi_ifindex) {
1536 			ifp = ifindex2ifnet[pktinfo->ipi_ifindex];
1537 			if (ifp == NULL) {
1538 				ifnet_head_done();
1539 				return ENXIO;
1540 			}
1541 			ifnet_reference(ifp);
1542 			*outif = ifp;
1543 			ifnet_head_done();
1544 			laddr->s_addr = INADDR_ANY;
1545 			break;
1546 		}
1547 
1548 		ifnet_head_done();
1549 
1550 		/*
1551 		 * Use the provided ipi_spec_dst address for temp
1552 		 * source address.
1553 		 */
1554 		*laddr = pktinfo->ipi_spec_dst;
1555 		break;
1556 	}
1557 	return 0;
1558 }
1559 
1560 static int
udp_output(struct inpcb * inp,struct mbuf * m,struct sockaddr * addr,struct mbuf * control,struct proc * p)1561 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
1562     struct mbuf *control, struct proc *p)
1563 {
1564 	struct udpiphdr *ui;
1565 	int len = m->m_pkthdr.len;
1566 	struct sockaddr_in *sin;
1567 	struct in_addr laddr, faddr, pi_laddr;
1568 	u_short lport, fport;
1569 	int error = 0, pktinfo = 0;
1570 	struct socket *so = inp->inp_socket;
1571 	int soopts = 0;
1572 	struct mbuf *inpopts;
1573 	struct ip_moptions *__single mopts;
1574 	struct route ro;
1575 	struct ip_out_args ipoa;
1576 	bool sndinprog_cnt_used = false;
1577 #if CONTENT_FILTER
1578 	struct m_tag *__single cfil_tag = NULL;
1579 	bool cfil_faddr_use = false;
1580 	uint32_t cfil_so_state_change_cnt = 0;
1581 	uint32_t cfil_so_options = 0;
1582 	struct sockaddr *__single cfil_faddr = NULL;
1583 #endif
1584 	bool check_qos_marking_again = (so->so_flags1 & SOF1_QOSMARKING_POLICY_OVERRIDE) ? FALSE : TRUE;
1585 
1586 	bzero(&ipoa, sizeof(ipoa));
1587 	ipoa.ipoa_boundif = IFSCOPE_NONE;
1588 	ipoa.ipoa_flags = IPOAF_SELECT_SRCIF;
1589 
1590 	ifnet_ref_t outif = NULL;
1591 	struct flowadv *adv = &ipoa.ipoa_flowadv;
1592 	struct sock_cm_info sockcminfo;
1593 	int flowadv = 0;
1594 	int tos = IPTOS_UNSPEC;
1595 
1596 	/* Enable flow advisory only when connected */
1597 	flowadv = (so->so_state & SS_ISCONNECTED) ? 1 : 0;
1598 	pi_laddr.s_addr = INADDR_ANY;
1599 
1600 	KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
1601 
1602 	socket_lock_assert_owned(so);
1603 
1604 #if CONTENT_FILTER
1605 	/*
1606 	 * If socket is subject to UDP Content Filter and no addr is passed in,
1607 	 * retrieve CFIL saved state from mbuf and use it if necessary.
1608 	 */
1609 	if (CFIL_DGRAM_FILTERED(so) && !addr) {
1610 		cfil_tag = cfil_dgram_get_socket_state(m, &cfil_so_state_change_cnt, &cfil_so_options, &cfil_faddr, NULL);
1611 		if (cfil_tag) {
1612 			sin = SIN(cfil_faddr);
1613 			if (inp && inp->inp_faddr.s_addr == INADDR_ANY) {
1614 				/*
1615 				 * Socket is unconnected, simply use the saved faddr as 'addr' to go through
1616 				 * the connect/disconnect logic.
1617 				 */
1618 				addr = SA(cfil_faddr);
1619 			} else if ((so->so_state_change_cnt != cfil_so_state_change_cnt) &&
1620 			    (inp->inp_fport != sin->sin_port ||
1621 			    inp->inp_faddr.s_addr != sin->sin_addr.s_addr)) {
1622 				/*
1623 				 * Socket is connected but socket state and dest addr/port changed.
1624 				 * We need to use the saved faddr info.
1625 				 */
1626 				cfil_faddr_use = true;
1627 			}
1628 		}
1629 	}
1630 #endif
1631 
1632 	sock_init_cm_info(&sockcminfo, so);
1633 
1634 	if (control != NULL) {
1635 		tos = ip_tos_from_control(control);
1636 
1637 		sock_parse_cm_info(control, &sockcminfo);
1638 
1639 		error = udp_check_pktinfo(control, &outif, &pi_laddr);
1640 		m_freem(control);
1641 		control = NULL;
1642 		if (error) {
1643 			UDP_LOG(inp, "udp_check_pktinfo error %d", error);
1644 			goto release;
1645 		}
1646 		if (outif != NULL) {
1647 			pktinfo++;
1648 			ipoa.ipoa_boundif = outif->if_index;
1649 		}
1650 	}
1651 
1652 	KERNEL_DEBUG(DBG_LAYER_OUT_BEG, inp->inp_fport, inp->inp_lport,
1653 	    inp->inp_laddr.s_addr, inp->inp_faddr.s_addr,
1654 	    (htons((u_short)len + sizeof(struct udphdr))));
1655 
1656 	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
1657 		error = EMSGSIZE;
1658 		UDP_LOG(inp, "len %d too big error EMSGSIZE", len);
1659 		goto release;
1660 	}
1661 
1662 	if (flowadv && INP_WAIT_FOR_IF_FEEDBACK(inp)) {
1663 		/*
1664 		 * The socket is flow-controlled, drop the packets
1665 		 * until the inp is not flow controlled
1666 		 */
1667 		error = ENOBUFS;
1668 		UDP_LOG(inp, "flow controlled error ENOBUFS");
1669 		goto release;
1670 	}
1671 	/*
1672 	 * If socket was bound to an ifindex, tell ip_output about it.
1673 	 * If the ancillary IP_PKTINFO option contains an interface index,
1674 	 * it takes precedence over the one specified by IP_BOUND_IF.
1675 	 */
1676 	if (ipoa.ipoa_boundif == IFSCOPE_NONE &&
1677 	    (inp->inp_flags & INP_BOUND_IF)) {
1678 		VERIFY(inp->inp_boundifp != NULL);
1679 		ifnet_reference(inp->inp_boundifp);     /* for this routine */
1680 		if (outif != NULL) {
1681 			ifnet_release(outif);
1682 		}
1683 		outif = inp->inp_boundifp;
1684 		ipoa.ipoa_boundif = outif->if_index;
1685 	}
1686 	if (INP_NO_CELLULAR(inp)) {
1687 		ipoa.ipoa_flags |=  IPOAF_NO_CELLULAR;
1688 	}
1689 	if (INP_NO_EXPENSIVE(inp)) {
1690 		ipoa.ipoa_flags |=  IPOAF_NO_EXPENSIVE;
1691 	}
1692 	if (INP_NO_CONSTRAINED(inp)) {
1693 		ipoa.ipoa_flags |=  IPOAF_NO_CONSTRAINED;
1694 	}
1695 	if (INP_AWDL_UNRESTRICTED(inp)) {
1696 		ipoa.ipoa_flags |=  IPOAF_AWDL_UNRESTRICTED;
1697 	}
1698 	if (INP_MANAGEMENT_ALLOWED(inp)) {
1699 		ipoa.ipoa_flags |= IPOAF_MANAGEMENT_ALLOWED;
1700 	}
1701 	if (INP_ULTRA_CONSTRAINED_ALLOWED(inp)) {
1702 		ipoa.ipoa_flags |= IPOAF_ULTRA_CONSTRAINED_ALLOWED;
1703 	}
1704 	ipoa.ipoa_sotc = sockcminfo.sotc;
1705 	ipoa.ipoa_netsvctype = sockcminfo.netsvctype;
1706 	soopts |= IP_OUTARGS;
1707 
1708 	/*
1709 	 * If there was a routing change, discard cached route and check
1710 	 * that we have a valid source address.  Reacquire a new source
1711 	 * address if INADDR_ANY was specified.
1712 	 *
1713 	 * If we are using cfil saved state, go through this cache cleanup
1714 	 * so that we can get a new route.
1715 	 */
1716 	if (ROUTE_UNUSABLE(&inp->inp_route)
1717 #if CONTENT_FILTER
1718 	    || cfil_faddr_use
1719 #endif
1720 	    ) {
1721 		struct in_ifaddr *ia = NULL;
1722 
1723 		ROUTE_RELEASE(&inp->inp_route);
1724 
1725 		/* src address is gone? */
1726 		if (inp->inp_laddr.s_addr != INADDR_ANY &&
1727 		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
1728 			if (!(inp->inp_flags & INP_INADDR_ANY) ||
1729 			    (so->so_state & SS_ISCONNECTED)) {
1730 				/*
1731 				 * Rdar://5448998
1732 				 * If the source address is gone, return an
1733 				 * error if:
1734 				 * - the source was specified
1735 				 * - the socket was already connected
1736 				 */
1737 				soevent(so, (SO_FILT_HINT_LOCKED |
1738 				    SO_FILT_HINT_NOSRCADDR));
1739 				error = EADDRNOTAVAIL;
1740 				UDP_LOG(inp, "source address not available error EADDRNOTAVAIL");
1741 				goto release;
1742 			} else {
1743 				/* new src will be set later */
1744 				inp->inp_laddr.s_addr = INADDR_ANY;
1745 				inp->inp_last_outifp = NULL;
1746 #if SKYWALK
1747 				if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
1748 					netns_set_ifnet(&inp->inp_netns_token, NULL);
1749 				}
1750 #endif /* SKYWALK */
1751 			}
1752 		}
1753 		if (ia != NULL) {
1754 			ifa_remref(&ia->ia_ifa);
1755 		}
1756 	}
1757 
1758 	/*
1759 	 * IP_PKTINFO option check.  If a temporary scope or src address
1760 	 * is provided, use it for this packet only and make sure we forget
1761 	 * it after sending this datagram.
1762 	 */
1763 	if (pi_laddr.s_addr != INADDR_ANY ||
1764 	    (ipoa.ipoa_boundif != IFSCOPE_NONE && pktinfo)) {
1765 		/* temp src address for this datagram only */
1766 		laddr = pi_laddr;
1767 	} else {
1768 		laddr = inp->inp_laddr;
1769 	}
1770 
1771 	faddr = inp->inp_faddr;
1772 	lport = inp->inp_lport;
1773 	fport = inp->inp_fport;
1774 
1775 #if CONTENT_FILTER
1776 	if (cfil_faddr_use) {
1777 		faddr = SIN(cfil_faddr)->sin_addr;
1778 		fport = SIN(cfil_faddr)->sin_port;
1779 	}
1780 #endif
1781 	inp->inp_sndinprog_cnt++;
1782 	sndinprog_cnt_used = true;
1783 
1784 	if (addr) {
1785 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
1786 			error = EISCONN;
1787 			UDP_LOG(inp, "socket already connected error EISCONN");
1788 			goto release;
1789 		}
1790 		sin = SIN(addr);
1791 		faddr = sin->sin_addr;
1792 		fport = sin->sin_port; /* allow 0 port */
1793 
		/*
		 * Fast path case
		 *
		 * If needed, get a local address and a local port to build
		 * the packet without changing the pcb
		 * and interfering with the input path. See 3851370.
		 *
		 * And don't disconnect as this could unbind the local port
		 *
		 * Scope from IP_PKTINFO takes precedence over
		 * the scope set via INP_BOUND_IF.
		 */
1806 		if (laddr.s_addr == INADDR_ANY) {
1807 			char laddr_str[MAX_IPv4_STR_LEN];
1808 			char addr_str[MAX_IPv4_STR_LEN];
1809 
1810 			inet_ntop(AF_INET, &laddr.s_addr, laddr_str, sizeof(laddr_str));
1811 			inet_ntop(AF_INET, &sin->sin_addr.s_addr, addr_str, sizeof(addr_str));
1812 			UDP_LOG(inp, "calling in_pcbladdr addr %s laddr %s ipoa_boundif %u outif %s",
1813 			    addr_str, laddr_str, ipoa.ipoa_boundif, outif != NULL ? if_name(outif) : "<null>");
1814 
1815 			if ((error = in_pcbladdr(inp, addr, &laddr,
1816 			    ipoa.ipoa_boundif, &outif, 0)) != 0) {
1817 				UDP_LOG(inp, "in_pcbladdr error %d", error);
1818 				goto release;
1819 			}
1820 
1821 			/* synch up in case in_pcbladdr() overrides */
1822 			if (outif != NULL &&
1823 			    ipoa.ipoa_boundif != IFSCOPE_NONE) {
1824 				ipoa.ipoa_boundif = outif->if_index;
1825 			}
1826 
1827 			inet_ntop(AF_INET, &laddr.s_addr, laddr_str, sizeof(laddr_str));
1828 			inet_ntop(AF_INET, &sin->sin_addr.s_addr, addr_str, sizeof(addr_str));
1829 			UDP_LOG(inp, "after in_pcbladdr addr %s laddr %s ipoa_boundif %u outif %s",
1830 			    addr_str, laddr_str, ipoa.ipoa_boundif, outif != NULL ? if_name(outif) : "<null>");
1831 		}
1832 
1833 		if (lport == 0) {
1834 			inp_enter_bind_in_progress(so);
1835 
1836 			error = in_pcbsetport(laddr, addr, inp, p, 0);
1837 
1838 			if (error == 0) {
1839 				ASSERT(inp->inp_lport != 0);
1840 			}
1841 
1842 			inp_exit_bind_in_progress(so);
1843 
1844 			if (error != 0) {
1845 				UDP_LOG(inp, "in_pcbsetport error %d", error);
1846 				goto release;
1847 			}
1848 			lport = inp->inp_lport;
1849 			UDP_LOG(inp, "in_pcbsetport returned lport %u",
1850 			    ntohs(lport));
1851 		}
1852 	} else {
1853 		if (faddr.s_addr == INADDR_ANY) {
1854 			error = ENOTCONN;
1855 			UDP_LOG(inp, "not connected error ENOTCONN");
1856 			goto release;
1857 		}
1858 	}
1859 
1860 	if (inp->inp_flowhash == 0) {
1861 		inp_calc_flowhash(inp);
1862 		ASSERT(inp->inp_flowhash != 0);
1863 	}
1864 
1865 	if (fport == htons(53) && !(so->so_flags1 & SOF1_DNS_COUNTED)) {
1866 		so->so_flags1 |= SOF1_DNS_COUNTED;
1867 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_dns);
1868 	}
1869 
1870 	/*
1871 	 * Calculate data length and get a mbuf
1872 	 * for UDP and IP headers.
1873 	 */
1874 	M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT, 1);
1875 	if (m == 0) {
1876 		error = ENOBUFS;
1877 		UDP_LOG(inp, "M_PREPEND error ENOBUFS");
1878 		goto abort;
1879 	}
1880 
1881 	/*
1882 	 * Fill in mbuf with extended UDP header
1883 	 * and addresses and length put into network format.
1884 	 */
1885 	ui = mtod(m, struct udpiphdr *);
1886 	bzero(ui->ui_x1, sizeof(ui->ui_x1));    /* XXX still needed? */
1887 	ui->ui_pr = IPPROTO_UDP;
1888 	ui->ui_src = laddr;
1889 	ui->ui_dst = faddr;
1890 	ui->ui_sport = lport;
1891 	ui->ui_dport = fport;
1892 	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
1893 
1894 	/*
1895 	 * Set the Don't Fragment bit in the IP header.
1896 	 */
1897 	if (inp->inp_flags2 & INP2_DONTFRAG) {
1898 		struct ip *ip;
1899 
1900 		ip = (struct ip *)&ui->ui_i;
1901 		ip->ip_off |= IP_DF;
1902 	}
1903 
1904 	/*
1905 	 * Set up checksum to pseudo header checksum and output datagram.
1906 	 *
1907 	 * Treat flows to be CLAT46'd as IPv6 flow and compute checksum
1908 	 * no matter what, as IPv6 mandates checksum for UDP.
1909 	 *
1910 	 * Here we only compute the one's complement sum of the pseudo header.
1911 	 * The payload computation and final complement is delayed to much later
1912 	 * in IP processing to decide if remaining computation needs to be done
1913 	 * through offload.
1914 	 *
1915 	 * That is communicated by setting CSUM_UDP in csum_flags.
1916 	 * The offset of checksum from the start of ULP header is communicated
1917 	 * through csum_data.
1918 	 *
1919 	 * Note since this already contains the pseudo checksum header, any
1920 	 * later operation at IP layer that modify the values used here must
1921 	 * update the checksum as well (for example NAT etc).
1922 	 */
1923 	if ((inp->inp_flags2 & INP2_CLAT46_FLOW) ||
1924 	    (udpcksum && !(inp->inp_flags & INP_UDP_NOCKSUM))) {
1925 		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr,
1926 		    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
1927 		m->m_pkthdr.csum_flags = (CSUM_UDP | CSUM_ZERO_INVERT);
1928 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1929 	} else {
1930 		ui->ui_sum = 0;
1931 	}
1932 	((struct ip *)ui)->ip_len = (uint16_t)(sizeof(struct udpiphdr) + len);
1933 	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
1934 	if (tos != IPTOS_UNSPEC) {
1935 		((struct ip *)ui)->ip_tos = (uint8_t)(tos & IPTOS_MASK);
1936 	} else {
1937 		((struct ip *)ui)->ip_tos = inp->inp_ip_tos;    /* XXX */
1938 	}
1939 	udpstat.udps_opackets++;
1940 
1941 	KERNEL_DEBUG(DBG_LAYER_OUT_END, ui->ui_dport, ui->ui_sport,
1942 	    ui->ui_src.s_addr, ui->ui_dst.s_addr, ui->ui_ulen);
1943 
1944 #if NECP
1945 	{
1946 		necp_kernel_policy_id policy_id;
1947 		necp_kernel_policy_id skip_policy_id;
1948 		u_int32_t route_rule_id;
1949 		u_int32_t pass_flags;
1950 
1951 		/*
1952 		 * We need a route to perform NECP route rule checks
1953 		 */
1954 		if (net_qos_policy_restricted != 0 &&
1955 		    ROUTE_UNUSABLE(&inp->inp_route)) {
1956 			struct sockaddr_in to;
1957 			struct sockaddr_in from;
1958 
1959 			ROUTE_RELEASE(&inp->inp_route);
1960 
1961 			SOCKADDR_ZERO(&from, sizeof(struct sockaddr_in));
1962 			from.sin_family = AF_INET;
1963 			from.sin_len = sizeof(struct sockaddr_in);
1964 			from.sin_addr = laddr;
1965 
1966 			SOCKADDR_ZERO(&to, sizeof(struct sockaddr_in));
1967 			to.sin_family = AF_INET;
1968 			to.sin_len = sizeof(struct sockaddr_in);
1969 			to.sin_addr = faddr;
1970 
1971 			inp->inp_route.ro_dst.sa_family = AF_INET;
1972 			inp->inp_route.ro_dst.sa_len = sizeof(struct sockaddr_in);
1973 			SIN(&inp->inp_route.ro_dst)->sin_addr = faddr;
1974 
1975 			rtalloc_scoped(&inp->inp_route, ipoa.ipoa_boundif);
1976 
1977 			inp_update_necp_policy(inp, SA(&from),
1978 			    SA(&to), ipoa.ipoa_boundif);
1979 			inp->inp_policyresult.results.qos_marking_gencount = 0;
1980 		}
1981 
1982 		if (!necp_socket_is_allowed_to_send_recv_v4(inp, lport, fport,
1983 		    &laddr, &faddr, NULL, 0, &policy_id, &route_rule_id, &skip_policy_id, &pass_flags)) {
1984 			error = EHOSTUNREACH;
1985 			UDP_LOG_DROP_NECP((struct ip *)&ui->ui_i, &ui->ui_u, inp, true);
1986 			m_drop_if(m, outif, DROPTAP_FLAG_DIR_OUT | DROPTAP_FLAG_L2_MISSING, DROP_REASON_UDP_NECP, NULL, 0);
1987 			m = NULL;
1988 			UDP_LOG(inp, "necp_socket_is_allowed_to_send_recv_v4 error %d", error);
1989 			goto abort;
1990 		}
1991 
1992 		necp_mark_packet_from_socket(m, inp, policy_id, route_rule_id, skip_policy_id, pass_flags);
1993 
1994 		if (net_qos_policy_restricted != 0) {
1995 			necp_socket_update_qos_marking(inp, inp->inp_route.ro_rt, route_rule_id);
1996 		}
1997 	}
1998 #endif /* NECP */
1999 	if ((so->so_flags1 & SOF1_QOSMARKING_ALLOWED)) {
2000 		ipoa.ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
2001 	}
2002 	if (check_qos_marking_again) {
2003 		ipoa.ipoa_flags |= IPOAF_REDO_QOSMARKING_POLICY;
2004 	}
2005 	ipoa.qos_marking_gencount = inp->inp_policyresult.results.qos_marking_gencount;
2006 
2007 #if IPSEC
2008 	if (inp->inp_sp != NULL && ipsec_setsocket(m, inp->inp_socket) != 0) {
2009 		error = ENOBUFS;
2010 		UDP_LOG_DROP_PCB((struct ip *)&ui->ui_i, &ui->ui_u, inp, true, "ipsec_setsocket error ENOBUFS");
2011 		m_drop_if(m, outif, DROPTAP_FLAG_DIR_OUT | DROPTAP_FLAG_L2_MISSING, DROP_REASON_UDP_IPSEC, NULL, 0);
2012 		m = NULL;
2013 		UDP_LOG(inp, "necp_socket_is_allowed_to_send_recv_v4 error %d", error);
2014 		goto abort;
2015 	}
2016 #endif /* IPSEC */
2017 
2018 	inpopts = inp->inp_options;
2019 #if CONTENT_FILTER
2020 	if (cfil_tag && (inp->inp_socket->so_options != cfil_so_options)) {
2021 		soopts |= (cfil_so_options & (SO_DONTROUTE | SO_BROADCAST));
2022 	} else
2023 #endif
2024 	soopts |= (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
2025 
2026 	mopts = inp->inp_moptions;
2027 	if (mopts != NULL) {
2028 		IMO_LOCK(mopts);
2029 		IMO_ADDREF_LOCKED(mopts);
2030 		if (IN_MULTICAST(ntohl(ui->ui_dst.s_addr)) &&
2031 		    mopts->imo_multicast_ifp != NULL) {
2032 			/* no reference needed */
2033 			inp->inp_last_outifp = mopts->imo_multicast_ifp;
2034 #if SKYWALK
2035 			if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2036 				netns_set_ifnet(&inp->inp_netns_token,
2037 				    inp->inp_last_outifp);
2038 			}
2039 #endif /* SKYWALK */
2040 		}
2041 		IMO_UNLOCK(mopts);
2042 	}
2043 
2044 	/* Copy the cached route and take an extra reference */
2045 	inp_route_copyout(inp, &ro);
2046 
2047 	set_packet_service_class(m, so, sockcminfo.sotc, 0);
2048 	if (sockcminfo.tx_time) {
2049 		mbuf_set_tx_time(m, sockcminfo.tx_time);
2050 	}
2051 	m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB;
2052 	m->m_pkthdr.pkt_flowid = inp->inp_flowhash;
2053 	m->m_pkthdr.pkt_proto = IPPROTO_UDP;
2054 	m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC);
2055 	if (flowadv) {
2056 		m->m_pkthdr.pkt_flags |= PKTF_FLOW_ADV;
2057 	}
2058 	m->m_pkthdr.tx_udp_pid = so->last_pid;
2059 	if (so->so_flags & SOF_DELEGATED) {
2060 		m->m_pkthdr.tx_udp_e_pid = so->e_pid;
2061 	} else {
2062 		m->m_pkthdr.tx_udp_e_pid = 0;
2063 	}
2064 #if (DEBUG || DEVELOPMENT)
2065 	if (so->so_flags & SOF_MARK_WAKE_PKT) {
2066 		so->so_flags &= ~SOF_MARK_WAKE_PKT;
2067 		m->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
2068 	}
2069 #endif /* (DEBUG || DEVELOPMENT) */
2070 
2071 	m_add_crumb(m, PKT_CRUMB_UDP_OUTPUT);
2072 
2073 	if (ipoa.ipoa_boundif != IFSCOPE_NONE) {
2074 		ipoa.ipoa_flags |= IPOAF_BOUND_IF;
2075 	}
2076 
2077 	if (laddr.s_addr != INADDR_ANY) {
2078 		ipoa.ipoa_flags |= IPOAF_BOUND_SRCADDR;
2079 	}
2080 
2081 	socket_unlock(so, 0);
2082 	error = ip_output(m, inpopts, &ro, soopts, mopts, &ipoa);
2083 	m = NULL;
2084 	socket_lock(so, 0);
2085 	if (mopts != NULL) {
2086 		IMO_REMREF(mopts);
2087 	}
2088 
2089 	if (error != 0) {
2090 		UDP_LOG(inp, "ip_output error %d", error);
2091 	}
2092 
2093 	if (check_qos_marking_again) {
2094 		inp->inp_policyresult.results.qos_marking_gencount = ipoa.qos_marking_gencount;
2095 
2096 		if (ipoa.ipoa_flags & IPOAF_QOSMARKING_ALLOWED) {
2097 			inp->inp_socket->so_flags1 |= SOF1_QOSMARKING_ALLOWED;
2098 		} else {
2099 			inp->inp_socket->so_flags1 &= ~SOF1_QOSMARKING_ALLOWED;
2100 		}
2101 	}
2102 
2103 	if (error == 0 && nstat_collect) {
2104 		stats_functional_type ifnet_count_type = stats_functional_type_unclassified;
2105 
2106 		if (ro.ro_rt != NULL) {
2107 			ifnet_count_type = IFNET_COUNT_TYPE(ro.ro_rt->rt_ifp);
2108 		}
2109 		INP_ADD_TXSTAT(inp, ifnet_count_type, 1, len);
2110 	}
2111 
2112 	if (flowadv && (adv->code == FADV_FLOW_CONTROLLED ||
2113 	    adv->code == FADV_SUSPENDED)) {
2114 		/*
2115 		 * return a hint to the application that
2116 		 * the packet has been dropped
2117 		 */
2118 		error = ENOBUFS;
2119 		inp_set_fc_state(inp, adv->code);
2120 	}
2121 
2122 	/* Synchronize PCB cached route */
2123 	inp_route_copyin(inp, &ro);
2124 
2125 	if (inp->inp_route.ro_rt != NULL) {
2126 		if (IS_LOCALNET_ROUTE(inp->inp_route.ro_rt)) {
2127 			inp->inp_flags2 |= INP2_LAST_ROUTE_LOCAL;
2128 		} else {
2129 			inp->inp_flags2 &= ~INP2_LAST_ROUTE_LOCAL;
2130 		}
2131 	}
2132 
2133 abort:
2134 	if (inp->inp_route.ro_rt != NULL) {
2135 		struct rtentry *rt = inp->inp_route.ro_rt;
2136 		struct ifnet *outifp;
2137 
2138 		if (rt->rt_flags & (RTF_MULTICAST | RTF_BROADCAST)) {
2139 			rt = NULL;      /* unusable */
2140 		}
2141 #if CONTENT_FILTER
2142 		/*
2143 		 * Discard temporary route for cfil case
2144 		 */
2145 		if (cfil_faddr_use) {
2146 			rt = NULL;      /* unusable */
2147 		}
2148 #endif
2149 
2150 		/*
2151 		 * Always discard if it is a multicast or broadcast route.
2152 		 */
2153 		if (rt == NULL) {
2154 			ROUTE_RELEASE(&inp->inp_route);
2155 		}
2156 
2157 		/*
2158 		 * If the destination route is unicast, update outifp with
2159 		 * that of the route interface used by IP.
2160 		 */
2161 		if (rt != NULL &&
2162 		    (outifp = rt->rt_ifp) != inp->inp_last_outifp) {
2163 			inp->inp_last_outifp = outifp; /* no reference needed */
2164 #if SKYWALK
2165 			if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2166 				netns_set_ifnet(&inp->inp_netns_token,
2167 				    inp->inp_last_outifp);
2168 			}
2169 #endif /* SKYWALK */
2170 
2171 			so->so_pktheadroom = (uint16_t)P2ROUNDUP(
2172 				sizeof(struct udphdr) +
2173 				sizeof(struct ip) +
2174 				ifnet_hdrlen(outifp) +
2175 				ifnet_mbuf_packetpreamblelen(outifp),
2176 				sizeof(u_int32_t));
2177 		}
2178 	} else {
2179 		ROUTE_RELEASE(&inp->inp_route);
2180 	}
2181 
2182 	/*
2183 	 * If output interface was cellular/expensive, and this socket is
2184 	 * denied access to it, generate an event.
2185 	 */
2186 	if (error != 0 && (ipoa.ipoa_flags & IPOAF_R_IFDENIED) &&
2187 	    (INP_NO_CELLULAR(inp) || INP_NO_EXPENSIVE(inp) || INP_NO_CONSTRAINED(inp))) {
2188 		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED));
2189 	}
2190 
2191 release:
2192 	KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0);
2193 
2194 	if (m != NULL) {
2195 		m_freem(m);
2196 	}
2197 
2198 	if (outif != NULL) {
2199 		ifnet_release(outif);
2200 	}
2201 
2202 #if CONTENT_FILTER
2203 	if (cfil_tag) {
2204 		m_tag_free(cfil_tag);
2205 	}
2206 #endif
2207 	if (sndinprog_cnt_used) {
2208 		VERIFY(inp->inp_sndinprog_cnt > 0);
2209 		if (--inp->inp_sndinprog_cnt == 0) {
2210 			inp->inp_flags &= ~(INP_FC_FEEDBACK);
2211 			if (inp->inp_sndingprog_waiters > 0) {
2212 				wakeup(&inp->inp_sndinprog_cnt);
2213 			}
2214 		}
2215 		sndinprog_cnt_used = false;
2216 	}
2217 
2218 	return error;
2219 }
2220 
/*
 * Default socket buffer reservations for UDP sockets.  udp_attach() hands
 * both values to soreserve(); each is also exposed as a sysctl.
 */
/* send side: in practice the largest datagram that can be sent */
u_int32_t udp_sendspace = 9216;
/* receive side: 187 1K datagrams plus one sockaddr_in6 each (approx 192 KB) */
u_int32_t udp_recvspace = 187 * (1024 + sizeof(struct sockaddr_in6));
2224 
2225 /* Check that the values of udp send and recv space do not exceed sb_max */
2226 static int
sysctl_udp_sospace(struct sysctl_oid * oidp,void * arg1,int arg2,struct sysctl_req * req)2227 sysctl_udp_sospace(struct sysctl_oid *oidp, void *arg1, int arg2,
2228     struct sysctl_req *req)
2229 {
2230 #pragma unused(arg1, arg2)
2231 	u_int32_t new_value = 0, *space_p = NULL;
2232 	int changed = 0, error = 0;
2233 
2234 	switch (oidp->oid_number) {
2235 	case UDPCTL_RECVSPACE:
2236 		space_p = &udp_recvspace;
2237 		break;
2238 	case UDPCTL_MAXDGRAM:
2239 		space_p = &udp_sendspace;
2240 		break;
2241 	default:
2242 		return EINVAL;
2243 	}
2244 	error = sysctl_io_number(req, *space_p, sizeof(u_int32_t),
2245 	    &new_value, &changed);
2246 	if (changed) {
2247 		if (new_value > 0 && new_value <= sb_max) {
2248 			*space_p = new_value;
2249 		} else {
2250 			error = ERANGE;
2251 		}
2252 	}
2253 	return error;
2254 }
2255 
2256 SYSCTL_PROC(_net_inet_udp, UDPCTL_RECVSPACE, recvspace,
2257     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &udp_recvspace, 0,
2258     &sysctl_udp_sospace, "IU", "Maximum incoming UDP datagram size");
2259 
2260 SYSCTL_PROC(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram,
2261     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &udp_sendspace, 0,
2262     &sysctl_udp_sospace, "IU", "Maximum outgoing UDP datagram size");
2263 
2264 static int
udp_abort(struct socket * so)2265 udp_abort(struct socket *so)
2266 {
2267 	struct inpcb *inp;
2268 
2269 	inp = sotoinpcb(so);
2270 	if (inp == NULL) {
2271 		panic("%s: so=%p null inp", __func__, so);
2272 		/* NOTREACHED */
2273 	}
2274 	soisdisconnected(so);
2275 	in_pcbdetach(inp);
2276 	return 0;
2277 }
2278 
2279 static int
udp_attach(struct socket * so,int proto,struct proc * p)2280 udp_attach(struct socket *so, int proto, struct proc *p)
2281 {
2282 #pragma unused(proto)
2283 	struct inpcb *inp;
2284 	int error;
2285 
2286 	error = soreserve(so, udp_sendspace, udp_recvspace);
2287 	if (error != 0) {
2288 		return error;
2289 	}
2290 	inp = sotoinpcb(so);
2291 	if (inp != NULL) {
2292 		panic("%s so=%p inp=%p", __func__, so, inp);
2293 		/* NOTREACHED */
2294 	}
2295 	error = in_pcballoc(so, &udbinfo, p);
2296 	if (error != 0) {
2297 		return error;
2298 	}
2299 	inp = (struct inpcb *)so->so_pcb;
2300 	inp->inp_vflag |= INP_IPV4;
2301 	inp->inp_ip_ttl = (uint8_t)ip_defttl;
2302 	if (nstat_collect) {
2303 		nstat_udp_new_pcb(inp);
2304 	}
2305 	return 0;
2306 }
2307 
2308 static int
udp_bind(struct socket * so,struct sockaddr * nam,struct proc * p)2309 udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
2310 {
2311 	struct inpcb *inp;
2312 	int error;
2313 
2314 	if (nam->sa_family != 0 && nam->sa_family != AF_INET &&
2315 	    nam->sa_family != AF_INET6) {
2316 		return EAFNOSUPPORT;
2317 	}
2318 
2319 	inp = sotoinpcb(so);
2320 	if (inp == NULL) {
2321 		return EINVAL;
2322 	}
2323 
2324 	inp_enter_bind_in_progress(so);
2325 
2326 	error = in_pcbbind(inp, nam, NULL, p);
2327 
2328 #if NECP
2329 	/* Update NECP client with bind result if not in middle of connect */
2330 	if (error == 0 &&
2331 	    (inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS) &&
2332 	    !uuid_is_null(inp->necp_client_uuid)) {
2333 		socket_unlock(so, 0);
2334 		necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
2335 		socket_lock(so, 0);
2336 	}
2337 #endif /* NECP */
2338 
2339 	UDP_LOG_BIND(inp, error);
2340 
2341 	inp_exit_bind_in_progress(so);
2342 
2343 	return error;
2344 }
2345 
2346 static int
udp_connect(struct socket * so,struct sockaddr * nam,struct proc * p)2347 udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
2348 {
2349 	struct inpcb *inp;
2350 	int error;
2351 
2352 	inp = sotoinpcb(so);
2353 	if (inp == NULL) {
2354 		return EINVAL;
2355 	}
2356 	inp_enter_bind_in_progress(so);
2357 
2358 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
2359 		error = EISCONN;
2360 		goto done;
2361 	}
2362 
2363 	if (!(so->so_flags1 & SOF1_CONNECT_COUNTED)) {
2364 		so->so_flags1 |= SOF1_CONNECT_COUNTED;
2365 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_connected);
2366 	}
2367 
2368 #if NECP
2369 #if FLOW_DIVERT
2370 	if (necp_socket_should_use_flow_divert(inp)) {
2371 		error = flow_divert_pcb_init(so);
2372 		if (error == 0) {
2373 			error = flow_divert_connect_out(so, nam, p);
2374 		}
2375 		goto done;
2376 	} else {
2377 		so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
2378 	}
2379 #endif /* FLOW_DIVERT */
2380 #endif /* NECP */
2381 
2382 	error = in_pcbconnect(inp, nam, p, IFSCOPE_NONE, NULL);
2383 	if (error == 0) {
2384 #if NECP
2385 		/* Update NECP client with connected five-tuple */
2386 		if (!uuid_is_null(inp->necp_client_uuid)) {
2387 			socket_unlock(so, 0);
2388 			necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
2389 			socket_lock(so, 0);
2390 		}
2391 #endif /* NECP */
2392 
2393 		soisconnected(so);
2394 		if (inp->inp_flowhash == 0) {
2395 			inp_calc_flowhash(inp);
2396 			ASSERT(inp->inp_flowhash != 0);
2397 		}
2398 		inp->inp_connect_timestamp = mach_continuous_time();
2399 	}
2400 done:
2401 	UDP_LOG_CONNECT(inp, error);
2402 
2403 	inp_exit_bind_in_progress(so);
2404 
2405 	return error;
2406 }
2407 
2408 int
udp_connectx_common(struct socket * so,int af,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uint32_t flags,void * arg,uint32_t arglen,struct uio * uio,user_ssize_t * bytes_written)2409 udp_connectx_common(struct socket *so, int af, struct sockaddr *src, struct sockaddr *dst,
2410     struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid,
2411     uint32_t flags, void *arg, uint32_t arglen,
2412     struct uio *uio, user_ssize_t *bytes_written)
2413 {
2414 #pragma unused(aid, flags, arg, arglen)
2415 	struct inpcb *inp = sotoinpcb(so);
2416 	int error = 0;
2417 	user_ssize_t datalen = 0;
2418 
2419 	if (inp == NULL) {
2420 		return EINVAL;
2421 	}
2422 
2423 	VERIFY(dst != NULL);
2424 
2425 	ASSERT(!(inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS));
2426 	inp->inp_flags2 |= INP2_CONNECT_IN_PROGRESS;
2427 
2428 #if NECP
2429 	inp_update_necp_policy(inp, src, dst, ifscope);
2430 #endif /* NECP */
2431 
2432 	/* bind socket to the specified interface, if requested */
2433 	if (ifscope != IFSCOPE_NONE &&
2434 	    (error = inp_bindif(inp, ifscope, NULL)) != 0) {
2435 		goto done;
2436 	}
2437 
2438 	/* if source address and/or port is specified, bind to it */
2439 	if (src != NULL) {
2440 		error = sobindlock(so, src, 0); /* already locked */
2441 		if (error != 0) {
2442 			goto done;
2443 		}
2444 	}
2445 
2446 	switch (af) {
2447 	case AF_INET:
2448 		error = udp_connect(so, dst, p);
2449 		break;
2450 	case AF_INET6:
2451 		error = udp6_connect(so, dst, p);
2452 		break;
2453 	default:
2454 		VERIFY(0);
2455 		/* NOTREACHED */
2456 	}
2457 
2458 	if (error != 0) {
2459 		goto done;
2460 	}
2461 
2462 	/*
2463 	 * If there is data, copy it. DATA_IDEMPOTENT is ignored.
2464 	 * CONNECT_RESUME_ON_READ_WRITE is ignored.
2465 	 */
2466 	if (uio != NULL) {
2467 		socket_unlock(so, 0);
2468 
2469 		VERIFY(bytes_written != NULL);
2470 
2471 		datalen = uio_resid(uio);
2472 		error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL,
2473 		    (uio_t)uio, NULL, NULL, 0);
2474 		socket_lock(so, 0);
2475 
2476 		/* If error returned is EMSGSIZE, for example, disconnect */
2477 		if (error == 0 || error == EWOULDBLOCK) {
2478 			*bytes_written = datalen - uio_resid(uio);
2479 		} else {
2480 			(void) so->so_proto->pr_usrreqs->pru_disconnectx(so,
2481 			    SAE_ASSOCID_ANY, SAE_CONNID_ANY);
2482 		}
2483 		/*
2484 		 * mask the EWOULDBLOCK error so that the caller
2485 		 * knows that atleast the connect was successful.
2486 		 */
2487 		if (error == EWOULDBLOCK) {
2488 			error = 0;
2489 		}
2490 	}
2491 
2492 	if (error == 0 && pcid != NULL) {
2493 		*pcid = 1;      /* there is only 1 connection for UDP */
2494 	}
2495 done:
2496 	inp->inp_flags2 &= ~INP2_CONNECT_IN_PROGRESS;
2497 	return error;
2498 }
2499 
2500 static int
udp_connectx(struct socket * so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uint32_t flags,void * arg,uint32_t arglen,struct uio * uio,user_ssize_t * bytes_written)2501 udp_connectx(struct socket *so, struct sockaddr *src,
2502     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
2503     sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
2504     uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
2505 {
2506 	return udp_connectx_common(so, AF_INET, src, dst,
2507 	           p, ifscope, aid, pcid, flags, arg, arglen, uio, bytes_written);
2508 }
2509 
2510 static int
udp_detach(struct socket * so)2511 udp_detach(struct socket *so)
2512 {
2513 	struct inpcb *inp;
2514 
2515 	inp = sotoinpcb(so);
2516 	if (inp == NULL) {
2517 		panic("%s: so=%p null inp", __func__, so);
2518 		/* NOTREACHED */
2519 	}
2520 
2521 	/*
2522 	 * If this is a socket that does not want to wakeup the device
2523 	 * for it's traffic, the application might be waiting for
2524 	 * close to complete before going to sleep. Send a notification
2525 	 * for this kind of sockets
2526 	 */
2527 	if (so->so_options & SO_NOWAKEFROMSLEEP) {
2528 		socket_post_kev_msg_closed(so);
2529 	}
2530 
2531 	UDP_LOG_CONNECTION_SUMMARY(inp);
2532 
2533 	in_pcbdetach(inp);
2534 	inp->inp_state = INPCB_STATE_DEAD;
2535 	return 0;
2536 }
2537 
2538 static int
udp_disconnect(struct socket * so)2539 udp_disconnect(struct socket *so)
2540 {
2541 	struct inpcb *inp;
2542 
2543 	inp = sotoinpcb(so);
2544 	if (inp == NULL) {
2545 		return EINVAL;
2546 	}
2547 	if (inp->inp_faddr.s_addr == INADDR_ANY) {
2548 		return ENOTCONN;
2549 	}
2550 
2551 	UDP_LOG_CONNECTION_SUMMARY(inp);
2552 
2553 	in_pcbdisconnect(inp);
2554 
2555 	/* reset flow controlled state, just in case */
2556 	inp_reset_fc_state(inp);
2557 
2558 	inp->inp_laddr.s_addr = INADDR_ANY;
2559 	so->so_state &= ~SS_ISCONNECTED;                /* XXX */
2560 	inp->inp_last_outifp = NULL;
2561 #if SKYWALK
2562 	if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2563 		netns_set_ifnet(&inp->inp_netns_token, NULL);
2564 	}
2565 #endif /* SKYWALK */
2566 
2567 	return 0;
2568 }
2569 
2570 static int
udp_disconnectx(struct socket * so,sae_associd_t aid,sae_connid_t cid)2571 udp_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
2572 {
2573 #pragma unused(cid)
2574 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
2575 		return EINVAL;
2576 	}
2577 
2578 	return udp_disconnect(so);
2579 }
2580 
2581 static int
udp_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * addr,struct mbuf * control,struct proc * p)2582 udp_send(struct socket *so, int flags, struct mbuf *m,
2583     struct sockaddr *addr, struct mbuf *control, struct proc *p)
2584 {
2585 #ifndef FLOW_DIVERT
2586 #pragma unused(flags)
2587 #endif /* !(FLOW_DIVERT) */
2588 	struct inpcb *inp;
2589 	int error;
2590 
2591 	inp = sotoinpcb(so);
2592 	if (inp == NULL) {
2593 		if (m != NULL) {
2594 			m_freem(m);
2595 		}
2596 		if (control != NULL) {
2597 			m_freem(control);
2598 		}
2599 		return EINVAL;
2600 	}
2601 
2602 #if NECP
2603 #if FLOW_DIVERT
2604 	if (necp_socket_should_use_flow_divert(inp)) {
2605 		/* Implicit connect */
2606 		return flow_divert_implicit_data_out(so, flags, m, addr,
2607 		           control, p);
2608 	} else {
2609 		so->so_flags1 |= SOF1_FLOW_DIVERT_SKIP;
2610 	}
2611 #endif /* FLOW_DIVERT */
2612 #endif /* NECP */
2613 
2614 	so_update_tx_data_stats(so, 1, m->m_pkthdr.len);
2615 
2616 	in_pcb_check_management_entitled(inp);
2617 	in_pcb_check_ultra_constrained_entitled(inp);
2618 
2619 #if SKYWALK
2620 	sk_protect_t protect = sk_async_transmit_protect();
2621 #endif /* SKYWALK */
2622 	error = udp_output(inp, m, addr, control, p);
2623 #if SKYWALK
2624 	sk_async_transmit_unprotect(protect);
2625 #endif /* SKYWALK */
2626 
2627 	return error;
2628 }
2629 
2630 int
udp_shutdown(struct socket * so)2631 udp_shutdown(struct socket *so)
2632 {
2633 	struct inpcb *inp;
2634 
2635 	inp = sotoinpcb(so);
2636 	if (inp == NULL) {
2637 		return EINVAL;
2638 	}
2639 	socantsendmore(so);
2640 	return 0;
2641 }
2642 
2643 int
udp_lock(struct socket * so,int refcount,void * debug)2644 udp_lock(struct socket *so, int refcount, void *debug)
2645 {
2646 	void *__single lr_saved;
2647 
2648 	if (debug == NULL) {
2649 		lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
2650 	} else {
2651 		lr_saved = debug;
2652 	}
2653 
2654 	if (so->so_pcb != NULL) {
2655 		LCK_MTX_ASSERT(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
2656 		    LCK_MTX_ASSERT_NOTOWNED);
2657 		lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
2658 	} else {
2659 		panic("%s: so=%p NO PCB! lr=%p lrh= %s", __func__,
2660 		    so, lr_saved, solockhistory_nr(so));
2661 		/* NOTREACHED */
2662 	}
2663 	if (refcount) {
2664 		so->so_usecount++;
2665 	}
2666 
2667 	so->lock_lr[so->next_lock_lr] = lr_saved;
2668 	so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2669 	return 0;
2670 }
2671 
2672 int
udp_unlock(struct socket * so,int refcount,void * debug)2673 udp_unlock(struct socket *so, int refcount, void *debug)
2674 {
2675 	void *__single lr_saved;
2676 
2677 	if (debug == NULL) {
2678 		lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
2679 	} else {
2680 		lr_saved = debug;
2681 	}
2682 
2683 	if (refcount) {
2684 		VERIFY(so->so_usecount > 0);
2685 		so->so_usecount--;
2686 	}
2687 	if (so->so_pcb == NULL) {
2688 		panic("%s: so=%p NO PCB! lr=%p lrh= %s", __func__,
2689 		    so, lr_saved, solockhistory_nr(so));
2690 		/* NOTREACHED */
2691 	} else {
2692 		LCK_MTX_ASSERT(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
2693 		    LCK_MTX_ASSERT_OWNED);
2694 		so->unlock_lr[so->next_unlock_lr] = lr_saved;
2695 		so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
2696 		lck_mtx_unlock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
2697 	}
2698 	return 0;
2699 }
2700 
2701 lck_mtx_t *
udp_getlock(struct socket * so,int flags)2702 udp_getlock(struct socket *so, int flags)
2703 {
2704 #pragma unused(flags)
2705 	struct inpcb *__single inp = sotoinpcb(so);
2706 
2707 	if (so->so_pcb == NULL) {
2708 		panic("%s: so=%p NULL so_pcb lrh= %s", __func__,
2709 		    so, solockhistory_nr(so));
2710 		/* NOTREACHED */
2711 	}
2712 	return &inp->inpcb_mtx;
2713 }
2714 
2715 /*
2716  * UDP garbage collector callback (inpcb_timer_func_t).
2717  *
2718  * Returns > 0 to keep timer active.
2719  */
2720 static void
udp_gc(struct inpcbinfo * ipi)2721 udp_gc(struct inpcbinfo *ipi)
2722 {
2723 	struct inpcb *inp, *inpnxt;
2724 	struct socket *so;
2725 
2726 	if (lck_rw_try_lock_exclusive(&ipi->ipi_lock) == FALSE) {
2727 		if (udp_gc_done == TRUE) {
2728 			udp_gc_done = FALSE;
2729 			/* couldn't get the lock, must lock next time */
2730 			os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
2731 			return;
2732 		}
2733 		lck_rw_lock_exclusive(&ipi->ipi_lock);
2734 	}
2735 
2736 	udp_gc_done = TRUE;
2737 
2738 	for (inp = udb.lh_first; inp != NULL; inp = inpnxt) {
2739 		inpnxt = inp->inp_list.le_next;
2740 
2741 		/*
2742 		 * Skip unless it's STOPUSING; garbage collector will
2743 		 * be triggered by in_pcb_checkstate() upon setting
2744 		 * wantcnt to that value.  If the PCB is already dead,
2745 		 * keep gc active to anticipate wantcnt changing.
2746 		 */
2747 		if (inp->inp_wantcnt != WNT_STOPUSING) {
2748 			continue;
2749 		}
2750 
2751 		/*
2752 		 * Skip if busy, no hurry for cleanup.  Keep gc active
2753 		 * and try the lock again during next round.
2754 		 */
2755 		if (!socket_try_lock(inp->inp_socket)) {
2756 			os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
2757 			continue;
2758 		}
2759 
2760 		/*
2761 		 * Keep gc active unless usecount is 0.
2762 		 */
2763 		so = inp->inp_socket;
2764 		if (so->so_usecount == 0) {
2765 			if (inp->inp_state != INPCB_STATE_DEAD) {
2766 				if (SOCK_CHECK_DOM(so, PF_INET6)) {
2767 					in6_pcbdetach(inp);
2768 				} else {
2769 					in_pcbdetach(inp);
2770 				}
2771 			}
2772 			in_pcbdispose(inp);
2773 		} else {
2774 			socket_unlock(so, 0);
2775 			os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
2776 		}
2777 	}
2778 	lck_rw_done(&ipi->ipi_lock);
2779 }
2780 
2781 static int
2782 udp_getstat SYSCTL_HANDLER_ARGS
2783 {
2784 #pragma unused(oidp, arg1, arg2)
2785 	if (req->oldptr == USER_ADDR_NULL) {
2786 		req->oldlen = (size_t)sizeof(struct udpstat);
2787 	}
2788 
2789 	return SYSCTL_OUT(req, &udpstat, MIN(sizeof(udpstat), req->oldlen));
2790 }
2791 
2792 void
udp_in_cksum_stats(u_int32_t len)2793 udp_in_cksum_stats(u_int32_t len)
2794 {
2795 	udpstat.udps_rcv_swcsum++;
2796 	udpstat.udps_rcv_swcsum_bytes += len;
2797 }
2798 
2799 void
udp_out_cksum_stats(u_int32_t len)2800 udp_out_cksum_stats(u_int32_t len)
2801 {
2802 	udpstat.udps_snd_swcsum++;
2803 	udpstat.udps_snd_swcsum_bytes += len;
2804 }
2805 
2806 void
udp_in6_cksum_stats(u_int32_t len)2807 udp_in6_cksum_stats(u_int32_t len)
2808 {
2809 	udpstat.udps_rcv6_swcsum++;
2810 	udpstat.udps_rcv6_swcsum_bytes += len;
2811 }
2812 
2813 void
udp_out6_cksum_stats(u_int32_t len)2814 udp_out6_cksum_stats(u_int32_t len)
2815 {
2816 	udpstat.udps_snd6_swcsum++;
2817 	udpstat.udps_snd6_swcsum_bytes += len;
2818 }
2819 
2820 /*
2821  * Checksum extended UDP header and data.
2822  */
2823 static int
udp_input_checksum(struct mbuf * m,struct udphdr * uh,int off,int ulen)2824 udp_input_checksum(struct mbuf *m, struct udphdr *uh, int off, int ulen)
2825 {
2826 	ifnet_ref_t ifp = m->m_pkthdr.rcvif;
2827 	struct ip *__single ip = mtod(m, struct ip *);
2828 	struct ipovly *__single ipov = (struct ipovly *)ip;
2829 
2830 	if (uh->uh_sum == 0) {
2831 		udpstat.udps_nosum++;
2832 		return 0;
2833 	}
2834 
2835 	/* ip_stripoptions() must have been called before we get here */
2836 	ASSERT((ip->ip_hl << 2) == sizeof(*ip));
2837 
2838 	if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) ||
2839 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) &&
2840 	    (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) {
2841 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
2842 			uh->uh_sum = m->m_pkthdr.csum_rx_val;
2843 		} else {
2844 			uint32_t sum = m->m_pkthdr.csum_rx_val;
2845 			uint32_t start = m->m_pkthdr.csum_rx_start;
2846 			int32_t trailer = (m_pktlen(m) - (off + ulen));
2847 
2848 			/*
2849 			 * Perform 1's complement adjustment of octets
2850 			 * that got included/excluded in the hardware-
2851 			 * calculated checksum value.  Ignore cases
2852 			 * where the value already includes the entire
2853 			 * IP header span, as the sum for those octets
2854 			 * would already be 0 by the time we get here;
2855 			 * IP has already performed its header checksum
2856 			 * checks.  If we do need to adjust, restore
2857 			 * the original fields in the IP header when
2858 			 * computing the adjustment value.  Also take
2859 			 * care of any trailing bytes and subtract out
2860 			 * their partial sum.
2861 			 */
2862 			ASSERT(trailer >= 0);
2863 			if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) &&
2864 			    ((start != 0 && start != off) || trailer != 0)) {
2865 				uint32_t swbytes = (uint32_t)trailer;
2866 
2867 				if (start < off) {
2868 					ip->ip_len += sizeof(*ip);
2869 #if BYTE_ORDER != BIG_ENDIAN
2870 					HTONS(ip->ip_len);
2871 					HTONS(ip->ip_off);
2872 #endif /* BYTE_ORDER != BIG_ENDIAN */
2873 				}
2874 				/* callee folds in sum */
2875 				sum = m_adj_sum16(m, start, off, ulen, sum);
2876 				if (off > start) {
2877 					swbytes += (off - start);
2878 				} else {
2879 					swbytes += (start - off);
2880 				}
2881 
2882 				if (start < off) {
2883 #if BYTE_ORDER != BIG_ENDIAN
2884 					NTOHS(ip->ip_off);
2885 					NTOHS(ip->ip_len);
2886 #endif /* BYTE_ORDER != BIG_ENDIAN */
2887 					ip->ip_len -= sizeof(*ip);
2888 				}
2889 
2890 				if (swbytes != 0) {
2891 					udp_in_cksum_stats(swbytes);
2892 				}
2893 				if (trailer != 0) {
2894 					m_adj(m, -trailer);
2895 				}
2896 			}
2897 
2898 			/* callee folds in sum */
2899 			uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
2900 			    ip->ip_dst.s_addr, sum + htonl(ulen + IPPROTO_UDP));
2901 		}
2902 		uh->uh_sum ^= 0xffff;
2903 	} else {
2904 		uint16_t ip_sum;
2905 		char b[9];
2906 
2907 		bcopy(ipov->ih_x1, b, sizeof(ipov->ih_x1));
2908 		bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
2909 		ip_sum = ipov->ih_len;
2910 		ipov->ih_len = uh->uh_ulen;
2911 		uh->uh_sum = in_cksum(m, ulen + sizeof(struct ip));
2912 		bcopy(b, ipov->ih_x1, sizeof(ipov->ih_x1));
2913 		ipov->ih_len = ip_sum;
2914 
2915 		udp_in_cksum_stats(ulen);
2916 	}
2917 
2918 	if (uh->uh_sum != 0) {
2919 		udpstat.udps_badsum++;
2920 		IF_UDP_STATINC(ifp, badchksum);
2921 		return -1;
2922 	}
2923 
2924 	return 0;
2925 }
2926 
2927 void
udp_fill_keepalive_offload_frames(ifnet_t ifp,struct ifnet_keepalive_offload_frame * __counted_by (frames_array_count)frames_array,u_int32_t frames_array_count,size_t frame_data_offset,u_int32_t * used_frames_count)2928 udp_fill_keepalive_offload_frames(ifnet_t ifp,
2929     struct ifnet_keepalive_offload_frame *__counted_by(frames_array_count) frames_array,
2930     u_int32_t frames_array_count, size_t frame_data_offset,
2931     u_int32_t *used_frames_count)
2932 {
2933 	struct inpcb *inp;
2934 	inp_gen_t gencnt;
2935 	u_int32_t frame_index = *used_frames_count;
2936 
2937 	if (ifp == NULL || frames_array == NULL ||
2938 	    frames_array_count == 0 ||
2939 	    frame_index >= frames_array_count ||
2940 	    frame_data_offset >= IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
2941 		return;
2942 	}
2943 
2944 	lck_rw_lock_shared(&udbinfo.ipi_lock);
2945 	gencnt = udbinfo.ipi_gencnt;
2946 	LIST_FOREACH(inp, udbinfo.ipi_listhead, inp_list) {
2947 		struct socket *so;
2948 		u_int8_t *data;
2949 		struct ifnet_keepalive_offload_frame *frame;
2950 		mbuf_ref_t m = NULL;
2951 
2952 		if (frame_index >= frames_array_count) {
2953 			break;
2954 		}
2955 
2956 		if (inp->inp_gencnt > gencnt ||
2957 		    inp->inp_state == INPCB_STATE_DEAD) {
2958 			continue;
2959 		}
2960 
2961 		if ((so = inp->inp_socket) == NULL ||
2962 		    (so->so_state & SS_DEFUNCT)) {
2963 			continue;
2964 		}
2965 		/*
2966 		 * check for keepalive offload flag without socket
2967 		 * lock to avoid a deadlock
2968 		 */
2969 		if (!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD)) {
2970 			continue;
2971 		}
2972 
2973 		udp_lock(so, 1, 0);
2974 		if (!(inp->inp_vflag & (INP_IPV4 | INP_IPV6))) {
2975 			udp_unlock(so, 1, 0);
2976 			continue;
2977 		}
2978 		if ((inp->inp_vflag & INP_IPV4) &&
2979 		    (inp->inp_laddr.s_addr == INADDR_ANY ||
2980 		    inp->inp_faddr.s_addr == INADDR_ANY)) {
2981 			udp_unlock(so, 1, 0);
2982 			continue;
2983 		}
2984 		if ((inp->inp_vflag & INP_IPV6) &&
2985 		    (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
2986 		    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))) {
2987 			udp_unlock(so, 1, 0);
2988 			continue;
2989 		}
2990 		if (inp->inp_lport == 0 || inp->inp_fport == 0) {
2991 			udp_unlock(so, 1, 0);
2992 			continue;
2993 		}
2994 		if (inp->inp_last_outifp == NULL ||
2995 		    inp->inp_last_outifp->if_index != ifp->if_index) {
2996 			udp_unlock(so, 1, 0);
2997 			continue;
2998 		}
2999 		if ((inp->inp_vflag & INP_IPV4)) {
3000 			if ((frame_data_offset + sizeof(struct udpiphdr) +
3001 			    inp->inp_keepalive_datalen) >
3002 			    IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
3003 				udp_unlock(so, 1, 0);
3004 				continue;
3005 			}
3006 			if ((sizeof(struct udpiphdr) +
3007 			    inp->inp_keepalive_datalen) > _MHLEN) {
3008 				udp_unlock(so, 1, 0);
3009 				continue;
3010 			}
3011 		} else {
3012 			if ((frame_data_offset + sizeof(struct ip6_hdr) +
3013 			    sizeof(struct udphdr) +
3014 			    inp->inp_keepalive_datalen) >
3015 			    IFNET_KEEPALIVE_OFFLOAD_FRAME_DATA_SIZE) {
3016 				udp_unlock(so, 1, 0);
3017 				continue;
3018 			}
3019 			if ((sizeof(struct ip6_hdr) + sizeof(struct udphdr) +
3020 			    inp->inp_keepalive_datalen) > _MHLEN) {
3021 				udp_unlock(so, 1, 0);
3022 				continue;
3023 			}
3024 		}
3025 		MGETHDR(m, M_WAIT, MT_HEADER);
3026 		if (m == NULL) {
3027 			udp_unlock(so, 1, 0);
3028 			continue;
3029 		}
3030 		/*
3031 		 * This inp has all the information that is needed to
3032 		 * generate an offload frame.
3033 		 */
3034 		if (inp->inp_vflag & INP_IPV4) {
3035 			struct ip *ip;
3036 			struct udphdr *udp;
3037 
3038 			frame = &frames_array[frame_index];
3039 			frame->length = (uint8_t)(frame_data_offset +
3040 			    sizeof(struct udpiphdr) +
3041 			    inp->inp_keepalive_datalen);
3042 			frame->ether_type =
3043 			    IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV4;
3044 			frame->interval = inp->inp_keepalive_interval;
3045 			switch (inp->inp_keepalive_type) {
3046 			case UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY:
3047 				frame->type =
3048 				    IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY;
3049 				break;
3050 			default:
3051 				break;
3052 			}
3053 			data = mtod(m, u_int8_t *);
3054 			bzero(data, sizeof(struct udpiphdr));
3055 			ip = (__typeof__(ip))(void *)data;
3056 			udp = (__typeof__(udp))(void *) (data +
3057 			    sizeof(struct ip));
3058 			m->m_len = sizeof(struct udpiphdr);
3059 			data = data + sizeof(struct udpiphdr);
3060 			if (inp->inp_keepalive_datalen > 0 &&
3061 			    inp->inp_keepalive_data != NULL) {
3062 				bcopy(inp->inp_keepalive_data, data,
3063 				    inp->inp_keepalive_datalen);
3064 				m->m_len += inp->inp_keepalive_datalen;
3065 			}
3066 			m->m_pkthdr.len = m->m_len;
3067 
3068 			ip->ip_v = IPVERSION;
3069 			ip->ip_hl = (sizeof(struct ip) >> 2);
3070 			ip->ip_p = IPPROTO_UDP;
3071 			ip->ip_len = htons(sizeof(struct udpiphdr) +
3072 			    (u_short)inp->inp_keepalive_datalen);
3073 			ip->ip_ttl = inp->inp_ip_ttl;
3074 			ip->ip_tos |= (inp->inp_ip_tos & ~IPTOS_ECN_MASK);
3075 			ip->ip_src = inp->inp_laddr;
3076 			ip->ip_dst = inp->inp_faddr;
3077 			ip->ip_sum = in_cksum_hdr_opt(ip);
3078 
3079 			udp->uh_sport = inp->inp_lport;
3080 			udp->uh_dport = inp->inp_fport;
3081 			udp->uh_ulen = htons(sizeof(struct udphdr) +
3082 			    (u_short)inp->inp_keepalive_datalen);
3083 
3084 			if (!(inp->inp_flags & INP_UDP_NOCKSUM)) {
3085 				udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
3086 				    ip->ip_dst.s_addr,
3087 				    htons(sizeof(struct udphdr) +
3088 				    (u_short)inp->inp_keepalive_datalen +
3089 				    IPPROTO_UDP));
3090 				m->m_pkthdr.csum_flags =
3091 				    (CSUM_UDP | CSUM_ZERO_INVERT);
3092 				m->m_pkthdr.csum_data = offsetof(struct udphdr,
3093 				    uh_sum);
3094 			}
3095 			m->m_pkthdr.pkt_proto = IPPROTO_UDP;
3096 			in_delayed_cksum(m);
3097 			bcopy(m_mtod_current(m), frame->data + frame_data_offset,
3098 			    m->m_len);
3099 		} else {
3100 			struct ip6_hdr *ip6;
3101 			struct udphdr *udp6;
3102 
3103 			VERIFY(inp->inp_vflag & INP_IPV6);
3104 			frame = &frames_array[frame_index];
3105 			frame->length = (uint8_t)(frame_data_offset +
3106 			    sizeof(struct ip6_hdr) +
3107 			    sizeof(struct udphdr) +
3108 			    inp->inp_keepalive_datalen);
3109 			frame->ether_type =
3110 			    IFNET_KEEPALIVE_OFFLOAD_FRAME_ETHERTYPE_IPV6;
3111 			frame->interval = inp->inp_keepalive_interval;
3112 			switch (inp->inp_keepalive_type) {
3113 			case UDP_KEEPALIVE_OFFLOAD_TYPE_AIRPLAY:
3114 				frame->type =
3115 				    IFNET_KEEPALIVE_OFFLOAD_FRAME_AIRPLAY;
3116 				break;
3117 			default:
3118 				break;
3119 			}
3120 			data = mtod(m, u_int8_t *);
3121 			bzero(data, sizeof(struct ip6_hdr) + sizeof(struct udphdr));
3122 			ip6 = (__typeof__(ip6))(void *)data;
3123 			udp6 = (__typeof__(udp6))(void *)(data +
3124 			    sizeof(struct ip6_hdr));
3125 			m->m_len = sizeof(struct ip6_hdr) +
3126 			    sizeof(struct udphdr);
3127 			data = data + (sizeof(struct ip6_hdr) +
3128 			    sizeof(struct udphdr));
3129 			if (inp->inp_keepalive_datalen > 0 &&
3130 			    inp->inp_keepalive_data != NULL) {
3131 				bcopy(inp->inp_keepalive_data, data,
3132 				    inp->inp_keepalive_datalen);
3133 				m->m_len += inp->inp_keepalive_datalen;
3134 			}
3135 			m->m_pkthdr.len = m->m_len;
3136 			ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
3137 			ip6->ip6_flow = ip6->ip6_flow & ~IPV6_FLOW_ECN_MASK;
3138 			ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3139 			ip6->ip6_vfc |= IPV6_VERSION;
3140 			ip6->ip6_nxt = IPPROTO_UDP;
3141 			ip6->ip6_hlim = (uint8_t)ip6_defhlim;
3142 			ip6->ip6_plen = htons(sizeof(struct udphdr) +
3143 			    (u_short)inp->inp_keepalive_datalen);
3144 			ip6->ip6_src = inp->in6p_laddr;
3145 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
3146 				ip6->ip6_src.s6_addr16[1] = 0;
3147 			}
3148 
3149 			ip6->ip6_dst = inp->in6p_faddr;
3150 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
3151 				ip6->ip6_dst.s6_addr16[1] = 0;
3152 			}
3153 
3154 			udp6->uh_sport = inp->in6p_lport;
3155 			udp6->uh_dport = inp->in6p_fport;
3156 			udp6->uh_ulen = htons(sizeof(struct udphdr) +
3157 			    (u_short)inp->inp_keepalive_datalen);
3158 			if (!(inp->inp_flags & INP_UDP_NOCKSUM)) {
3159 				udp6->uh_sum = in6_pseudo(&ip6->ip6_src,
3160 				    &ip6->ip6_dst,
3161 				    htonl(sizeof(struct udphdr) +
3162 				    (u_short)inp->inp_keepalive_datalen +
3163 				    IPPROTO_UDP));
3164 				m->m_pkthdr.csum_flags =
3165 				    (CSUM_UDPIPV6 | CSUM_ZERO_INVERT);
3166 				m->m_pkthdr.csum_data = offsetof(struct udphdr,
3167 				    uh_sum);
3168 			}
3169 			m->m_pkthdr.pkt_proto = IPPROTO_UDP;
3170 			in6_delayed_cksum(m);
3171 			bcopy(m_mtod_current(m), frame->data + frame_data_offset, m->m_len);
3172 		}
3173 		if (m != NULL) {
3174 			m_freem(m);
3175 			m = NULL;
3176 		}
3177 		frame_index++;
3178 		udp_unlock(so, 1, 0);
3179 	}
3180 	lck_rw_done(&udbinfo.ipi_lock);
3181 	*used_frames_count = frame_index;
3182 }
3183 
3184 int
udp_defunct(struct socket * so)3185 udp_defunct(struct socket *so)
3186 {
3187 	struct ip_moptions *__single imo;
3188 	struct inpcb *__single inp;
3189 
3190 	inp = sotoinpcb(so);
3191 	if (inp == NULL) {
3192 		return EINVAL;
3193 	}
3194 
3195 	imo = inp->inp_moptions;
3196 	if (imo != NULL) {
3197 		struct proc *p = current_proc();
3198 
3199 		SODEFUNCTLOG("%s[%d, %s]: defuncting so 0x%llu drop multicast memberships",
3200 		    __func__, proc_pid(p), proc_best_name(p),
3201 		    so->so_gencnt);
3202 
3203 		inp->inp_moptions = NULL;
3204 
3205 		IMO_REMREF(imo);
3206 	}
3207 
3208 	return 0;
3209 }
3210