xref: /xnu-12377.81.4/bsd/netinet/in_pcb.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1991, 1993, 1995
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
61  * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <sys/file_internal.h>
82 #include <net/dlil.h>
83 
84 #include <libkern/OSAtomic.h>
85 #include <kern/locks.h>
86 
87 #include <machine/limits.h>
88 
89 #include <kern/uipc_domain.h>
90 #include <kern/zalloc.h>
91 
92 #include <net/if.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/flowhash.h>
96 #include <net/flowadv.h>
97 #include <net/nat464_utils.h>
98 #include <net/ntstat.h>
99 #include <net/nwk_wq.h>
100 #include <net/restricted_in_port.h>
101 
102 #include <netinet/in.h>
103 #include <netinet/in_pcb.h>
104 #include <netinet/inp_log.h>
105 #include <netinet/in_var.h>
106 #include <netinet/ip_var.h>
107 
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 
111 #include <sys/kdebug.h>
112 #include <sys/random.h>
113 
114 #include <dev/random/randomdev.h>
115 #include <mach/boolean.h>
116 
117 #include <atm/atm_internal.h>
118 #include <pexpert/pexpert.h>
119 
120 #if NECP
121 #include <net/necp.h>
122 #endif
123 
124 #include <sys/stat.h>
125 #include <sys/ubc.h>
126 #include <sys/vnode.h>
127 
128 #include <os/log.h>
129 
130 #if SKYWALK
131 #include <skywalk/namespace/flowidns.h>
132 #endif /* SKYWALK */
133 
134 #include <IOKit/IOBSD.h>
135 
136 #include <net/sockaddr_utils.h>
137 
/* Ephemeral-port randomization tunables (declared here, defined elsewhere) */
extern int      udp_use_randomport;
extern int      tcp_use_randomport;

extern const char *proc_name_address(struct proc *);

/* Lock group/attributes shared by all inpcb-related locks in this file */
static LCK_GRP_DECLARE(inpcb_lock_grp, "inpcb");
static LCK_ATTR_DECLARE(inpcb_lock_attr, 0, 0);
/* Protects inpcb_head, the list of registered inpcbinfo structures */
static LCK_MTX_DECLARE_ATTR(inpcb_lock, &inpcb_lock_grp, &inpcb_lock_attr);
/* Protects the timer-scheduling state (inpcb_timeout_run et al.) below */
static LCK_MTX_DECLARE_ATTR(inpcb_timeout_lock, &inpcb_lock_grp, &inpcb_lock_attr);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

/* All of the following are protected by inpcb_timeout_lock */
static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;         /* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;

/* Above this many pending gc requests, promote the request to the fast timer */
#define INPCB_GCREQ_THRESHOLD   50000

static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
static void inpcb_sched_timeout(void);
static void inpcb_sched_lazy_timeout(void);
static void _inpcb_sched_timeout(unsigned int);
static void inpcb_timeout(void *, void *);
const int inpcb_timeout_lazy = 10;      /* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

/* kdebug codes for the pcb lookup fast paths */
#define DBG_FNC_PCB_LOOKUP      NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define DBG_FNC_PCB_HLOOKUP     NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

/* When non-zero, UDP binds are allowed even when few ephemeral ports remain */
int allow_udp_port_exhaustion = 0;
176 
/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int     ipport_lowfirstauto  = IPPORT_RESERVED - 1;     /* 1023 */
int     ipport_lowlastauto = IPPORT_RESERVEDSTART;      /* 600 */
int     ipport_firstauto = IPPORT_HIFIRSTAUTO;          /* 49152 */
int     ipport_lastauto  = IPPORT_HILASTAUTO;           /* 65535 */
int     ipport_hifirstauto = IPPORT_HIFIRSTAUTO;        /* 49152 */
int     ipport_hilastauto  = IPPORT_HILASTAUTO;         /* 65535 */

/*
 * Clamp var into [min, max].  Arguments are evaluated more than once,
 * so pass only plain lvalues.  #undef'd after the sysctl handler below.
 */
#define RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }
191 
192 static int
193 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
194 {
195 #pragma unused(arg1, arg2)
196 	int error;
197 	int new_value = *(int *)oidp->oid_arg1;
198 #if (DEBUG | DEVELOPMENT)
199 	int old_value = *(int *)oidp->oid_arg1;
200 	/*
201 	 * For unit testing allow a non-superuser process with the
202 	 * proper entitlement to modify the variables
203 	 */
204 	if (req->newptr) {
205 		if (proc_suser(current_proc()) != 0 &&
206 		    (error = priv_check_cred(kauth_cred_get(),
207 		    PRIV_NETINET_RESERVEDPORT, 0))) {
208 			return EPERM;
209 		}
210 	}
211 #endif /* (DEBUG | DEVELOPMENT) */
212 
213 	error = sysctl_handle_int(oidp, &new_value, 0, req);
214 	if (!error) {
215 		if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
216 			RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
217 		} else {
218 			RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
219 		}
220 		*(int *)oidp->oid_arg1 = new_value;
221 	}
222 
223 #if (DEBUG | DEVELOPMENT)
224 	os_log(OS_LOG_DEFAULT,
225 	    "%s:%u sysctl net.restricted_port.verbose: %d -> %d)",
226 	    proc_best_name(current_proc()), proc_selfpid(),
227 	    old_value, *(int *)oidp->oid_arg1);
228 #endif /* (DEBUG | DEVELOPMENT) */
229 
230 	return error;
231 }
232 
233 #undef RANGECHK
234 
/* Parent node for the knobs below: net.inet.ip.portrange */
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");

/*
 * On DEBUG/DEVELOPMENT builds any process may attempt the write
 * (CTLFLAG_ANYBODY); the handler itself enforces the entitlement check.
 * NOTE: "CTLFAGS" is a long-standing typo for "CTLFLAGS"; kept as-is
 * since uses of the macro may exist beyond this chunk.
 */
#if (DEBUG | DEVELOPMENT)
#define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
#else
#define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
#endif /* (DEBUG | DEVELOPMENT) */

/* All six range endpoints share the same validating handler */
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
    CTLFAGS_IP_PORTRANGE,
    &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
    CTLFAGS_IP_PORTRANGE,
    &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
    CTLFAGS_IP_PORTRANGE,
    &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
    CTLFAGS_IP_PORTRANGE,
    &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
    CTLFAGS_IP_PORTRANGE,
    &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
    CTLFAGS_IP_PORTRANGE,
    &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
/* Escape hatch for the UDP port-exhaustion guard in in_pcbbind() */
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
    CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
264 
265 static uint32_t apn_fallbk_debug = 0;
266 #define apn_fallbk_log(x)       do { if (apn_fallbk_debug >= 1) log x; } while (0)
267 
268 #if !XNU_TARGET_OS_OSX
269 static boolean_t apn_fallbk_enabled = TRUE;
270 
271 SYSCTL_DECL(_net_inet);
272 SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
273 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
274     &apn_fallbk_enabled, 0, "APN fallback enable");
275 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
276     &apn_fallbk_debug, 0, "APN fallback debug enable");
277 #else /* XNU_TARGET_OS_OSX */
278 static boolean_t apn_fallbk_enabled = FALSE;
279 #endif /* XNU_TARGET_OS_OSX */
280 
281 extern int      udp_use_randomport;
282 extern int      tcp_use_randomport;
283 
/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr  v4;
		struct in6_addr v6;
		u_int8_t        addr8[16];
		u_int16_t       addr16[8];
		u_int32_t       addr32[4];
	} infha;
};

/* Hash input: the connection tuple plus two random salt words */
struct inp_flowhash_key {
	struct inp_flowhash_key_addr    infh_laddr;     /* local address */
	struct inp_flowhash_key_addr    infh_faddr;     /* foreign address */
	u_int32_t                       infh_lport;     /* local port */
	u_int32_t                       infh_fport;     /* foreign port */
	u_int32_t                       infh_af;        /* address family */
	u_int32_t                       infh_proto;     /* IP protocol */
	u_int32_t                       infh_rand1;     /* random salt #1 */
	u_int32_t                       infh_rand2;     /* random salt #2 */
};

#if !SKYWALK
/* Seed for the flowhash; presumably initialized lazily — see users below */
static u_int32_t inp_hash_seed = 0;
#endif /* !SKYWALK */

static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define INPFC_SOLOCKED  0x1
#define INPFC_REMOVE    0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

/* Protects inp_fc_tree and key_inp below */
static LCK_MTX_DECLARE_ATTR(inp_fc_lck, &inpcb_lock_grp, &inpcb_lock_attr);

/* Red-black tree of pcbs used to deliver flow advisories (see in_pcbinit) */
RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;
331 
332 /*
333  * in_pcb.c: manage the Protocol Control Blocks.
334  */
335 
/*
 * One-time initialization of the inpcb subsystem: enables privacy
 * logging if requested via the ATM diagnostic config, allocates the
 * lazy and fast timer thread calls, and initializes the flow-advisory
 * RB tree.  Must be called exactly once (VERIFY'd).
 */
void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;
	uint32_t logging_config;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	/* High bit of the diagnostic config enables inp privacy logging */
	logging_config = atm_get_diagnostic_config();
	if (logging_config & 0x80000000) {
		inp_log_privacy = 1;
	}

	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	/* Give it an arg so that we know that this is the fast timer */
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
		inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
		panic("unable to alloc the inpcb thread call");
	}

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}
368 
/* True if req has any pending lazy, fast, or nodelay timer requests */
#define INPCB_HAVE_TIMER_REQ(req)       (((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
/*
 * Thread-call handler shared by the lazy and the fast inpcb timers;
 * arg0 is non-NULL only when invoked as the fast timer (see
 * in_pcbinit).  Runs each registered inpcbinfo's garbage collector
 * and/or protocol timer as requested, then re-arms itself while
 * requests remain outstanding.
 */
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	/* Consume the pending gc/ticking flags under the timeout lock */
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		/* Drop the timeout lock; callbacks take other locks */
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					/*
					 * The callback may post fresh
					 * requests; accumulate them to
					 * decide how to re-arm below.
					 */
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	inpcb_timeout_run--;
	/* inpcb_timeout_run is unsigned: an underflow wraps and trips '< 2' */
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}
459 
/*
 * Arm the fast (1 second, no leeway) inpcb timer.  Caller must hold
 * inpcb_timeout_lock (asserted in _inpcb_sched_timeout).
 */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
465 
/*
 * Arm the lazy inpcb timer with inpcb_timeout_lazy seconds of leeway.
 * Caller must hold inpcb_timeout_lock (asserted in _inpcb_sched_timeout).
 */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
471 
/*
 * Schedule the inpcb timer thread call if work is pending.  offset == 0
 * requests the fast timer (1s deadline, no leeway); non-zero requests
 * the lazy timer with "offset" seconds of leeway.  Must be called with
 * inpcb_timeout_lock held (may be held as a spin lock; it is converted
 * to a full mutex before entering the thread-call routines).
 */
static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		/* leave spin mode before calling out of this file */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
				inpcb_thread_call, NULL, deadline, leeway,
				THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}
508 
509 void
inpcb_gc_sched(struct inpcbinfo * ipi,u_int32_t type)510 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
511 {
512 	u_int32_t gccnt;
513 
514 	lck_mtx_lock_spin(&inpcb_timeout_lock);
515 	inpcb_garbage_collecting = TRUE;
516 	gccnt = ipi->ipi_gc_req.intimer_nodelay +
517 	    ipi->ipi_gc_req.intimer_fast;
518 
519 	if (gccnt > INPCB_GCREQ_THRESHOLD) {
520 		type = INPCB_TIMER_FAST;
521 	}
522 
523 	switch (type) {
524 	case INPCB_TIMER_NODELAY:
525 		os_atomic_inc(&ipi->ipi_gc_req.intimer_nodelay, relaxed);
526 		inpcb_sched_timeout();
527 		break;
528 	case INPCB_TIMER_FAST:
529 		os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
530 		inpcb_sched_timeout();
531 		break;
532 	default:
533 		os_atomic_inc(&ipi->ipi_gc_req.intimer_lazy, relaxed);
534 		inpcb_sched_lazy_timeout();
535 		break;
536 	}
537 	lck_mtx_unlock(&inpcb_timeout_lock);
538 }
539 
540 void
inpcb_timer_sched(struct inpcbinfo * ipi,u_int32_t type)541 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
542 {
543 	lck_mtx_lock_spin(&inpcb_timeout_lock);
544 	inpcb_ticking = TRUE;
545 	switch (type) {
546 	case INPCB_TIMER_NODELAY:
547 		os_atomic_inc(&ipi->ipi_timer_req.intimer_nodelay, relaxed);
548 		inpcb_sched_timeout();
549 		break;
550 	case INPCB_TIMER_FAST:
551 		os_atomic_inc(&ipi->ipi_timer_req.intimer_fast, relaxed);
552 		inpcb_sched_timeout();
553 		break;
554 	default:
555 		os_atomic_inc(&ipi->ipi_timer_req.intimer_lazy, relaxed);
556 		inpcb_sched_lazy_timeout();
557 		break;
558 	}
559 	lck_mtx_unlock(&inpcb_timeout_lock);
560 }
561 
562 void
in_pcbinfo_attach(struct inpcbinfo * ipi)563 in_pcbinfo_attach(struct inpcbinfo *ipi)
564 {
565 	struct inpcbinfo *ipi0;
566 
567 	lck_mtx_lock(&inpcb_lock);
568 	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
569 		if (ipi0 == ipi) {
570 			panic("%s: ipi %p already in the list",
571 			    __func__, ipi);
572 			/* NOTREACHED */
573 		}
574 	}
575 	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
576 	lck_mtx_unlock(&inpcb_lock);
577 }
578 
579 int
in_pcbinfo_detach(struct inpcbinfo * ipi)580 in_pcbinfo_detach(struct inpcbinfo *ipi)
581 {
582 	struct inpcbinfo *ipi0;
583 	int error = 0;
584 
585 	lck_mtx_lock(&inpcb_lock);
586 	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
587 		if (ipi0 == ipi) {
588 			break;
589 		}
590 	}
591 	if (ipi0 != NULL) {
592 		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
593 	} else {
594 		error = ENXIO;
595 	}
596 	lck_mtx_unlock(&inpcb_lock);
597 
598 	return error;
599 }
600 
601 __attribute__((noinline))
602 char *
inp_snprintf_tuple(struct inpcb * inp,char * __sized_by (buflen)buf,size_t buflen)603 inp_snprintf_tuple(struct inpcb *inp, char *__sized_by(buflen) buf, size_t buflen)
604 {
605 	char laddrstr[MAX_IPv6_STR_LEN];
606 	char faddrstr[MAX_IPv6_STR_LEN];
607 	uint16_t lport = 0;
608 	uint16_t fport = 0;
609 	uint16_t proto = IPPROTO_IP;
610 
611 	if (inp->inp_socket != NULL) {
612 		proto = SOCK_PROTO(inp->inp_socket);
613 
614 		if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
615 			lport  = inp->inp_lport;
616 			fport = inp->inp_fport;
617 		}
618 	}
619 	if (inp->inp_vflag & INP_IPV4) {
620 		inet_ntop(AF_INET, (void *)&inp->inp_laddr.s_addr, laddrstr, sizeof(laddrstr));
621 		inet_ntop(AF_INET, (void *)&inp->inp_faddr.s_addr, faddrstr, sizeof(faddrstr));
622 	} else if (inp->inp_vflag & INP_IPV6) {
623 		inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, laddrstr, sizeof(laddrstr));
624 		inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, faddrstr, sizeof(faddrstr));
625 	}
626 	snprintf(buf, buflen, "[%u %s:%u %s:%u]",
627 	    proto, laddrstr, ntohs(lport), faddrstr, ntohs(fport));
628 
629 	return buf;
630 }
631 
/*
 * Lazily evaluate (at most once per pcb) whether this socket may use
 * management interfaces; the verdict is cached in inp_flags2 via
 * INP2_MANAGEMENT_CHECKED / INP2_MANAGEMENT_ALLOWED.
 */
__attribute__((noinline))
void
in_pcb_check_management_entitled(struct inpcb *inp)
{
	/* Already decided for this pcb; the cached flags stand */
	if (inp->inp_flags2 & INP2_MANAGEMENT_CHECKED) {
		return;
	}

	if (management_data_unrestricted) {
		/* Global override: allow everyone */
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
	} else if (if_management_interface_check_needed == true) {
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
		/*
		 * Note that soopt_cred_check checks both intcoproc entitlements.
		 * We check MANAGEMENT_DATA_ENTITLEMENT as there is no
		 * corresponding PRIV value.
		 */
		if (soopt_cred_check(inp->inp_socket, PRIV_NET_RESTRICTED_INTCOPROC, false, false) == 0
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		} else {
			/* Denied: log the tuple when verbose logging is on */
			if (__improbable(if_management_verbose > 1)) {
				char buf[128];

				os_log(OS_LOG_DEFAULT, "in_pcb_check_management_entitled %s:%d not management entitled %s",
				    proc_best_name(current_proc()),
				    proc_selfpid(),
				    inp_snprintf_tuple(inp, buf, sizeof(buf)));
			}
		}
	}
}
668 
669 __attribute__((noinline))
670 void
in_pcb_check_ultra_constrained_entitled(struct inpcb * inp)671 in_pcb_check_ultra_constrained_entitled(struct inpcb *inp)
672 {
673 	if (inp->inp_flags2 & INP2_ULTRA_CONSTRAINED_CHECKED) {
674 		return;
675 	}
676 
677 	if (if_ultra_constrained_check_needed) {
678 		inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;
679 		if (if_ultra_constrained_default_allowed || IOCurrentTaskHasEntitlement(ULTRA_CONSTRAINED_ENTITLEMENT)) {
680 			inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
681 		}
682 	}
683 }
684 
685 /*
686  * Allocate a PCB and associate it with the socket.
687  *
688  * Returns:	0			Success
689  *		ENOBUFS
690  *		ENOMEM
691  */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	void *__unsafe_indexable addr;
	struct inpcb *inp;

	/* Refuse once the protocol's memory accounting hard limit is hit */
	if (proto_memacct_hardlimit(so->so_proto)) {
		return ENOBUFS;
	}
	/* Z_WAITOK_ZERO_NOFAIL: may block, zero-fills, never returns NULL */
	addr = __zalloc_flags(pcbinfo->ipi_zone, Z_WAITOK_ZERO_NOFAIL);
	__builtin_assume(addr != NULL);

	proto_memacct_add(so->so_proto, kalloc_type_size(pcbinfo->ipi_zone));

	/*
	 * N.B: the allocation above may actually be inp_tp
	 * which is a structure that includes inpcb, but for
	 * the purposes of this function we just touch
	 * struct inpcb.
	 */
	inp = __unsafe_forge_single(struct inpcb *, addr);

	/* gencnt is bumped again below once the pcb is on the global list */
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	so->so_pcb = (caddr_t)inp;
	// There was some history about alignment of statistics counters
	// Ensure that all is as expected
	VERIFY(IS_P2ALIGNED(&inp->inp_mstat, sizeof(u_int64_t)));

	/* Per-pcb mutex only for protocols that request PR_PCBLOCK */
	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    &pcbinfo->ipi_lock_attr);
	}

	/* IPv6 sockets default to v6-only unless mapped addresses are enabled */
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

	(void) inp_update_policy(inp);

	inp->inp_max_pacing_rate = UINT64_MAX;

	/* Publish the new pcb on the pcbinfo's global list */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(&pcbinfo->ipi_lock);
	return 0;
}
750 
751 /*
752  * in_pcblookup_local_and_cleanup does everything
753  * in_pcblookup_local does but it checks for a socket
754  * that's going away. Since we know that the lock is
755  * held read+write when this function is called, we
756  * can safely dispose of this socket like the slow
757  * timer would usually do and return NULL. This is
758  * great for bind.
759  */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		socket_lock(so, 0);

		/*
		 * Dispose only when nobody else holds a use count on the
		 * socket; otherwise drop the lock and return the match.
		 */
		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD) {
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp);     /* will unlock & destroy */
			inp = NULL;             /* treat as no match found */
		} else {
			socket_unlock(so, 0);
		}
	}

	return inp;
}
788 
789 static void
in_pcb_conflict_post_msg(u_int16_t port)790 in_pcb_conflict_post_msg(u_int16_t port)
791 {
792 	/*
793 	 * Radar 5523020 send a kernel event notification if a
794 	 * non-participating socket tries to bind the port a socket
795 	 * who has set SOF_NOTIFYCONFLICT owns.
796 	 */
797 	struct kev_msg ev_msg;
798 	struct kev_in_portinuse in_portinuse;
799 
800 	bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
801 	bzero(&ev_msg, sizeof(struct kev_msg));
802 	in_portinuse.port = ntohs(port);        /* port in host order */
803 	in_portinuse.req_pid = proc_selfpid();
804 	ev_msg.vendor_code = KEV_VENDOR_APPLE;
805 	ev_msg.kev_class = KEV_NETWORK_CLASS;
806 	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
807 	ev_msg.event_code = KEV_INET_PORTINUSE;
808 	ev_msg.dv[0].data_ptr = &in_portinuse;
809 	ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
810 	ev_msg.dv[1].data_length = 0;
811 	dlil_post_complete_msg(NULL, &ev_msg);
812 }
813 
814 /*
815  * Bind an INPCB to an address and/or port.  This routine should not alter
816  * the caller-supplied local address "nam" or remote address "remote".
817  *
818  * Returns:	0			Success
819  *		EADDRNOTAVAIL		Address not available.
820  *		EINVAL			Invalid argument
821  *		EAFNOSUPPORT		Address family not supported [notdef]
822  *		EACCES			Permission denied
823  *		EADDRINUSE		Address in use
824  *		EAGAIN			Resource unavailable, try again
825  *		priv_check_cred:EPERM	Operation not permitted
826  */
827 int
in_pcbbind(struct inpcb * inp,struct sockaddr * nam,struct sockaddr * remote,struct proc * p)828 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct sockaddr *remote, struct proc *p)
829 {
830 	struct socket *so = inp->inp_socket;
831 	unsigned short *lastport;
832 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
833 	u_short lport = 0, rand_port = 0;
834 	int wild = 0;
835 	int reuseport = (so->so_options & SO_REUSEPORT);
836 	int error = 0;
837 	int randomport;
838 	int conflict = 0;
839 	boolean_t anonport = FALSE;
840 	kauth_cred_t cred;
841 	struct in_addr laddr;
842 	struct ifnet *outif = NULL;
843 
844 	ASSERT((inp->inp_flags2 & INP2_BIND_IN_PROGRESS) != 0);
845 
846 	if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
847 		error = EADDRNOTAVAIL;
848 		goto done;
849 	}
850 	if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
851 		wild = 1;
852 	}
853 
854 	bzero(&laddr, sizeof(laddr));
855 
856 	socket_unlock(so, 0); /* keep reference on socket */
857 	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
858 	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
859 		/* another thread completed the bind */
860 		lck_rw_done(&pcbinfo->ipi_lock);
861 		socket_lock(so, 0);
862 		error = EINVAL;
863 		goto done;
864 	}
865 
866 	if (nam != NULL) {
867 		if (nam->sa_len != sizeof(struct sockaddr_in)) {
868 			lck_rw_done(&pcbinfo->ipi_lock);
869 			socket_lock(so, 0);
870 			error = EINVAL;
871 			goto done;
872 		}
873 #if 0
874 		/*
875 		 * We should check the family, but old programs
876 		 * incorrectly fail to initialize it.
877 		 */
878 		if (nam->sa_family != AF_INET) {
879 			lck_rw_done(&pcbinfo->ipi_lock);
880 			socket_lock(so, 0);
881 			error = EAFNOSUPPORT;
882 			goto done;
883 		}
884 #endif /* 0 */
885 		lport = SIN(nam)->sin_port;
886 
887 		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
888 			/*
889 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
890 			 * allow complete duplication of binding if
891 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
892 			 * and a multicast address is bound on both
893 			 * new and duplicated sockets.
894 			 */
895 			if (so->so_options & SO_REUSEADDR) {
896 				reuseport = SO_REUSEADDR | SO_REUSEPORT;
897 			}
898 		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
899 			struct sockaddr_in sin;
900 			struct ifaddr *ifa;
901 
902 			/* Sanitized for interface address searches */
903 			SOCKADDR_ZERO(&sin, sizeof(sin));
904 			sin.sin_family = AF_INET;
905 			sin.sin_len = sizeof(struct sockaddr_in);
906 			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
907 
908 			ifa = ifa_ifwithaddr(SA(&sin));
909 			if (ifa == NULL) {
910 				lck_rw_done(&pcbinfo->ipi_lock);
911 				socket_lock(so, 0);
912 				error = EADDRNOTAVAIL;
913 				goto done;
914 			} else {
915 				/*
916 				 * Opportunistically determine the outbound
917 				 * interface that may be used; this may not
918 				 * hold true if we end up using a route
919 				 * going over a different interface, e.g.
920 				 * when sending to a local address.  This
921 				 * will get updated again after sending.
922 				 */
923 				IFA_LOCK(ifa);
924 				outif = ifa->ifa_ifp;
925 				IFA_UNLOCK(ifa);
926 				ifa_remref(ifa);
927 			}
928 		}
929 
930 #if SKYWALK
931 		if (inp->inp_flags2 & INP2_EXTERNAL_PORT) {
932 			// Extract the external flow info
933 			struct ns_flow_info nfi = {};
934 			error = necp_client_get_netns_flow_info(inp->necp_client_uuid,
935 			    &nfi);
936 			if (error != 0) {
937 				lck_rw_done(&pcbinfo->ipi_lock);
938 				socket_lock(so, 0);
939 				goto done;
940 			}
941 
942 			// Extract the reserved port
943 			u_int16_t reserved_lport = 0;
944 			if (nfi.nfi_laddr.sa.sa_family == AF_INET) {
945 				reserved_lport = nfi.nfi_laddr.sin.sin_port;
946 			} else if (nfi.nfi_laddr.sa.sa_family == AF_INET6) {
947 				reserved_lport = nfi.nfi_laddr.sin6.sin6_port;
948 			} else {
949 				lck_rw_done(&pcbinfo->ipi_lock);
950 				socket_lock(so, 0);
951 				error = EINVAL;
952 				goto done;
953 			}
954 
955 			// Validate or use the reserved port
956 			if (lport == 0) {
957 				lport = reserved_lport;
958 			} else if (lport != reserved_lport) {
959 				lck_rw_done(&pcbinfo->ipi_lock);
960 				socket_lock(so, 0);
961 				error = EINVAL;
962 				goto done;
963 			}
964 		}
965 
966 		/* Do not allow reserving a UDP port if remaining UDP port count is below 4096 */
967 		if (SOCK_PROTO(so) == IPPROTO_UDP && !allow_udp_port_exhaustion) {
968 			uint32_t current_reservations = 0;
969 			if (inp->inp_vflag & INP_IPV6) {
970 				current_reservations = netns_lookup_reservations_count_in6(inp->in6p_laddr, IPPROTO_UDP);
971 			} else {
972 				current_reservations = netns_lookup_reservations_count_in(inp->inp_laddr, IPPROTO_UDP);
973 			}
974 			if (USHRT_MAX - UDP_RANDOM_PORT_RESERVE < current_reservations) {
975 				log(LOG_ERR, "UDP port not available, less than 4096 UDP ports left");
976 				lck_rw_done(&pcbinfo->ipi_lock);
977 				socket_lock(so, 0);
978 				error = EADDRNOTAVAIL;
979 				goto done;
980 			}
981 		}
982 
983 #endif /* SKYWALK */
984 
985 		if (lport != 0) {
986 			struct inpcb *t;
987 			uid_t u;
988 
989 #if XNU_TARGET_OS_OSX
990 			if (ntohs(lport) < IPPORT_RESERVED &&
991 			    SIN(nam)->sin_addr.s_addr != 0 &&
992 			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
993 				cred = kauth_cred_proc_ref(p);
994 				error = priv_check_cred(cred,
995 				    PRIV_NETINET_RESERVEDPORT, 0);
996 				kauth_cred_unref(&cred);
997 				if (error != 0) {
998 					lck_rw_done(&pcbinfo->ipi_lock);
999 					socket_lock(so, 0);
1000 					error = EACCES;
1001 					goto done;
1002 				}
1003 			}
1004 #endif /* XNU_TARGET_OS_OSX */
1005 			/*
1006 			 * Check wether the process is allowed to bind to a restricted port
1007 			 */
1008 			if (!current_task_can_use_restricted_in_port(lport,
1009 			    (uint8_t)SOCK_PROTO(so), PORT_FLAGS_BSD)) {
1010 				lck_rw_done(&pcbinfo->ipi_lock);
1011 				socket_lock(so, 0);
1012 				error = EADDRINUSE;
1013 				goto done;
1014 			}
1015 
1016 			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
1017 			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
1018 			    (t = in_pcblookup_local_and_cleanup(
1019 				    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
1020 				    INPLOOKUP_WILDCARD)) != NULL &&
1021 			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1022 			    t->inp_laddr.s_addr != INADDR_ANY ||
1023 			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
1024 			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
1025 			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
1026 			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1027 			    t->inp_laddr.s_addr != INADDR_ANY) &&
1028 			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1029 			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1030 			    uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1031 				if ((t->inp_socket->so_flags &
1032 				    SOF_NOTIFYCONFLICT) &&
1033 				    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1034 					conflict = 1;
1035 				}
1036 
1037 				lck_rw_done(&pcbinfo->ipi_lock);
1038 
1039 				if (conflict) {
1040 					in_pcb_conflict_post_msg(lport);
1041 				}
1042 
1043 				socket_lock(so, 0);
1044 				error = EADDRINUSE;
1045 				goto done;
1046 			}
1047 			t = in_pcblookup_local_and_cleanup(pcbinfo,
1048 			    SIN(nam)->sin_addr, lport, wild);
1049 			if (t != NULL &&
1050 			    (reuseport & t->inp_socket->so_options) == 0 &&
1051 			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1052 			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1053 			    uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1054 				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1055 				    t->inp_laddr.s_addr != INADDR_ANY ||
1056 				    SOCK_DOM(so) != PF_INET6 ||
1057 				    SOCK_DOM(t->inp_socket) != PF_INET6) {
1058 					if ((t->inp_socket->so_flags &
1059 					    SOF_NOTIFYCONFLICT) &&
1060 					    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1061 						conflict = 1;
1062 					}
1063 
1064 					lck_rw_done(&pcbinfo->ipi_lock);
1065 
1066 					if (conflict) {
1067 						in_pcb_conflict_post_msg(lport);
1068 					}
1069 					socket_lock(so, 0);
1070 					error = EADDRINUSE;
1071 					goto done;
1072 				}
1073 			}
1074 #if SKYWALK
1075 			if ((SOCK_PROTO(so) == IPPROTO_TCP ||
1076 			    SOCK_PROTO(so) == IPPROTO_UDP) &&
1077 			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1078 				int res_err = 0;
1079 				if (inp->inp_vflag & INP_IPV6) {
1080 					res_err = netns_reserve_in6(
1081 						&inp->inp_netns_token,
1082 						SIN6(nam)->sin6_addr,
1083 						(uint8_t)SOCK_PROTO(so), lport, NETNS_BSD,
1084 						NULL);
1085 				} else {
1086 					res_err = netns_reserve_in(
1087 						&inp->inp_netns_token,
1088 						SIN(nam)->sin_addr, (uint8_t)SOCK_PROTO(so),
1089 						lport, NETNS_BSD, NULL);
1090 				}
1091 				if (res_err != 0) {
1092 					lck_rw_done(&pcbinfo->ipi_lock);
1093 					socket_lock(so, 0);
1094 					error = EADDRINUSE;
1095 					goto done;
1096 				}
1097 			}
1098 #endif /* SKYWALK */
1099 		}
1100 		laddr = SIN(nam)->sin_addr;
1101 	}
1102 	if (lport == 0) {
1103 		u_short first, last;
1104 		int count;
1105 		bool found;
1106 
1107 		/*
1108 		 * Override wild = 1 for implicit bind (mainly used by connect)
1109 		 * For implicit bind (lport == 0), we always use an unused port,
1110 		 * so REUSEADDR|REUSEPORT don't apply
1111 		 */
1112 		wild = 1;
1113 
1114 		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
1115 		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
1116 		    udp_use_randomport);
1117 
1118 		/*
1119 		 * Even though this looks similar to the code in
1120 		 * in6_pcbsetport, the v6 vs v4 checks are different.
1121 		 */
1122 		anonport = TRUE;
1123 		if (inp->inp_flags & INP_HIGHPORT) {
1124 			first = (u_short)ipport_hifirstauto;     /* sysctl */
1125 			last  = (u_short)ipport_hilastauto;
1126 			lastport = &pcbinfo->ipi_lasthi;
1127 		} else if (inp->inp_flags & INP_LOWPORT) {
1128 			cred = kauth_cred_proc_ref(p);
1129 			error = priv_check_cred(cred,
1130 			    PRIV_NETINET_RESERVEDPORT, 0);
1131 			kauth_cred_unref(&cred);
1132 			if (error != 0) {
1133 				lck_rw_done(&pcbinfo->ipi_lock);
1134 				socket_lock(so, 0);
1135 				goto done;
1136 			}
1137 			first = (u_short)ipport_lowfirstauto;    /* 1023 */
1138 			last  = (u_short)ipport_lowlastauto;     /* 600 */
1139 			lastport = &pcbinfo->ipi_lastlow;
1140 		} else {
1141 			first = (u_short)ipport_firstauto;       /* sysctl */
1142 			last  = (u_short)ipport_lastauto;
1143 			lastport = &pcbinfo->ipi_lastport;
1144 		}
1145 		/* No point in randomizing if only one port is available */
1146 
1147 		if (first == last) {
1148 			randomport = 0;
1149 		}
1150 		/*
1151 		 * Simple check to ensure all ports are not used up causing
1152 		 * a deadlock here.
1153 		 *
1154 		 * We split the two cases (up and down) so that the direction
1155 		 * is not being tested on each round of the loop.
1156 		 */
1157 		if (first > last) {
1158 			struct in_addr lookup_addr;
1159 
1160 			/*
1161 			 * counting down
1162 			 */
1163 			if (randomport) {
1164 				read_frandom(&rand_port, sizeof(rand_port));
1165 				*lastport =
1166 				    first - (rand_port % (first - last));
1167 			}
1168 			count = first - last;
1169 
1170 			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1171 			    inp->inp_laddr;
1172 
1173 			found = false;
1174 			do {
1175 				if (count-- < 0) {      /* completely used? */
1176 					lck_rw_done(&pcbinfo->ipi_lock);
1177 					socket_lock(so, 0);
1178 					error = EADDRNOTAVAIL;
1179 					goto done;
1180 				}
1181 				--*lastport;
1182 				if (*lastport > first || *lastport < last) {
1183 					*lastport = first;
1184 				}
1185 				lport = htons(*lastport);
1186 
1187 				/*
1188 				 * Skip if this is a restricted port as we do not want to
1189 				 * use restricted ports as ephemeral
1190 				 */
1191 				if (IS_RESTRICTED_IN_PORT(lport)) {
1192 					continue;
1193 				}
1194 
1195 				found = in_pcblookup_local_and_cleanup(pcbinfo,
1196 				    lookup_addr, lport, wild) == NULL;
1197 #if SKYWALK
1198 				if (found &&
1199 				    (SOCK_PROTO(so) == IPPROTO_TCP ||
1200 				    SOCK_PROTO(so) == IPPROTO_UDP) &&
1201 				    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1202 					int res_err;
1203 					if (inp->inp_vflag & INP_IPV6) {
1204 						res_err = netns_reserve_in6(
1205 							&inp->inp_netns_token,
1206 							inp->in6p_laddr,
1207 							(uint8_t)SOCK_PROTO(so), lport,
1208 							NETNS_BSD, NULL);
1209 					} else {
1210 						res_err = netns_reserve_in(
1211 							&inp->inp_netns_token,
1212 							lookup_addr, (uint8_t)SOCK_PROTO(so),
1213 							lport, NETNS_BSD, NULL);
1214 					}
1215 					found = res_err == 0;
1216 				}
1217 #endif /* SKYWALK */
1218 			} while (!found);
1219 		} else {
1220 			struct in_addr lookup_addr;
1221 
1222 			/*
1223 			 * counting up
1224 			 */
1225 			if (randomport) {
1226 				read_frandom(&rand_port, sizeof(rand_port));
1227 				*lastport =
1228 				    first + (rand_port % (first - last));
1229 			}
1230 			count = last - first;
1231 
1232 			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1233 			    inp->inp_laddr;
1234 
1235 			found = false;
1236 			do {
1237 				if (count-- < 0) {      /* completely used? */
1238 					lck_rw_done(&pcbinfo->ipi_lock);
1239 					socket_lock(so, 0);
1240 					error = EADDRNOTAVAIL;
1241 					goto done;
1242 				}
1243 				++*lastport;
1244 				if (*lastport < first || *lastport > last) {
1245 					*lastport = first;
1246 				}
1247 				lport = htons(*lastport);
1248 
1249 				/*
1250 				 * Skip if this is a restricted port as we do not want to
1251 				 * use restricted ports as ephemeral
1252 				 */
1253 				if (IS_RESTRICTED_IN_PORT(lport)) {
1254 					continue;
1255 				}
1256 
1257 				found = in_pcblookup_local_and_cleanup(pcbinfo,
1258 				    lookup_addr, lport, wild) == NULL;
1259 #if SKYWALK
1260 				if (found &&
1261 				    (SOCK_PROTO(so) == IPPROTO_TCP ||
1262 				    SOCK_PROTO(so) == IPPROTO_UDP) &&
1263 				    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1264 					int res_err;
1265 					if (inp->inp_vflag & INP_IPV6) {
1266 						res_err = netns_reserve_in6(
1267 							&inp->inp_netns_token,
1268 							inp->in6p_laddr,
1269 							(uint8_t)SOCK_PROTO(so), lport,
1270 							NETNS_BSD, NULL);
1271 					} else {
1272 						res_err = netns_reserve_in(
1273 							&inp->inp_netns_token,
1274 							lookup_addr, (uint8_t)SOCK_PROTO(so),
1275 							lport, NETNS_BSD, NULL);
1276 					}
1277 					found = res_err == 0;
1278 				}
1279 #endif /* SKYWALK */
1280 			} while (!found);
1281 		}
1282 	}
1283 	socket_lock(so, 0);
1284 
1285 	/*
1286 	 * We unlocked socket's protocol lock for a long time.
1287 	 * The socket might have been dropped/defuncted.
1288 	 * Checking if world has changed since.
1289 	 */
1290 	if (inp->inp_state == INPCB_STATE_DEAD) {
1291 #if SKYWALK
1292 		netns_release(&inp->inp_netns_token);
1293 #endif /* SKYWALK */
1294 		lck_rw_done(&pcbinfo->ipi_lock);
1295 		error = ECONNABORTED;
1296 		goto done;
1297 	}
1298 
1299 	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1300 #if SKYWALK
1301 		netns_release(&inp->inp_netns_token);
1302 #endif /* SKYWALK */
1303 		lck_rw_done(&pcbinfo->ipi_lock);
1304 		error = EINVAL;
1305 		goto done;
1306 	}
1307 
1308 	if (laddr.s_addr != INADDR_ANY) {
1309 		inp->inp_laddr = laddr;
1310 		inp->inp_last_outifp = outif;
1311 #if SKYWALK
1312 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
1313 			netns_set_ifnet(&inp->inp_netns_token, outif);
1314 		}
1315 #endif /* SKYWALK */
1316 	}
1317 	inp->inp_lport = lport;
1318 	if (anonport) {
1319 		inp->inp_flags |= INP_ANONPORT;
1320 	}
1321 
1322 	if (in_pcbinshash(inp, remote, 1) != 0) {
1323 		inp->inp_laddr.s_addr = INADDR_ANY;
1324 		inp->inp_last_outifp = NULL;
1325 
1326 #if SKYWALK
1327 		netns_release(&inp->inp_netns_token);
1328 #endif /* SKYWALK */
1329 		inp->inp_lport = 0;
1330 		if (anonport) {
1331 			inp->inp_flags &= ~INP_ANONPORT;
1332 		}
1333 		lck_rw_done(&pcbinfo->ipi_lock);
1334 		error = EAGAIN;
1335 		goto done;
1336 	}
1337 	lck_rw_done(&pcbinfo->ipi_lock);
1338 	sflt_notify(so, sock_evt_bound, NULL);
1339 
1340 	in_pcb_check_management_entitled(inp);
1341 	in_pcb_check_ultra_constrained_entitled(inp);
1342 done:
1343 	return error;
1344 }
1345 
/*
 * True if the IPv4 destination is link-local, loopback, zeronet,
 * multicast or private -- addresses for which the APN fallback
 * notification is never issued (see apn_fallback_required()).
 */
#define APN_FALLBACK_IP_FILTER(a)       \
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

/* Minimum interval between notifications, in net_uptime() units (seconds) */
#define APN_FALLBACK_NOTIF_INTERVAL     2 /* Magic Number */

/* net_uptime() timestamp of the last notification, used for throttling */
static uint64_t last_apn_fallback = 0;
1355 
/*
 * Decide whether a failed IPv4 connection attempt warrants posting an
 * APN fallback notification.  Returns TRUE only when every gate below
 * passes: the feature is enabled, the caller is a user process that has
 * not opted out, we are not throttled, the destination is a public
 * unicast address, the only unscoped default route is IPv6 over a
 * cellular interface, the binary is a third-party app, and the binary
 * predates the App Store IPv6 requirement.
 */
static boolean_t
apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	/* Feature must be enabled via the apn_fallbk_enabled knob */
	if (apn_fallbk_enabled == FALSE) {
		return FALSE;
	}

	/* Kernel-originated traffic never triggers the fallback */
	if (proc == kernproc) {
		return FALSE;
	}

	/* The socket may explicitly opt out via SO_NOAPNFALLBK */
	if (so && (so->so_options & SO_NOAPNFALLBK)) {
		return FALSE;
	}

	/* Rate-limit to one notification per APN_FALLBACK_NOTIF_INTERVAL */
	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
		return FALSE;
	}

	/* Skip link-local/loopback/zeronet/multicast/private destinations */
	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
		return FALSE;
	}

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1(SA(&lookup_default_addr), 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		/* rtalloc1() returned a held reference; drop it */
		rtfree(rt);
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route, ensure that
	 * we do not have IPv4 default route before triggering
	 * the event
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1(SA(&lookup_default_addr), 0, 0);

	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party app.
		 * Note that platform daemons use their process name as a
		 * bundle ID so we filter out bundle IDs without dots.
		 */
		const char *__null_terminated bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strlcmp("com.apple.", bundle_id, sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than that.
		 * We check both atime and birthtime since birthtime is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
		vfs_context_t __single context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		/* Stat the process text vnode to learn the binary's timestamps */
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
			    sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}
1474 
1475 static void
apn_fallback_trigger(proc_t proc,struct socket * so)1476 apn_fallback_trigger(proc_t proc, struct socket *so)
1477 {
1478 	pid_t pid = 0;
1479 	struct kev_msg ev_msg;
1480 	struct kev_netevent_apnfallbk_data apnfallbk_data;
1481 
1482 	last_apn_fallback = net_uptime();
1483 	pid = proc_pid(proc);
1484 	uuid_t application_uuid;
1485 	uuid_clear(application_uuid);
1486 	proc_getexecutableuuid(proc, application_uuid,
1487 	    sizeof(application_uuid));
1488 
1489 	bzero(&ev_msg, sizeof(struct kev_msg));
1490 	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1491 	ev_msg.kev_class        = KEV_NETWORK_CLASS;
1492 	ev_msg.kev_subclass     = KEV_NETEVENT_SUBCLASS;
1493 	ev_msg.event_code       = KEV_NETEVENT_APNFALLBACK;
1494 
1495 	bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1496 
1497 	if (so->so_flags & SOF_DELEGATED) {
1498 		apnfallbk_data.epid = so->e_pid;
1499 		uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1500 	} else {
1501 		apnfallbk_data.epid = so->last_pid;
1502 		uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1503 	}
1504 
1505 	ev_msg.dv[0].data_ptr   = &apnfallbk_data;
1506 	ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1507 	kev_post_msg(&ev_msg);
1508 	apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1509 }
1510 
1511 /*
1512  * Transform old in_pcbconnect() into an inner subroutine for new
1513  * in_pcbconnect(); do some validity-checking on the remote address
1514  * (in "nam") and then determine local host address (i.e., which
1515  * interface) to use to access that remote host.
1516  *
1517  * This routine may alter the caller-supplied remote address "nam".
1518  *
1519  * The caller may override the bound-to-interface setting of the socket
1520  * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1521  *
1522  * This routine might return an ifp with a reference held if the caller
1523  * provides a non-NULL outif, even in the error case.  The caller is
1524  * responsible for releasing its reference.
1525  *
1526  * Returns:	0			Success
1527  *		EINVAL			Invalid argument
1528  *		EAFNOSUPPORT		Address family not supported
1529  *		EADDRNOTAVAIL		Address not available
1530  */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	/* Caller may receive an ifp reference through outif; start clean */
	if (outif != NULL) {
		*outif = NULL;
	}
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	/* A zero destination port is only acceptable for raw sockets */
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	in_pcb_check_management_entitled(inp);
	in_pcb_check_ultra_constrained_entitled(inp);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(&in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			/* Only borrowed under the rwlock; no reference taken */
			ia = NULL;
		}
		lck_rw_done(&in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Discard a cached route that is stale or for a different dest */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		SOCKADDR_ZERO(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	SOCKADDR_ZERO(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/*
		 * NOTE(review): nam was validated above to have
		 * sockaddr_in length and AF_INET family; the (void *)
		 * cast bridges the sockaddr/sockaddr_in pointer types.
		 */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam)) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			/* Hold ia across the unlock; released in done: */
			ifa_addref(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed off.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		/* Last resort: take the address attached to the route */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			ifa_addref(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			/* Swap the reference for one on the multicast ifp's ia */
			if (ia != NULL) {
				ifa_remref(&ia->ia_ifa);
			}
			lck_rw_lock_shared(&in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				ifa_addref(&ia->ia_ifa);
			}
			lck_rw_done(&in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);   /* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		ifa_remref(&ia->ia_ifa);
		ia = NULL;
	}

	/* Let socket filters know the send was denied by interface policy */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}
1815 
1816 /*
1817  * Outer subroutine:
1818  * Connect from a socket to a specified address.
1819  * Both address and port must be specified in argument sin.
1820  * If don't have a local address for this socket yet,
1821  * then pick one.
1822  *
1823  * The caller may override the bound-to-interface setting of the socket
1824  * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1825  */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = SIN(nam);
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	/* Bump the state-change generation count for content filters */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	/*
	 *   Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
		return error;
	}

	/*
	 * Drop the socket lock around the global hash lookup,
	 * presumably to avoid lock-ordering issues with the pcbinfo
	 * lock (see the lock-inversion handling below).
	 */
	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return ECONNREFUSED;
	}

	/* An existing pcb with this 4-tuple means the address is in use */
	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return EADDRINUSE;
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* Implicit bind: let in_pcbbind pick an ephemeral port */
			error = in_pcbbind(inp, NULL, nam, p);
			if (error) {
				return error;
			}
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token,
			    inp->inp_last_outifp);
		}
#endif /* SKYWALK */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routines can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0) {
			return EINVAL;
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* Commit the foreign address/port and rehash under ipi_lock */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_udp_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	return 0;
}
1925 
/*
 * Disconnect a PCB from its foreign address: clear { faddr, fport } and
 * re-hash the PCB under the wildcard foreign address.  If the owning
 * socket has no remaining file-descriptor reference (and is not a
 * multipath subflow), the PCB is detached as well.
 */
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* Let NetworkStatistics cache UDP PCB state before we clear it */
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_udp_pcb_cache(inp);
	}

	/* Revert to the unconnected state */
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

#if CONTENT_FILTER
	/* Bump so content filters can detect the state transition */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	/* Move the PCB to the hash bucket for the wildcard foreign address */
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
		in_pcbdetach(inp);
	}
}
1962 
/*
 * Detach a PCB from its socket: release per-PCB resources (IPsec policy,
 * keepalive buffer, cached route, multicast options), mark the PCB as
 * STOPUSING/DEAD, and schedule it for garbage collection.  The actual
 * memory release is deferred to in_pcbdispose() via the PCB reaper.
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/* Account for UDP sockets that never moved any data */
	if (SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_mstat.ms_total.ts_rxpackets == 0 && inp->inp_mstat.ms_total.ts_txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
		nstat_pcb_detach(inp);
	}

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		kfree_data_counted_by(inp->inp_keepalive_data, inp->inp_keepalive_datalen);
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if SKYWALK
	/* Free up the port in the namespace registrar if not in TIME_WAIT */
	if (!(inp->inp_flags2 & INP2_TIMEWAIT)) {
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
	}
#endif /* SKYWALK */

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		/* Release any stored IP options (mbuf) */
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		if (imo != NULL) {
			IMO_REMREF(imo);
		}
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_mstat.ms_total.ts_txbytes != 0 ||
			    inp->inp_mstat.ms_total.ts_rxbytes != 0) {
				/* Attribute the event to the delegated process when present */
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->e_pid,
						so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->last_pid,
						so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		/* Ask the PCB reaper to pick this one up soon */
		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
2066 
2067 
/*
 * Final disposal of a PCB (and its socket, if any).  Called by the PCB
 * reaper once the PCB is DEAD with wantcnt == WNT_STOPUSING and the
 * socket's usecount has dropped to zero; panics if either invariant is
 * violated.  Caller must hold ipi_lock exclusively (asserted below).
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(&ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			/* per-PCB mutex is no longer needed once the PCB is off the lists */
			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		/* sever the socket <-> PCB linkage before freeing either */
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		zfree(ipi->ipi_zone, inp);
		proto_memacct_sub(so->so_proto, kalloc_type_size(ipi->ipi_zone));

		sodealloc(so);
	}
}
2144 
2145 /*
2146  * The calling convention of in_getsockaddr() and in_getpeeraddr() was
2147  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
2148  * in struct pr_usrreqs, so that protocols can just reference then directly
2149  * without the need for a wrapper function.
2150  */
2151 int
in_getsockaddr(struct socket * so,struct sockaddr ** nam)2152 in_getsockaddr(struct socket *so, struct sockaddr **nam)
2153 {
2154 	struct inpcb *inp;
2155 	struct sockaddr_in *sin;
2156 
2157 	/*
2158 	 * Do the malloc first in case it blocks.
2159 	 */
2160 	sin = SIN(alloc_sockaddr(sizeof(*sin),
2161 	    Z_WAITOK | Z_NOFAIL));
2162 
2163 	sin->sin_family = AF_INET;
2164 
2165 	if ((inp = sotoinpcb(so)) == NULL) {
2166 		free_sockaddr(sin);
2167 		return EINVAL;
2168 	}
2169 	sin->sin_port = inp->inp_lport;
2170 	sin->sin_addr = inp->inp_laddr;
2171 
2172 	*nam = SA(sin);
2173 	return 0;
2174 }
2175 
2176 int
in_getsockaddr_s(struct socket * so,struct sockaddr_in * ss)2177 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
2178 {
2179 	struct sockaddr_in *sin = ss;
2180 	struct inpcb *inp;
2181 
2182 	VERIFY(ss != NULL);
2183 	SOCKADDR_ZERO(ss, sizeof(*ss));
2184 
2185 	sin->sin_family = AF_INET;
2186 	sin->sin_len = sizeof(*sin);
2187 
2188 	if ((inp = sotoinpcb(so)) == NULL) {
2189 		return EINVAL;
2190 	}
2191 
2192 	sin->sin_port = inp->inp_lport;
2193 	sin->sin_addr = inp->inp_laddr;
2194 	return 0;
2195 }
2196 
2197 int
in_getpeeraddr(struct socket * so,struct sockaddr ** nam)2198 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
2199 {
2200 	struct inpcb *inp;
2201 	struct sockaddr_in *sin;
2202 
2203 	/*
2204 	 * Do the malloc first in case it blocks.
2205 	 */
2206 	sin = SIN(alloc_sockaddr(sizeof(*sin),
2207 	    Z_WAITOK | Z_NOFAIL));
2208 
2209 	sin->sin_family = AF_INET;
2210 
2211 	if ((inp = sotoinpcb(so)) == NULL) {
2212 		free_sockaddr(sin);
2213 		return EINVAL;
2214 	}
2215 	sin->sin_port = inp->inp_fport;
2216 	sin->sin_addr = inp->inp_faddr;
2217 
2218 	*nam = SA(sin);
2219 	return 0;
2220 }
2221 
/*
 * Invoke "notify" with "errno" on every IPv4 PCB in "pcbinfo" whose
 * foreign address matches "faddr".  PCBs without a socket, or already
 * marked WNT_STOPUSING, are skipped.
 */
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	/* Shared lock suffices: the list is only traversed here */
	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL) {
			continue;
		}
		/* Take a use reference; skip PCBs that are being torn down */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}
		/* Call the notifier with the socket locked, then drop the ref */
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
}
2248 
2249 /*
2250  * Check for alternatives when higher level complains
2251  * about service problems.  For now, invalidate cached
2252  * routing information.  If the route was created dynamically
2253  * (by a redirect), time to try a default gateway again.
2254  */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			/* Redirect-created route: delete it from the table */
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		/* ifa_foraddr() returned a referenced ifaddr; drop it */
		if (ia != NULL) {
			ifa_remref(&ia->ia_ifa);
		}
	}
	if (rt == NULL || release) {
		ROUTE_RELEASE(&inp->inp_route);
	}
}
2295 
2296 /*
2297  * After a routing change, flush old routing
2298  * and allocate a (hopefully) better one.
2299  */
2300 void
in_rtchange(struct inpcb * inp,int errno)2301 in_rtchange(struct inpcb *inp, int errno)
2302 {
2303 #pragma unused(errno)
2304 	boolean_t release = FALSE;
2305 	struct rtentry *rt;
2306 
2307 	if ((rt = inp->inp_route.ro_rt) != NULL) {
2308 		struct in_ifaddr *ia = NULL;
2309 
2310 		/* if address is gone, keep the old route */
2311 		if (inp->inp_laddr.s_addr != INADDR_ANY &&
2312 		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2313 			/*
2314 			 * Address is around; ditch the route.  A new route
2315 			 * can be allocated the next time output is attempted.
2316 			 */
2317 			release = TRUE;
2318 		}
2319 		if (ia != NULL) {
2320 			ifa_remref(&ia->ia_ifa);
2321 		}
2322 	}
2323 	if (rt == NULL || release) {
2324 		ROUTE_RELEASE(&inp->inp_route);
2325 	}
2326 }
2327 
2328 /*
2329  * Lookup a PCB based on the local address and port.
2330  */
/*
 * Lookup a PCB by { laddr, lport }.  Without "wild_okay", only an exact
 * match on an unconnected PCB (wildcard foreign address) is returned.
 * With "wild_okay", a best-fit search over the port hash is performed:
 * each wildcard mismatch (foreign addr bound, or local addr vs. laddr
 * wildcarding) adds one to the score, and the PCB with the lowest score
 * wins.  No reference is taken on the returned PCB; the caller is
 * expected to hold the appropriate pcbinfo lock.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	int matchwild = 3, wildcard;
	u_short lport = (u_short)lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
			if (!(inp->inp_vflag & INP_IPV4)) {
				continue;
			}
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return inp;
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return NULL;
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport) {
				break;
			}
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
				if (!(inp->inp_vflag & INP_IPV4)) {
					continue;
				}
				/* A bound foreign address is one wildcard miss */
				if (inp->inp_faddr.s_addr != INADDR_ANY) {
					wildcard++;
				}
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY) {
						wildcard++;
					} else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr) {
						/* Concrete local addrs disagree: no match */
						continue;
					}
				} else {
					if (laddr.s_addr != INADDR_ANY) {
						wildcard++;
					}
				}
				/* Keep the candidate with the fewest wildcard misses */
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						/* Perfect match; stop searching */
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return match;
	}
}
2423 
2424 /*
2425  * Check if PCB exists in hash list.
2426  */
/*
 * Check whether a PCB matching { faddr, fport, laddr, lport } exists in
 * the hash lists.  Returns non-zero iff a matching PCB with a socket is
 * found, and fills in the owning socket's credentials via "uid"/"gid"
 * (left at UID_MAX/GID_MAX otherwise).  With "wildcard" set, falls back
 * to unconnected listeners on the port: a local-address match is
 * preferred over an INADDR_ANY bind, and a plain IPv4 socket over a
 * PF_INET6 (mapped) one.  Takes ipi_lock shared internally.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* Skip PCBs restricted from receiving on this interface */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
					inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					inp->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}

	/* Second pass: unconnected PCBs hashed under the wildcard faddr */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				/* Exact local-address listener wins immediately */
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
						inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
						inp->inp_socket->so_cred);
				}
				lck_rw_done(&pcbinfo->ipi_lock);
				return found;
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/* Remember INADDR_ANY candidates; decide after the scan */
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		/* Fall back to an IPv6 socket with a v4-mapped wildcard bind */
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
					local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
			local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
			local_wild->inp_socket->so_cred);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	return found;
}
2553 
2554 /*
2555  * Lookup PCB in hash list.
2556  */
/*
 * Core hash lookup; the caller must hold ipi_lock.  On a match, a
 * WNT_ACQUIRE reference is taken on the PCB via in_pcb_checkstate();
 * PCBs already in WNT_STOPUSING are reported as not found.  Preference
 * order: exact 4-tuple match, then (with "wildcard") an unconnected PCB
 * bound to { laddr, lport }, then an INADDR_ANY listener — favoring a
 * plain IPv4 socket over a PF_INET6 (mapped) one.
 */
static struct inpcb *
in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* Skip PCBs restricted from receiving on this interface */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				return inp;
			} else {
				/* it's there but dead, say it isn't found */
				return NULL;
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		return NULL;
	}

	/* Second pass: unconnected PCBs hashed under the wildcard faddr */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				/* Exact local-address listener wins immediately */
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					return inp;
				} else {
					/* it's dead; say it isn't found */
					return NULL;
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/* Remember INADDR_ANY candidates; decide after the scan */
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		/* Fall back to an IPv6 socket with a v4-mapped wildcard bind */
		if (local_wild_mapped != NULL) {
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				return local_wild_mapped;
			} else {
				/* it's dead; say it isn't found */
				return NULL;
			}
		}
		return NULL;
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		return local_wild;
	}
	/*
	 * It's either not found or is already dead.
	 */
	return NULL;
}
2666 
2667 struct inpcb *
in_pcblookup_hash(struct inpcbinfo * pcbinfo,struct in_addr faddr,u_int fport_arg,struct in_addr laddr,u_int lport_arg,int wildcard,struct ifnet * ifp)2668 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2669     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2670     struct ifnet *ifp)
2671 {
2672 	struct inpcb *inp;
2673 
2674 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
2675 
2676 	inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport_arg, laddr,
2677 	    lport_arg, wildcard, ifp);
2678 
2679 	lck_rw_done(&pcbinfo->ipi_lock);
2680 
2681 	return inp;
2682 }
2683 
2684 
2685 struct inpcb *
in_pcblookup_hash_try(struct inpcbinfo * pcbinfo,struct in_addr faddr,u_int fport_arg,struct in_addr laddr,u_int lport_arg,int wildcard,struct ifnet * ifp)2686 in_pcblookup_hash_try(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2687     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2688     struct ifnet *ifp)
2689 {
2690 	struct inpcb *inp;
2691 
2692 	if (!lck_rw_try_lock_shared(&pcbinfo->ipi_lock)) {
2693 		return NULL;
2694 	}
2695 
2696 	inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport_arg, laddr,
2697 	    lport_arg, wildcard, ifp);
2698 
2699 	lck_rw_done(&pcbinfo->ipi_lock);
2700 
2701 	return inp;
2702 }
2703 
2704 /*
2705  * @brief	Insert PCB onto various hash lists.
2706  *
2707  * @param	inp Pointer to internet protocol control block
2708  * @param	remote Pointer to remote address sockaddr for policy evaluation
2709  * @param	locked	Implies if ipi_lock (protecting pcb list)
2710  *              is already locked or not.
2711  *
2712  * @return	int error on failure and 0 on success
2713  */
int
in_pcbinshash(struct inpcb *inp, struct sockaddr *remote, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}

	/*
	 * This routine or its caller may have given up
	 * socket's protocol lock briefly.
	 * During that time the socket may have been dropped.
	 * Safe-guarding against that.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		if (!locked) {
			lck_rw_done(&pcbinfo->ipi_lock);
		}
		return ECONNABORTED;
	}


	/* For v6 sockets, hash on the low 32 bits of the foreign address */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport) {
			break;
		}
	}

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		phd = kalloc_type(struct inpcbport, Z_WAITOK | Z_NOFAIL);
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

#if SKYWALK
	/*
	 * Reserve the <proto, laddr, lport> tuple in the Skywalk port
	 * namespace before linking the PCB in, so a reservation failure
	 * can be reported without having to unhash.
	 */
	int err;
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		if (inp->inp_vflag & INP_IPV6) {
			err = netns_reserve_in6(&inp->inp_netns_token,
			    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		} else {
			err = netns_reserve_in(&inp->inp_netns_token,
			    inp->inp_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		}
		if (err) {
			if (!locked) {
				lck_rw_done(&pcbinfo->ipi_lock);
			}
			return err;
		}
		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */

	/* Link the PCB onto the port list and the main hash bucket */
	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked) {
		lck_rw_done(&pcbinfo->ipi_lock);
	}

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, remote, 0);
#endif /* NECP */

	return 0;
}
2825 
2826 /*
2827  * Move PCB to the proper hash bucket when { faddr, fport } have  been
2828  * changed. NOTE: This does not handle the case of the lport changing (the
2829  * hashed port list would have to be updated as well), so the lport must
2830  * not change after in_pcbinshash() has been called.
2831  */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if SKYWALK
	/*
	 * Keep the Skywalk port-namespace reservation in sync with the
	 * (possibly changed) local address before rehashing.
	 */
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		int err;
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			/* Existing reservation: just move it to the new laddr */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_change_addr_in6(
					&inp->inp_netns_token, inp->in6p_laddr);
			} else {
				err = netns_change_addr_in(
					&inp->inp_netns_token, inp->inp_laddr);
			}
		} else {
			/* No reservation yet: take one for <proto, laddr, lport> */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_reserve_in6(&inp->inp_netns_token,
				    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			} else {
				err = netns_reserve_in(&inp->inp_netns_token,
				    inp->inp_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			}
		}
		/* We are assuming that whatever code paths result in a rehash
		 * did their due diligence and ensured that the given
		 * <proto, laddr, lport> tuple was free ahead of time. Just
		 * reserving the lport on INADDR_ANY should be enough, since
		 * that will block Skywalk from trying to reserve that same
		 * port. Given this assumption, the above netns calls should
		 * never fail*/
		VERIFY(err == 0);

		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */
	/* For v6 sockets, hash on the low 32 bits of the foreign address */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	/* Recompute the bucket from the current { faddr, lport, fport } */
	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	/* Unlink from the old bucket, if currently hashed */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}
2899 
2900 /*
2901  * Remove PCB from various lists.
2902  * Must be called pcbinfo lock is held in exclusive mode.
2903  */
void
in_pcbremlists(struct inpcb *inp)
{
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * it's local port gets assigned. So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		/* Unlink from the main hash bucket and poison the links */
		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		/* Unlink from the per-port list; free the head if now empty */
		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			kfree_type(struct inpcbport, phd);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
#if SKYWALK
		/* Free up the port in the namespace registrar */
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
#endif /* SKYWALK */
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	/* Drop any flow-control tree entry keyed by this PCB's flowhash */
	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}
2958 
/*
 * Mechanism used to defer the memory release of PCBs.
 * The pcb list will contain the pcb until the reaper can clean it up if
 * the following conditions are met:
 *	1) state "DEAD",
 *	2) wantcnt is STOPUSING
 *	3) usecount is 0
 * This function will be called to either mark the pcb as ready for
 * recycling (WNT_STOPUSING), to acquire a use reference (WNT_ACQUIRE),
 * or to drop one (WNT_RELEASE).
 */
/*
 * Transition the pcb's want-count state machine.
 *
 * pcb:    the protocol control block to operate on.
 * mode:   WNT_STOPUSING, WNT_ACQUIRE or WNT_RELEASE.
 * locked: non-zero when the caller already holds the socket lock.
 *
 * Returns the resulting disposition: WNT_STOPUSING when the pcb must no
 * longer be used, otherwise echoes the requested mode on success.
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	/* wantcnt is manipulated lock-free via compare-and-swap */
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling.  CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}

		/* Ask the pcb garbage collector to run soon */
		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) { /* should stop using */
			return WNT_STOPUSING;
		}
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return WNT_STOPUSING;

	case WNT_ACQUIRE:
		/*
		 * Try to increase reference to pcb.  If WNT_STOPUSING
		 * should bail out.  If socket state DEAD, try to set count
		 * to STOPUSING, return failed otherwise increase cnt.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return WNT_STOPUSING;
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return WNT_ACQUIRE;

	case WNT_RELEASE:
		/*
		 * Release reference.  If result is null and pcb state
		 * is DEAD, set wanted bit to STOPUSING
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				/* releasing a reference that was never taken */
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0) {
					socket_unlock(pcb->inp_socket, 1);
				}
				return WNT_STOPUSING;
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		/* Dead pcb with its last reference dropped: mark STOPUSING */
		if (pcb->inp_state == INPCB_STATE_DEAD) {
			goto stopusing;
		}
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}
		return WNT_RELEASE;

	default:
		panic("%s: so=%p not a valid state =%x", __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return mode;
}
3075 
3076 /*
3077  * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
3078  * The inpcb_compat data structure is passed to user space and must
3079  * not change. We intentionally avoid copying pointers.
3080  */
3081 void
inpcb_to_compat(struct inpcb * inp,struct inpcb_compat * inp_compat)3082 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
3083 {
3084 	bzero(inp_compat, sizeof(*inp_compat));
3085 	inp_compat->inp_fport = inp->inp_fport;
3086 	inp_compat->inp_lport = inp->inp_lport;
3087 	inp_compat->nat_owner = 0;
3088 	inp_compat->nat_cookie = 0;
3089 	inp_compat->inp_gencnt = inp->inp_gencnt;
3090 	inp_compat->inp_flags = inp->inp_flags;
3091 	inp_compat->inp_flow = inp->inp_flow;
3092 	inp_compat->inp_vflag = inp->inp_vflag;
3093 	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
3094 	inp_compat->inp_ip_p = inp->inp_ip_p;
3095 	inp_compat->inp_dependfaddr.inp6_foreign =
3096 	    inp->inp_dependfaddr.inp6_foreign;
3097 	inp_compat->inp_dependladdr.inp6_local =
3098 	    inp->inp_dependladdr.inp6_local;
3099 	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
3100 	inp_compat->inp_depend6.inp6_hlim = 0;
3101 	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
3102 	inp_compat->inp_depend6.inp6_ifindex = 0;
3103 	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
3104 }
3105 
3106 #if XNU_TARGET_OS_OSX
3107 void
inpcb_to_xinpcb64(struct inpcb * inp,struct xinpcb64 * xinp)3108 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
3109 {
3110 	xinp->inp_fport = inp->inp_fport;
3111 	xinp->inp_lport = inp->inp_lport;
3112 	xinp->inp_gencnt = inp->inp_gencnt;
3113 	xinp->inp_flags = inp->inp_flags;
3114 	xinp->inp_flow = inp->inp_flow;
3115 	xinp->inp_vflag = inp->inp_vflag;
3116 	xinp->inp_ip_ttl = inp->inp_ip_ttl;
3117 	xinp->inp_ip_p = inp->inp_ip_p;
3118 	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
3119 	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
3120 	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
3121 	xinp->inp_depend6.inp6_hlim = 0;
3122 	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
3123 	xinp->inp_depend6.inp6_ifindex = 0;
3124 	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
3125 }
3126 #endif /* XNU_TARGET_OS_OSX */
3127 
3128 /*
3129  * The following routines implement this scheme:
3130  *
3131  * Callers of ip_output() that intend to cache the route in the inpcb pass
3132  * a local copy of the struct route to ip_output().  Using a local copy of
3133  * the cached route significantly simplifies things as IP no longer has to
3134  * worry about having exclusive access to the passed in struct route, since
3135  * it's defined in the caller's stack; in essence, this allows for a lock-
3136  * less operation when updating the struct route at the IP level and below,
3137  * whenever necessary. The scheme works as follows:
3138  *
3139  * Prior to dropping the socket's lock and calling ip_output(), the caller
3140  * copies the struct route from the inpcb into its stack, and adds a reference
3141  * to the cached route entry, if there was any.  The socket's lock is then
3142  * dropped and ip_output() is called with a pointer to the copy of struct
3143  * route defined on the stack (not to the one in the inpcb.)
3144  *
3145  * Upon returning from ip_output(), the caller then acquires the socket's
3146  * lock and synchronizes the cache; if there is no route cached in the inpcb,
3147  * it copies the local copy of struct route (which may or may not contain any
3148  * route) back into the cache; otherwise, if the inpcb has a route cached in
3149  * it, the one in the local copy will be freed, if there's any.  Trashing the
3150  * cached route in the inpcb can be avoided because ip_output() is single-
3151  * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
3152  * by the socket/transport layer.)
3153  */
/*
 * Copy the pcb's cached route into the caller-supplied struct route,
 * taking a reference on the route entry if one is present.  Part of the
 * lock-less route caching scheme described above; the socket must be
 * locked by the caller.
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the case of IPv4-mapped address case.
	 * Note: ROUTE_UNUSABLE is true when ro_rt is NULL, so the rt_key()
	 * dereference on the right is only reached for a non-NULL ro_rt.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
		ROUTE_RELEASE(src);
	}

	route_copyout(dst, src, sizeof(*dst));
}
3171 
3172 void
inp_route_copyin(struct inpcb * inp,struct route * src)3173 inp_route_copyin(struct inpcb *inp, struct route *src)
3174 {
3175 	struct route *dst = &inp->inp_route;
3176 
3177 	socket_lock_assert_owned(inp->inp_socket);
3178 
3179 	/* Minor sanity check */
3180 	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
3181 		panic("%s: wrong or corrupted route: %p", __func__, src);
3182 	}
3183 
3184 	route_copyin(src, dst, sizeof(*src));
3185 }
3186 
3187 /*
3188  * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
3189  */
3190 static void
inp_bindif_common(struct inpcb * inp,struct ifnet * ifp)3191 inp_bindif_common(struct inpcb *inp, struct ifnet *ifp)
3192 {
3193 	/*
3194 	 * A zero interface scope value indicates an "unbind".
3195 	 * Otherwise, take in whatever value the app desires;
3196 	 * the app may already know the scope (or force itself
3197 	 * to such a scope) ahead of time before the interface
3198 	 * gets attached.  It doesn't matter either way; any
3199 	 * route lookup from this point on will require an
3200 	 * exact match for the embedded interface scope.
3201 	 */
3202 	inp->inp_boundifp = ifp;
3203 	if (inp->inp_boundifp == NULL) {
3204 		inp->inp_flags &= ~INP_BOUND_IF;
3205 	} else {
3206 		inp->inp_flags |= INP_BOUND_IF;
3207 	}
3208 
3209 	/* Blow away any cached route in the PCB */
3210 	ROUTE_RELEASE(&inp->inp_route);
3211 }
3212 
3213 
/*
 * Bind the pcb to the interface identified by ifscope (IFSCOPE_NONE
 * unbinds).  On success, optionally returns the resolved ifnet via
 * pifp.  Returns ENXIO when the scope does not name a live interface.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	/* Resolve the scope to an ifnet under the ifnet head lock */
	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return ENXIO;
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	inp_bindif_common(inp, ifp);

	if (pifp != NULL) {
		*pifp = ifp;
	}

	return 0;
}
3237 
3238 int
inp_bindtodevice(struct inpcb * inp,const char * ifname)3239 inp_bindtodevice(struct inpcb *inp, const char *ifname)
3240 {
3241 	ifnet_ref_t ifp = NULL;
3242 
3243 	if (*ifname != 0) {
3244 		int error = ifnet_find_by_name(ifname, &ifp);
3245 		if (error != 0) {
3246 			return error;
3247 		}
3248 	}
3249 
3250 	inp_bindif_common(inp, ifp);
3251 
3252 	if (ifp != NULL) {
3253 		ifnet_release(ifp);
3254 	}
3255 	return 0;
3256 }
3257 
3258 /*
3259  * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3260  * as well as for setting PROC_UUID_NO_CELLULAR policy.
3261  */
void
inp_set_nocellular(struct inpcb *inp)
{
	/* Forbid cellular interfaces for this pcb */
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3270 
3271 /*
3272  * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3273  * as well as for clearing PROC_UUID_NO_CELLULAR policy.
3274  */
void
inp_clear_nocellular(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/*
	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
	 * has a higher precedence than INP_NO_IFT_CELLULAR.  Clear the flag
	 * if and only if the socket is unrestricted.
	 */
	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;

		/* Blow away any cached route in the PCB */
		ROUTE_RELEASE(&inp->inp_route);
	}
}
3292 
/* Forbid expensive interfaces for this pcb and invalidate its cached route. */
void
inp_set_noexpensive(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3301 
/* Forbid constrained interfaces for this pcb and invalidate its cached route. */
void
inp_set_noconstrained(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3310 
/* Allow this pcb to use AWDL despite the interface restriction. */
void
inp_set_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3319 
/* Return TRUE if this pcb has been granted unrestricted AWDL use. */
boolean_t
inp_get_awdl_unrestricted(struct inpcb *inp)
{
	return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
}
3325 
/* Revoke the pcb's unrestricted AWDL grant and invalidate its cached route. */
void
inp_clear_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3334 
/* Allow this pcb to use the co-processor interface. */
void
inp_set_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3343 
/* Return TRUE if this pcb may use the co-processor interface. */
boolean_t
inp_get_intcoproc_allowed(struct inpcb *inp)
{
	return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
}
3349 
/* Revoke co-processor interface access and invalidate the cached route. */
void
inp_clear_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3358 
/*
 * Allow this pcb to use management interfaces; also records that the
 * management entitlement check has been performed.
 */
void
inp_set_management_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
	inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3368 
/* Return TRUE if this pcb may use management interfaces. */
boolean_t
inp_get_management_allowed(struct inpcb *inp)
{
	return (inp->inp_flags2 & INP2_MANAGEMENT_ALLOWED) ? TRUE : FALSE;
}
3374 
/* Revoke management-interface access and invalidate the cached route. */
void
inp_clear_management_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_MANAGEMENT_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3383 
/*
 * Allow this pcb to use ultra-constrained interfaces; also records that
 * the corresponding check has been performed.
 */
void
inp_set_ultra_constrained_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
	inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3393 
3394 #if NECP
3395 /*
3396  * Called when PROC_UUID_NECP_APP_POLICY is set.
3397  */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	/* Mark the pcb as wanting per-app NECP policy evaluation */
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}
3403 
3404 /*
3405  * Called when PROC_UUID_NECP_APP_POLICY is cleared.
3406  */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	/* Clear the per-app NECP policy evaluation request */
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
3412 #endif /* NECP */
3413 
3414 /*
3415  * Calculate flow hash for an inp, used by an interface to identify a
3416  * flow. When an interface provides flow control advisory, this flow
3417  * hash is used as an identifier.
3418  */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
#if SKYWALK

	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));

	/* Build the 5-tuple flow key from the pcb's addresses and ports */
	if (inp->inp_vflag & INP_IPV4) {
		fk.ffk_af = AF_INET;
		fk.ffk_laddr_v4 = inp->inp_laddr;
		fk.ffk_raddr_v4 = inp->inp_faddr;
	} else {
		fk.ffk_af = AF_INET6;
		fk.ffk_laddr_v6 = inp->in6p_laddr;
		fk.ffk_raddr_v6 = inp->in6p_faddr;
		/* clear embedded scope ID */
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_laddr_v6)) {
			fk.ffk_laddr_v6.s6_addr16[1] = 0;
		}
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_raddr_v6)) {
			fk.ffk_raddr_v6.s6_addr16[1] = 0;
		}
	}

	fk.ffk_lport = inp->inp_lport;
	fk.ffk_rport = inp->inp_fport;
	/* Fall back to the socket's protocol when inp_ip_p is unset */
	fk.ffk_proto = (inp->inp_ip_p != 0) ? inp->inp_ip_p :
	    (uint8_t)SOCK_PROTO(inp->inp_socket);
	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_INPCB, &fk, &flowid);
	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	ASSERT(inp->inp_flowhash == 0);
	ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) == 0);
	inp->inp_flowhash = flowid;
	VERIFY(RB_INSERT(inp_fc_tree, &inp_fc_tree, inp) == NULL);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowid;

#else /* !SKYWALK */

	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	/* Lazily seed the hash; reseeded below on zero hash or collision */
	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low.  Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowhash;

#endif /* !SKYWALK */
}
3518 
3519 void
inp_flowadv(uint32_t flowhash)3520 inp_flowadv(uint32_t flowhash)
3521 {
3522 	struct inpcb *inp;
3523 
3524 	inp = inp_fc_getinp(flowhash, 0);
3525 
3526 	if (inp == NULL) {
3527 		return;
3528 	}
3529 	inp_fc_feedback(inp);
3530 }
3531 
3532 /*
3533  * Function to compare inp_fc_entries in inp flow control tree
3534  */
/*
 * Order two pcbs by the raw bytes of their flow hash.  The byte-wise
 * comparison is arbitrary but stable, which is all the RB tree needs.
 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	           sizeof(inp1->inp_flowhash));
}
3541 
/*
 * Look up an inpcb by flow hash in the flow-control tree.
 *
 * flags: INPFC_SOLOCKED when the caller holds the socket lock;
 *        INPFC_REMOVE to detach the entry instead of returning it.
 *
 * Returns the pcb with a want-count reference held, or NULL when not
 * found, when removing, or when the pcb is in the STOPUSING state.
 */
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	/* key_inp serves only as the RB_FIND search key */
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (flags & INPFC_REMOVE) {
		ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) != 0);
		/* RB_REMOVE may block; promote the spin lock first */
		lck_mtx_convert_spin(&inp_fc_lck);
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		bzero(&(inp->infc_link), sizeof(inp->infc_link));
#if SKYWALK
		/* Return the flow ID to the flow ID namespace */
		VERIFY(inp->inp_flowhash != 0);
		flowidns_release_flowid(inp->inp_flowhash);
		inp->inp_flowhash = 0;
#endif /* SKYWALK */
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	/* Take a want-count reference; fail if the pcb is going away */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
		inp = NULL;
	}
	lck_mtx_unlock(&inp_fc_lck);

	return inp;
}
3579 
/*
 * Process interface feedback for a flow-controlled pcb: clear the
 * flow-control state and wake up the socket.  Consumes the want-count
 * reference taken by the caller's inp_fc_getinp() lookup.
 */
static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/* Record that feedback arrived while a send was in progress */
	if (inp->inp_sndinprog_cnt > 0) {
		inp->inp_flags |= INP_FC_FEEDBACK;
	}

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	/* TCP flows additionally need their throttling undone */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		inp_fc_unthrottle_tcp(inp);
	}

	socket_unlock(so, 1);
}
3615 
/*
 * Close out the current flow-advisory timing interval: accumulate the
 * elapsed time into inp_fadv_total_time, bump the event count, and
 * clear the start timestamp.  No-op if no interval is in progress.
 */
static void
inp_reset_fc_timerstat(struct inpcb *inp)
{
	uint64_t now;

	if (inp->inp_fadv_start_time == 0) {
		return;
	}

	now = net_uptime_us();
	ASSERT(now >= inp->inp_fadv_start_time);

	inp->inp_fadv_total_time += (now - inp->inp_fadv_start_time);
	inp->inp_fadv_cnt++;

	inp->inp_fadv_start_time = 0;
}
3633 
/*
 * Start a flow-advisory timing interval, unless one is already running.
 */
static void
inp_set_fc_timerstat(struct inpcb *inp)
{
	if (inp->inp_fadv_start_time != 0) {
		return;
	}

	inp->inp_fadv_start_time = net_uptime_us();
}
3643 
/*
 * Clear the pcb's flow-control/suspension state, emit a resume event if
 * the flow had been suspended, and wake up writers blocked on the socket.
 */
void
inp_reset_fc_state(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	/* Capture the prior state before the flags are cleared below */
	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);

	inp_reset_fc_timerstat(inp);

	if (suspended) {
		so->so_flags &= ~(SOF_SUSPENDED);
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
	}

	/* Give a write wakeup to unblock the socket */
	if (needwakeup) {
		sowwakeup(so);
	}
}
3665 
/*
 * Apply a flow advisory (FADV_FLOW_CONTROLLED or FADV_SUSPENDED) to the
 * pcb.  Returns 1 when the advisory was applied, 0 when it was ignored
 * (racing feedback, or the pcb is no longer in the flow-control tree).
 */
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK) {
		return 0;
	}

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	/* Re-validate the pcb via the flow-control tree before acting */
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			goto exit_reset;
		}
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			inp_set_fc_timerstat(inp);
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			inp_set_fc_timerstat(inp);

			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}

		/* Throttle TCP only on the transition into flow control */
		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
			inp_fc_throttle_tcp(inp);
		}
		return 1;
	}

exit_reset:
	inp_reset_fc_timerstat(inp);

	return 0;
}
3717 
3718 /*
3719  * Handler for SO_FLUSH socket option.
3720  */
/*
 * Handler for the SO_FLUSH socket option: flush this flow's queued
 * packets of the given traffic class (or SO_TC_ALL) from both the
 * cached-route interface and the last output interface.
 */
int
inp_flush(struct inpcb *inp, int optval)
{
	u_int32_t flowhash = inp->inp_flowhash;
	struct ifnet *rtifp, *oifp;

	/* Either all classes or one of the valid ones */
	if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
		return EINVAL;
	}

	/* We need a flow hash for identification */
	if (flowhash == 0) {
		return 0;
	}

	/* Grab the interfaces from the route and pcb */
	rtifp = ((inp->inp_route.ro_rt != NULL) ?
	    inp->inp_route.ro_rt->rt_ifp : NULL);
	oifp = inp->inp_last_outifp;

	if (rtifp != NULL) {
		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL);
	}
	/* Avoid flushing the same interface twice */
	if (oifp != NULL && oifp != rtifp) {
		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL);
	}

	return 0;
}
3751 
3752 /*
3753  * Clear the INP_INADDR_ANY flag (special case for PPP only)
3754  */
void
inp_clear_INP_INADDR_ANY(struct socket *so)
{
	struct inpcb *inp = NULL;

	socket_lock(so, 1);
	inp = sotoinpcb(so);
	/* The socket may have been disconnected from its pcb already */
	if (inp) {
		inp->inp_flags &= ~INP_INADDR_ANY;
	}
	socket_unlock(so, 1);
}
3767 
/*
 * Fill in process accounting info (pids, uuids, names) for the socket
 * owning this pcb, distinguishing real vs. effective/delegated identity.
 */
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	strbufcpy(soprocinfo->spi_proc_name, inp->inp_last_proc_name);
	if (so->last_pid != 0) {
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	}
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
	strbufcpy(soprocinfo->spi_e_proc_name, inp->inp_e_proc_name);
}
3791 
/*
 * Find a live pcb with the given flow hash and report its owning
 * process info.  Returns 1 when found, 0 when not, -1 on a zero hash.
 */
int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof(struct so_procinfo));

	/* A zero flow hash identifies nothing */
	if (!flowhash) {
		return -1;
	}

	/* Linear scan of the protocol's pcb list under the shared lock */
	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);

	return found;
}
3819 
3820 #if CONFIG_PROC_UUID_POLICY
/*
 * Apply or clear the per-process UUID no-cellular policy on this pcb,
 * logging the access transition when net_io_policy_log is enabled.
 */
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	/* Log only on an actual transition */
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* Report the effective identity for delegated sockets */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}
3864 
3865 #if NECP
/*
 * Apply or clear the NECP want-app-policy flag on this pcb, logging the
 * transition when net_io_policy_log is enabled.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	/* Log only on an actual transition */
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* Report the effective identity for delegated sockets */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
3906 #endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */
3908 
3909 #if NECP
/*
 * Re-evaluate the NECP policy match for this pcb (catching remote
 * address updates), and rescope the pcb to NECP's preferred interface
 * when it is still unbound (no port, no local address).
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
		inp->inp_flags2 |= INP2_SCOPED_BY_NECP;
	}
}
3923 #endif /* NECP */
3924 
/*
 * Re-evaluate the per-process UUID policy for a socket and apply any
 * changes: the cellular-data restriction and, with NECP, the "want app
 * policy" flag.
 *
 * Returns 0 on success — including ENOENT from the lookup, which simply
 * means no policy entry exists — or an error from
 * proc_uuid_policy_lookup().  Compiles to a no-op when
 * CONFIG_PROC_UUID_POLICY is disabled.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;	/* policy flags returned by the lookup */
	int32_t ogencnt;	/* cached generation count before the lookup */
	int err = 0;
	uint8_t *lookup_uuid = NULL;

	/* Nothing to do if the feature is off or the socket is going away. */
	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return 0;
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return 0;
	}

#if defined(XNU_TARGET_OS_OSX)
	/* Prefer the responsible process' UUID when one is recorded. */
	if (so->so_rpid > 0) {
		lookup_uuid = so->so_ruuid;
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}
#endif
	/* Fall back to the effective (delegated) or last process UUID. */
	if (lookup_uuid == NULL || err == ENOENT) {
		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* A missing policy entry is not an error to the caller. */
	return (err == ENOENT) ? 0 : err;
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return 0;
#endif /* !CONFIG_PROC_UUID_POLICY */
}
3998 
/*
 * net.inet.log_restricted: when non-zero, the inp_restricted_recv/send
 * wrappers below log each packet denied by the restriction checks.
 */
unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
4004 
4005 
4006 /*
4007  * Called when we need to enforce policy restrictions in the input path.
4008  *
4009  * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
4010  */
static boolean_t
_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv) {
		return FALSE;
	}

	/* No interface means nothing to restrict against. */
	if (ifp == NULL) {
		return FALSE;
	}

	/* Interface-class restrictions the socket has opted into. */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	/* AWDL requires an explicit opt-in on the socket. */
	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* Past this point only restricted-receive interfaces can deny. */
	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
		return FALSE;
	}

	/* Socket explicitly accepts traffic from any interface. */
	if (inp->inp_flags & INP_RECV_ANYIF) {
		return FALSE;
	}

	/*
	 * An entitled process can use the management interface without being bound
	 * to the interface
	 */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (INP_MANAGEMENT_ALLOWED(inp)) {
			return FALSE;
		}
		if (if_management_verbose > 1) {
			os_log(OS_LOG_DEFAULT, "_inp_restricted_recv %s:%d not allowed on management interface %s",
			    proc_best_name(current_proc()), proc_getpid(current_proc()),
			    ifp->if_xname);
		}
		return TRUE;
	}

	/* A socket bound to this very interface may receive on it. */
	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
		return FALSE;
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}


	/* Restricted-receive interface and no exception matched: deny. */
	return TRUE;
}
4078 
4079 boolean_t
inp_restricted_recv(struct inpcb * inp,struct ifnet * ifp)4080 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
4081 {
4082 	boolean_t ret;
4083 
4084 	ret = _inp_restricted_recv(inp, ifp);
4085 	if (ret == TRUE && log_restricted) {
4086 		printf("pid %d (%s) is unable to receive packets on %s\n",
4087 		    proc_getpid(current_proc()), proc_best_name(current_proc()),
4088 		    ifp->if_xname);
4089 	}
4090 	return ret;
4091 }
4092 
4093 /*
4094  * Called when we need to enforce policy restrictions in the output path.
4095  *
4096  * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
4097  */
static boolean_t
_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Outbound restrictions.
	 */
	if (!sorestrictsend) {
		return FALSE;
	}

	/* No interface means nothing to restrict against. */
	if (ifp == NULL) {
		return FALSE;
	}

	/* Interface-class restrictions the socket has opted into. */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_ULTRA_CONSTRAINED(ifp) && uuid_is_null(inp->necp_client_uuid) &&
	    !INP_ULTRA_CONSTRAINED_ALLOWED(inp)) {
		// Non-NECP-aware sockets are not allowed to use ultra constrained interfaces
		// without an entitlement
		return TRUE;
	}

	/* AWDL requires an explicit opt-in on the socket. */
	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* Management interfaces require the management-allowed flag. */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (!INP_MANAGEMENT_ALLOWED(inp)) {
			if (if_management_verbose > 1) {
				os_log(OS_LOG_DEFAULT, "_inp_restricted_send %s:%d not allowed on management interface %s",
				    proc_best_name(current_proc()), proc_getpid(current_proc()),
				    ifp->if_xname);
			}
			return TRUE;
		}
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	return FALSE;
}
4154 
4155 boolean_t
inp_restricted_send(struct inpcb * inp,struct ifnet * ifp)4156 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
4157 {
4158 	boolean_t ret;
4159 
4160 	ret = _inp_restricted_send(inp, ifp);
4161 	if (ret == TRUE && log_restricted) {
4162 		printf("%s:%d pid %d (%s) is unable to transmit packets on %s\n",
4163 		    __func__, __LINE__,
4164 		    proc_getpid(current_proc()), proc_best_name(current_proc()),
4165 		    ifp->if_xname);
4166 	}
4167 	return ret;
4168 }
4169 
4170 inline void
inp_count_sndbytes(struct inpcb * inp,u_int32_t th_ack)4171 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
4172 {
4173 	struct ifnet *ifp = inp->inp_last_outifp;
4174 	struct socket *so = inp->inp_socket;
4175 	if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
4176 	    (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
4177 		int32_t unsent;
4178 
4179 		so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
4180 
4181 		/*
4182 		 * There can be data outstanding before the connection
4183 		 * becomes established -- TFO case
4184 		 */
4185 		if (so->so_snd.sb_cc > 0) {
4186 			inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
4187 		}
4188 
4189 		unsent = inp_get_sndbytes_allunsent(so, th_ack);
4190 		if (unsent > 0) {
4191 			inp_incr_sndbytes_unsent(so, unsent);
4192 		}
4193 	}
4194 }
4195 
4196 inline void
inp_incr_sndbytes_total(struct socket * so,int32_t len)4197 inp_incr_sndbytes_total(struct socket *so, int32_t len)
4198 {
4199 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4200 	struct ifnet *ifp = inp->inp_last_outifp;
4201 
4202 	if (ifp != NULL) {
4203 		VERIFY(ifp->if_sndbyte_total >= 0);
4204 		OSAddAtomic64(len, &ifp->if_sndbyte_total);
4205 	}
4206 }
4207 
4208 inline void
inp_decr_sndbytes_total(struct socket * so,int32_t len)4209 inp_decr_sndbytes_total(struct socket *so, int32_t len)
4210 {
4211 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4212 	struct ifnet *ifp = inp->inp_last_outifp;
4213 
4214 	if (ifp != NULL) {
4215 		if (ifp->if_sndbyte_total >= len) {
4216 			OSAddAtomic64(-len, &ifp->if_sndbyte_total);
4217 		} else {
4218 			ifp->if_sndbyte_total = 0;
4219 		}
4220 	}
4221 }
4222 
4223 inline void
inp_incr_sndbytes_unsent(struct socket * so,int32_t len)4224 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
4225 {
4226 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4227 	struct ifnet *ifp = inp->inp_last_outifp;
4228 
4229 	if (ifp != NULL) {
4230 		VERIFY(ifp->if_sndbyte_unsent >= 0);
4231 		OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
4232 	}
4233 }
4234 
4235 inline void
inp_decr_sndbytes_unsent(struct socket * so,int32_t len)4236 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
4237 {
4238 	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4239 		return;
4240 	}
4241 
4242 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4243 	struct ifnet *ifp = inp->inp_last_outifp;
4244 
4245 	if (ifp != NULL) {
4246 		if (ifp->if_sndbyte_unsent >= len) {
4247 			OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
4248 		} else {
4249 			ifp->if_sndbyte_unsent = 0;
4250 		}
4251 	}
4252 }
4253 
4254 inline void
inp_decr_sndbytes_allunsent(struct socket * so,u_int32_t th_ack)4255 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
4256 {
4257 	int32_t len;
4258 
4259 	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4260 		return;
4261 	}
4262 
4263 	len = inp_get_sndbytes_allunsent(so, th_ack);
4264 	inp_decr_sndbytes_unsent(so, len);
4265 }
4266 
4267 #if SKYWALK
4268 inline void
inp_update_netns_flags(struct socket * so)4269 inp_update_netns_flags(struct socket *so)
4270 {
4271 	struct inpcb *inp;
4272 	uint32_t set_flags = 0;
4273 	uint32_t clear_flags = 0;
4274 
4275 	if (!(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
4276 		return;
4277 	}
4278 
4279 	inp = sotoinpcb(so);
4280 
4281 	if (inp == NULL) {
4282 		return;
4283 	}
4284 
4285 	if (!NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
4286 		return;
4287 	}
4288 
4289 	if (so->so_options & SO_NOWAKEFROMSLEEP) {
4290 		set_flags |= NETNS_NOWAKEFROMSLEEP;
4291 	} else {
4292 		clear_flags |= NETNS_NOWAKEFROMSLEEP;
4293 	}
4294 
4295 	if (inp->inp_flags & INP_RECV_ANYIF) {
4296 		set_flags |= NETNS_RECVANYIF;
4297 	} else {
4298 		clear_flags |= NETNS_RECVANYIF;
4299 	}
4300 
4301 	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
4302 		set_flags |= NETNS_EXTBGIDLE;
4303 	} else {
4304 		clear_flags |= NETNS_EXTBGIDLE;
4305 	}
4306 
4307 	netns_change_flags(&inp->inp_netns_token, set_flags, clear_flags);
4308 }
4309 #endif /* SKYWALK */
4310 
/*
 * Copy the socket's aggregate activity bitmap out to the caller's buffer.
 */
inline void
inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
{
	// Just grab the total bitmap until we have more precision in bitmap retrieval
	bcopy(&inp->inp_mstat.ms_total.ts_bitmap, ab, sizeof(*ab));
}
4317 
4318 void
inp_update_last_owner(struct socket * so,struct proc * p,struct proc * ep)4319 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
4320 {
4321 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4322 
4323 	if (inp == NULL) {
4324 		return;
4325 	}
4326 
4327 	if (p != NULL) {
4328 		strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
4329 	}
4330 	if (so->so_flags & SOF_DELEGATED) {
4331 		if (ep != NULL) {
4332 			strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
4333 		} else {
4334 			inp->inp_e_proc_name[0] = 0;
4335 		}
4336 	} else {
4337 		inp->inp_e_proc_name[0] = 0;
4338 	}
4339 	nstat_pcb_update_last_owner(inp);
4340 }
4341 
4342 void
inp_copy_last_owner(struct socket * so,struct socket * head)4343 inp_copy_last_owner(struct socket *so, struct socket *head)
4344 {
4345 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4346 	struct inpcb *head_inp = (struct inpcb *)head->so_pcb;
4347 
4348 	if (inp == NULL || head_inp == NULL) {
4349 		return;
4350 	}
4351 
4352 	strbufcpy(inp->inp_last_proc_name, head_inp->inp_last_proc_name);
4353 	strbufcpy(inp->inp_e_proc_name, head_inp->inp_e_proc_name);
4354 }
4355 
/*
 * proc_iterate() callback: walk a process' open file descriptors and mark
 * every IPv4/IPv6 socket as allowed on management interfaces, provided the
 * owning task holds one of the management/intcoproc entitlements (or the
 * global management_data_unrestricted override is set).
 */
static int
in_check_management_interface_proc_callout(proc_t proc, void *arg __unused)
{
	struct fileproc *fp = NULL;
	task_t __single task = proc_task(proc);
	bool allowed = false;

	if (IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
	    || IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT_DEVELOPMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
	    ) {
		allowed = true;
	}
	/* Skip unentitled processes unless the global override is enabled. */
	if (allowed == false && management_data_unrestricted == false) {
		return PROC_RETURNED;
	}

	proc_fdlock(proc);
	fdt_foreach(fp, proc) {
		struct fileglob *fg = fp->fp_glob;
		struct socket *so;
		struct inpcb *inp;

		if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
			continue;
		}

		so = (struct socket *)fp_get_data(fp);
		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
			continue;
		}

		inp = (struct inpcb *)so->so_pcb;

		/* Take a use-count reference; skip pcbs being torn down. */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}

		socket_lock(so, 1);

		/* Re-check state under the socket lock while dropping the ref. */
		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			socket_unlock(so, 1);
			continue;
		}
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;

		socket_unlock(so, 1);
	}
	proc_fdunlock(proc);

	return PROC_RETURNED;
}
4412 
/* Set once the one-shot scan of all processes below has been performed. */
static bool in_management_interface_checked = false;
4414 
4415 static void
in_management_interface_event_callback(struct nwk_wq_entry * nwk_item)4416 in_management_interface_event_callback(struct nwk_wq_entry *nwk_item)
4417 {
4418 	kfree_type(struct nwk_wq_entry, nwk_item);
4419 
4420 	if (in_management_interface_checked == true) {
4421 		return;
4422 	}
4423 	in_management_interface_checked = true;
4424 
4425 	proc_iterate(PROC_ALLPROCLIST,
4426 	    in_check_management_interface_proc_callout,
4427 	    NULL, NULL, NULL);
4428 }
4429 
4430 void
in_management_interface_check(void)4431 in_management_interface_check(void)
4432 {
4433 	struct nwk_wq_entry *nwk_item;
4434 
4435 	if (if_management_interface_check_needed == false ||
4436 	    in_management_interface_checked == true) {
4437 		return;
4438 	}
4439 
4440 	nwk_item  = kalloc_type(struct nwk_wq_entry,
4441 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
4442 
4443 	nwk_item->func = in_management_interface_event_callback;
4444 
4445 	nwk_wq_enqueue(nwk_item);
4446 }
4447 
/*
 * Serialize bind operations on a socket: sleep until no other thread has
 * the INP2_BIND_IN_PROGRESS state, then claim it for the current thread.
 * Caller must hold the socket lock; paired with inp_exit_bind_in_progress().
 */
void
inp_enter_bind_in_progress(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);

#if (DEBUG || DEVELOPMENT)
	socket_lock_assert_owned(so);
#endif /* (DEBUG || DEVELOPMENT) */

	/* Guard the 16-bit waiter counter against overflow. */
	VERIFY(inp->inp_bind_in_progress_waiters != UINT16_MAX);

	while ((inp->inp_flags2 & INP2_BIND_IN_PROGRESS) != 0) {
		lck_mtx_t *mutex_held;

		inp->inp_bind_in_progress_waiters++;
		inp->inp_bind_in_progress_last_waiter_thread = current_thread();

		/* msleep() must be given the mutex that protects this socket. */
		if (so->so_proto->pr_getlock != NULL) {
			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
		} else {
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		}
		msleep(&inp->inp_bind_in_progress_waiters, mutex_held,
		    PSOCK | PCATCH, "inp_enter_bind_in_progress", NULL);

		inp->inp_bind_in_progress_last_waiter_thread = NULL;

		inp->inp_bind_in_progress_waiters--;
	}
	/* We now own the bind-in-progress state. */
	inp->inp_flags2 |= INP2_BIND_IN_PROGRESS;
	inp->inp_bind_in_progress_thread = current_thread();
}
4480 
/*
 * Release the bind-in-progress state taken by inp_enter_bind_in_progress()
 * and wake one waiting thread, if any.  Caller must hold the socket lock.
 */
void
inp_exit_bind_in_progress(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);

#if (DEBUG || DEVELOPMENT)
	socket_lock_assert_owned(so);
#endif /* (DEBUG || DEVELOPMENT) */

	/* Clear the state before waking a waiter so it can claim it. */
	inp->inp_flags2 &= ~INP2_BIND_IN_PROGRESS;
	inp->inp_bind_in_progress_thread = NULL;
	if (__improbable(inp->inp_bind_in_progress_waiters > 0)) {
		wakeup_one((caddr_t)&inp->inp_bind_in_progress_waiters);
	}
}
4496 
4497 /*
4498  * XXX: this is borrowed from in6_pcbsetport(). If possible, we should
4499  * share this function by all *bsd*...
4500  */
/*
 * Choose and assign an ephemeral local port for an inpcb, honoring the
 * INP_HIGHPORT/INP_LOWPORT ranges and the port-randomization sysctls,
 * then insert the pcb into the pcbinfo hash.
 *
 * When 'locked' is 0 this routine acquires (and releases) the pcbinfo
 * lock itself; otherwise the caller already holds it exclusively.
 * Returns 0 on success, EAGAIN if the range is exhausted or the hash
 * insert fails, or a privilege error for reserved (low) ports.
 */
int
in_pcbsetport(struct in_addr laddr, struct sockaddr *remote, struct inpcb *inp, struct proc *p,
    int locked)
{
	struct socket *__single so = inp->inp_socket;
	uint16_t lport = 0, first, last, rand_port;
	uint16_t *__single lastport;
	int count, error = 0, wild = 0;
	boolean_t counting_down;
	bool found, randomport;
	struct inpcbinfo *__single pcbinfo = inp->inp_pcbinfo;
	kauth_cred_t __single cred;
#if SKYWALK
	bool laddr_unspecified = laddr.s_addr == INADDR_ANY;
#else
#pragma unused(laddr)
#endif
	if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */
		if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}

		/*
		 * Check if a local port was assigned to the inp while
		 * this thread was waiting for the pcbinfo lock
		 */
		if (inp->inp_lport != 0) {
			VERIFY(inp->inp_flags2 & INP2_INHASHLIST);
			lck_rw_done(&pcbinfo->ipi_lock);

			/*
			 * It is not an error if another thread allocated
			 * a port
			 */
			return 0;
		}
	}

	/* XXX: this is redundant when called from in6_pcbbind */
	if ((so->so_options & (SO_REUSEADDR | SO_REUSEPORT)) == 0) {
		wild = INPLOOKUP_WILDCARD;
	}

	randomport = (so->so_flags & SOF_BINDRANDOMPORT) > 0 ||
	    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
	    udp_use_randomport) > 0;

	/* Select the port range and the shared "last port" cursor to use. */
	if (inp->inp_flags & INP_HIGHPORT) {
		first = (uint16_t)ipport_hifirstauto;     /* sysctl */
		last  = (uint16_t)ipport_hilastauto;
		lastport = &pcbinfo->ipi_lasthi;
	} else if (inp->inp_flags & INP_LOWPORT) {
		/* Reserved (low) ports require privilege. */
		cred = kauth_cred_proc_ref(p);
		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
		kauth_cred_unref(&cred);
		if (error != 0) {
			if (!locked) {
				lck_rw_done(&pcbinfo->ipi_lock);
			}
			return error;
		}
		first = (uint16_t)ipport_lowfirstauto;    /* 1023 */
		last  = (uint16_t)ipport_lowlastauto;     /* 600 */
		lastport = &pcbinfo->ipi_lastlow;
	} else {
		first = (uint16_t)ipport_firstauto;       /* sysctl */
		last  = (uint16_t)ipport_lastauto;
		lastport = &pcbinfo->ipi_lastport;
	}

	if (first == last) {
		randomport = false;
	}
	/*
	 * Simple check to ensure all ports are not used up causing
	 * a deadlock here.
	 */
	found = false;
	if (first > last) {
		/* counting down */
		if (randomport) {
			read_frandom(&rand_port, sizeof(rand_port));
			*lastport = first - (rand_port % (first - last));
		}
		count = first - last;
		counting_down = TRUE;
	} else {
		/*
		 * counting up
		 * (first - last) is negative here; with C integer promotion
		 * the modulo still yields a value in [0, last - first).
		 */
		if (randomport) {
			read_frandom(&rand_port, sizeof(rand_port));
			*lastport = first + (rand_port % (first - last));
		}
		count = last - first;
		counting_down = FALSE;
	}
	do {
		if (count-- < 0) {      /* completely used? */
			/*
			 * Undo any address bind that may have
			 * occurred above.
			 */
			inp->in6p_laddr = in6addr_any;
			inp->in6p_last_outifp = NULL;
#if SKYWALK
			if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
				netns_set_ifnet(&inp->inp_netns_token,
				    NULL);
			}
#endif /* SKYWALK */
			if (!locked) {
				lck_rw_done(&pcbinfo->ipi_lock);
			}
			return EAGAIN;
		}
		/* Step the cursor, wrapping back to 'first' outside the range. */
		if (counting_down) {
			--*lastport;
			if (*lastport > first || *lastport < last) {
				*lastport = first;
			}
		} else {
			++*lastport;
			if (*lastport < first || *lastport > last) {
				*lastport = first;
			}
		}
		lport = htons(*lastport);

		/*
		 * Skip if this is a restricted port as we do not want to
		 * use restricted ports as ephemeral
		 */
		if (IS_RESTRICTED_IN_PORT(lport)) {
			continue;
		}

		found = (in_pcblookup_local(pcbinfo, inp->inp_laddr,
		    lport, wild) == NULL);
#if SKYWALK
		/* Also reserve the port in the Skywalk namespace(s). */
		if (found &&
		    (SOCK_PROTO(so) == IPPROTO_TCP ||
		    SOCK_PROTO(so) == IPPROTO_UDP) &&
		    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
			if (laddr_unspecified &&
			    (inp->inp_vflag & INP_IPV6) != 0 &&
			    (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
				struct in_addr ip_zero = { .s_addr = 0 };

				netns_release(&inp->inp_wildcard_netns_token);
				if (netns_reserve_in(
					    &inp->inp_wildcard_netns_token,
					    ip_zero,
					    (uint8_t)SOCK_PROTO(so), lport,
					    NETNS_BSD, NULL) != 0) {
					/* port in use in IPv4 namespace */
					found = false;
				}
			}
			if (found &&
			    netns_reserve_in(&inp->inp_netns_token,
			    inp->inp_laddr, (uint8_t)SOCK_PROTO(so), lport,
			    NETNS_BSD, NULL) != 0) {
				netns_release(&inp->inp_wildcard_netns_token);
				found = false;
			}
		}
#endif /* SKYWALK */
	} while (!found);

	inp->inp_lport = lport;
	inp->inp_flags |= INP_ANONPORT;

	/*
	 * NOTE(review): INP_IPV6 is dropped around in_pcbinshash(),
	 * presumably so the pcb is hashed on its IPv4 form — confirm
	 * against in_pcbinshash().
	 */
	bool is_ipv6 = (inp->inp_vflag & INP_IPV6);
	if (is_ipv6) {
		inp->inp_vflag &= ~INP_IPV6;
	}

	if (in_pcbinshash(inp, remote, 1) != 0) {
		/* Hash insert failed: roll back the port assignment. */
		inp->inp_last_outifp = NULL;
		inp->inp_lifscope = IFSCOPE_NONE;
#if SKYWALK
		netns_release(&inp->inp_netns_token);
#endif /* SKYWALK */
		inp->inp_lport = 0;
		inp->inp_flags &= ~INP_ANONPORT;
		if (is_ipv6) {
			inp->inp_vflag |= INP_IPV6;
		}
		if (!locked) {
			lck_rw_done(&pcbinfo->ipi_lock);
		}
		return EAGAIN;
	}
	if (is_ipv6) {
		inp->inp_vflag |= INP_IPV6;
	}

	if (!locked) {
		lck_rw_done(&pcbinfo->ipi_lock);
	}
	return 0;
}
4704