xref: /xnu-11215.1.10/bsd/netinet/in_pcb.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1991, 1993, 1995
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
61  * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <sys/file_internal.h>
82 #include <net/dlil.h>
83 
84 #include <libkern/OSAtomic.h>
85 #include <kern/locks.h>
86 
87 #include <machine/limits.h>
88 
89 #include <kern/zalloc.h>
90 
91 #include <net/if.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/flowhash.h>
95 #include <net/flowadv.h>
96 #include <net/nat464_utils.h>
97 #include <net/ntstat.h>
98 #include <net/nwk_wq.h>
99 #include <net/restricted_in_port.h>
100 
101 #include <netinet/in.h>
102 #include <netinet/in_pcb.h>
103 #include <netinet/inp_log.h>
104 #include <netinet/in_var.h>
105 #include <netinet/ip_var.h>
106 
107 #include <netinet/ip6.h>
108 #include <netinet6/ip6_var.h>
109 
110 #include <sys/kdebug.h>
111 #include <sys/random.h>
112 
113 #include <dev/random/randomdev.h>
114 #include <mach/boolean.h>
115 
116 #include <atm/atm_internal.h>
117 #include <pexpert/pexpert.h>
118 
119 #if NECP
120 #include <net/necp.h>
121 #endif
122 
123 #include <sys/stat.h>
124 #include <sys/ubc.h>
125 #include <sys/vnode.h>
126 
127 #include <os/log.h>
128 
129 #if SKYWALK
130 #include <skywalk/namespace/flowidns.h>
131 #endif /* SKYWALK */
132 
133 #include <IOKit/IOBSD.h>
134 
135 #include <net/sockaddr_utils.h>
136 
137 extern const char *proc_name_address(struct proc *);
138 
139 static LCK_GRP_DECLARE(inpcb_lock_grp, "inpcb");
140 static LCK_ATTR_DECLARE(inpcb_lock_attr, 0, 0);
141 static LCK_MTX_DECLARE_ATTR(inpcb_lock, &inpcb_lock_grp, &inpcb_lock_attr);
142 static LCK_MTX_DECLARE_ATTR(inpcb_timeout_lock, &inpcb_lock_grp, &inpcb_lock_attr);
143 
144 static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
145 
146 static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
147 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
148 static boolean_t inpcb_ticking = FALSE;         /* "slow" timer is scheduled */
149 static boolean_t inpcb_fast_timer_on = FALSE;
150 
151 #define INPCB_GCREQ_THRESHOLD   50000
152 
153 static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
154 static void inpcb_sched_timeout(void);
155 static void inpcb_sched_lazy_timeout(void);
156 static void _inpcb_sched_timeout(unsigned int);
157 static void inpcb_timeout(void *, void *);
158 const int inpcb_timeout_lazy = 10;      /* 10 seconds leeway for lazy timers */
159 extern int tvtohz(struct timeval *);
160 
161 #if CONFIG_PROC_UUID_POLICY
162 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
163 #if NECP
164 static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
165 #endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */
167 
168 #define DBG_FNC_PCB_LOOKUP      NETDBG_CODE(DBG_NETTCP, (6 << 8))
169 #define DBG_FNC_PCB_HLOOKUP     NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
170 
171 int allow_udp_port_exhaustion = 0;
172 
173 /*
174  * These configure the range of local port addresses assigned to
175  * "unspecified" outgoing connections/packets/whatever.
176  */
177 int     ipport_lowfirstauto  = IPPORT_RESERVED - 1;     /* 1023 */
178 int     ipport_lowlastauto = IPPORT_RESERVEDSTART;      /* 600 */
179 int     ipport_firstauto = IPPORT_HIFIRSTAUTO;          /* 49152 */
180 int     ipport_lastauto  = IPPORT_HILASTAUTO;           /* 65535 */
181 int     ipport_hifirstauto = IPPORT_HIFIRSTAUTO;        /* 49152 */
182 int     ipport_hilastauto  = IPPORT_HILASTAUTO;         /* 65535 */
183 
184 #define RANGECHK(var, min, max) \
185 	if ((var) < (min)) { (var) = (min); } \
186 	else if ((var) > (max)) { (var) = (max); }
187 
188 static int
189 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
190 {
191 #pragma unused(arg1, arg2)
192 	int error;
193 	int new_value = *(int *)oidp->oid_arg1;
194 #if (DEBUG | DEVELOPMENT)
195 	int old_value = *(int *)oidp->oid_arg1;
196 	/*
197 	 * For unit testing allow a non-superuser process with the
198 	 * proper entitlement to modify the variables
199 	 */
200 	if (req->newptr) {
201 		if (proc_suser(current_proc()) != 0 &&
202 		    (error = priv_check_cred(kauth_cred_get(),
203 		    PRIV_NETINET_RESERVEDPORT, 0))) {
204 			return EPERM;
205 		}
206 	}
207 #endif /* (DEBUG | DEVELOPMENT) */
208 
209 	error = sysctl_handle_int(oidp, &new_value, 0, req);
210 	if (!error) {
211 		if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
212 			RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
213 		} else {
214 			RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
215 		}
216 		*(int *)oidp->oid_arg1 = new_value;
217 	}
218 
219 #if (DEBUG | DEVELOPMENT)
220 	os_log(OS_LOG_DEFAULT,
221 	    "%s:%u sysctl net.restricted_port.verbose: %d -> %d)",
222 	    proc_best_name(current_proc()), proc_selfpid(),
223 	    old_value, *(int *)oidp->oid_arg1);
224 #endif /* (DEBUG | DEVELOPMENT) */
225 
226 	return error;
227 }
228 
229 #undef RANGECHK
230 
231 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
232     CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");
233 
234 #if (DEBUG | DEVELOPMENT)
235 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
236 #else
237 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
238 #endif /* (DEBUG | DEVELOPMENT) */
239 
240 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
241     CTLFAGS_IP_PORTRANGE,
242     &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
243 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
244     CTLFAGS_IP_PORTRANGE,
245     &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
246 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
247     CTLFAGS_IP_PORTRANGE,
248     &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
249 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
250     CTLFAGS_IP_PORTRANGE,
251     &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
252 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
253     CTLFAGS_IP_PORTRANGE,
254     &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
255 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
256     CTLFAGS_IP_PORTRANGE,
257     &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
258 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
259     CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
260 
261 static uint32_t apn_fallbk_debug = 0;
262 #define apn_fallbk_log(x)       do { if (apn_fallbk_debug >= 1) log x; } while (0)
263 
264 #if !XNU_TARGET_OS_OSX
265 static boolean_t apn_fallbk_enabled = TRUE;
266 
267 SYSCTL_DECL(_net_inet);
268 SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
269 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
270     &apn_fallbk_enabled, 0, "APN fallback enable");
271 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
272     &apn_fallbk_debug, 0, "APN fallback debug enable");
273 #else /* XNU_TARGET_OS_OSX */
274 static boolean_t apn_fallbk_enabled = FALSE;
275 #endif /* XNU_TARGET_OS_OSX */
276 
277 extern int      udp_use_randomport;
278 extern int      tcp_use_randomport;
279 
/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr  v4;         /* IPv4 address view */
		struct in6_addr v6;         /* IPv6 address view */
		u_int8_t        addr8[16];  /* raw byte view */
		u_int16_t       addr16[8];  /* raw 16-bit view */
		u_int32_t       addr32[4];  /* raw 32-bit view */
	} infha;
};

/*
 * All fields that feed the flow hash for a PCB.  The two rand values
 * salt the hash so flow IDs are not predictable across boots.
 */
struct inp_flowhash_key {
	struct inp_flowhash_key_addr    infh_laddr;  /* local address */
	struct inp_flowhash_key_addr    infh_faddr;  /* foreign address */
	u_int32_t                       infh_lport;  /* local port */
	u_int32_t                       infh_fport;  /* foreign port */
	u_int32_t                       infh_af;     /* address family */
	u_int32_t                       infh_proto;  /* IP protocol */
	u_int32_t                       infh_rand1;  /* random salt #1 */
	u_int32_t                       infh_rand2;  /* random salt #2 */
};
301 
302 #if !SKYWALK
303 static u_int32_t inp_hash_seed = 0;
304 #endif /* !SKYWALK */
305 
306 static int infc_cmp(const struct inpcb *, const struct inpcb *);
307 
308 /* Flags used by inp_fc_getinp */
309 #define INPFC_SOLOCKED  0x1
310 #define INPFC_REMOVE    0x2
311 static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
312 
313 static void inp_fc_feedback(struct inpcb *);
314 extern void tcp_remove_from_time_wait(struct inpcb *inp);
315 
316 static LCK_MTX_DECLARE_ATTR(inp_fc_lck, &inpcb_lock_grp, &inpcb_lock_attr);
317 
318 RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
319 RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
320 RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
321 
322 /*
323  * Use this inp as a key to find an inp in the flowhash tree.
324  * Accesses to it are protected by inp_fc_lck.
325  */
326 struct inpcb key_inp;
327 
328 /*
329  * in_pcb.c: manage the Protocol Control Blocks.
330  */
331 
/*
 * One-time initialization of the inpcb subsystem: privacy-logging
 * configuration, the lazy and fast timeout thread calls, and the
 * flow-advisory red-black tree.
 */
void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;
	uint32_t logging_config;

	/* Calling this twice is a programming error. */
	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	/* High bit of the diagnostic config requests address privacy in logs. */
	logging_config = atm_get_diagnostic_config();
	if (logging_config & 0x80000000) {
		inp_log_privacy = 1;
	}

	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	/* Give it an arg so that we know that this is the fast timer */
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
		inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
		panic("unable to alloc the inpcb thread call");
	}

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}
364 
/* TRUE when any of the three per-flavor timer request counters is nonzero */
#define INPCB_HAVE_TIMER_REQ(req)       (((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))

/*
 * Thread-call handler shared by the lazy and fast inpcb timers.
 * arg0 is non-NULL only for the fast timer instance (see in_pcbinit,
 * which passes &inpcb_timeout as arg0 for the fast thread call).
 * Runs each registered inpcbinfo's garbage-collection and timer
 * callbacks as requested, then decides whether and how to re-arm.
 */
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	/* Snapshot and clear the pending-work flags under the timeout lock. */
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		/* Drop the spin lock before doing real work per-protocol. */
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					/*
					 * The callback may post new requests;
					 * accumulate them to drive re-arming.
					 */
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	inpcb_timeout_run--;
	/*
	 * NOTE(review): inpcb_timeout_run is u_int16_t, so the ">= 0" half
	 * of this check is vacuously true; an underflow would instead show
	 * up as the "< 2" half failing.
	 */
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}
455 
/* Request the fast (no-leeway) flavor of the inpcb timer. */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
461 
/* Request the lazy flavor of the inpcb timer (inpcb_timeout_lazy sec leeway). */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
467 
/*
 * Arm the inpcb timeout thread call, due in one second.  offset == 0
 * requests the fast timer (no leeway); a nonzero offset requests the
 * lazy timer with 'offset' seconds of leeway.  Must be called with
 * inpcb_timeout_lock held (possibly spin-held; it is converted to a
 * full mutex before entering the thread-call APIs).
 */
static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		/* No timer outstanding: schedule the requested flavor. */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
				inpcb_thread_call, NULL, deadline, leeway,
				THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}
504 
505 void
inpcb_gc_sched(struct inpcbinfo * ipi,u_int32_t type)506 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
507 {
508 	u_int32_t gccnt;
509 
510 	lck_mtx_lock_spin(&inpcb_timeout_lock);
511 	inpcb_garbage_collecting = TRUE;
512 	gccnt = ipi->ipi_gc_req.intimer_nodelay +
513 	    ipi->ipi_gc_req.intimer_fast;
514 
515 	if (gccnt > INPCB_GCREQ_THRESHOLD) {
516 		type = INPCB_TIMER_FAST;
517 	}
518 
519 	switch (type) {
520 	case INPCB_TIMER_NODELAY:
521 		os_atomic_inc(&ipi->ipi_gc_req.intimer_nodelay, relaxed);
522 		inpcb_sched_timeout();
523 		break;
524 	case INPCB_TIMER_FAST:
525 		os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
526 		inpcb_sched_timeout();
527 		break;
528 	default:
529 		os_atomic_inc(&ipi->ipi_gc_req.intimer_lazy, relaxed);
530 		inpcb_sched_lazy_timeout();
531 		break;
532 	}
533 	lck_mtx_unlock(&inpcb_timeout_lock);
534 }
535 
536 void
inpcb_timer_sched(struct inpcbinfo * ipi,u_int32_t type)537 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
538 {
539 	lck_mtx_lock_spin(&inpcb_timeout_lock);
540 	inpcb_ticking = TRUE;
541 	switch (type) {
542 	case INPCB_TIMER_NODELAY:
543 		os_atomic_inc(&ipi->ipi_timer_req.intimer_nodelay, relaxed);
544 		inpcb_sched_timeout();
545 		break;
546 	case INPCB_TIMER_FAST:
547 		os_atomic_inc(&ipi->ipi_timer_req.intimer_fast, relaxed);
548 		inpcb_sched_timeout();
549 		break;
550 	default:
551 		os_atomic_inc(&ipi->ipi_timer_req.intimer_lazy, relaxed);
552 		inpcb_sched_lazy_timeout();
553 		break;
554 	}
555 	lck_mtx_unlock(&inpcb_timeout_lock);
556 }
557 
558 void
in_pcbinfo_attach(struct inpcbinfo * ipi)559 in_pcbinfo_attach(struct inpcbinfo *ipi)
560 {
561 	struct inpcbinfo *ipi0;
562 
563 	lck_mtx_lock(&inpcb_lock);
564 	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
565 		if (ipi0 == ipi) {
566 			panic("%s: ipi %p already in the list",
567 			    __func__, ipi);
568 			/* NOTREACHED */
569 		}
570 	}
571 	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
572 	lck_mtx_unlock(&inpcb_lock);
573 }
574 
575 int
in_pcbinfo_detach(struct inpcbinfo * ipi)576 in_pcbinfo_detach(struct inpcbinfo *ipi)
577 {
578 	struct inpcbinfo *ipi0;
579 	int error = 0;
580 
581 	lck_mtx_lock(&inpcb_lock);
582 	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
583 		if (ipi0 == ipi) {
584 			break;
585 		}
586 	}
587 	if (ipi0 != NULL) {
588 		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
589 	} else {
590 		error = ENXIO;
591 	}
592 	lck_mtx_unlock(&inpcb_lock);
593 
594 	return error;
595 }
596 
597 __attribute__((noinline))
598 char *
inp_snprintf_tuple(struct inpcb * inp,char * __sized_by (buflen)buf,size_t buflen)599 inp_snprintf_tuple(struct inpcb *inp, char *__sized_by(buflen) buf, size_t buflen)
600 {
601 	char laddrstr[MAX_IPv6_STR_LEN];
602 	char faddrstr[MAX_IPv6_STR_LEN];
603 	uint16_t lport = 0;
604 	uint16_t fport = 0;
605 	uint16_t proto = IPPROTO_IP;
606 
607 	if (inp->inp_socket != NULL) {
608 		proto = SOCK_PROTO(inp->inp_socket);
609 
610 		if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
611 			lport  = inp->inp_lport;
612 			fport = inp->inp_fport;
613 		}
614 	}
615 	if (inp->inp_vflag & INP_IPV4) {
616 		inet_ntop(AF_INET, (void *)&inp->inp_laddr.s_addr, laddrstr, sizeof(laddrstr));
617 		inet_ntop(AF_INET, (void *)&inp->inp_faddr.s_addr, faddrstr, sizeof(faddrstr));
618 	} else if (inp->inp_vflag & INP_IPV6) {
619 		inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, laddrstr, sizeof(laddrstr));
620 		inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, faddrstr, sizeof(faddrstr));
621 	}
622 	snprintf(buf, buflen, "[%u %s:%u %s:%u]",
623 	    proto, laddrstr, ntohs(lport), faddrstr, ntohs(fport));
624 
625 	return buf;
626 }
627 
/*
 * Determine, once per PCB, whether its owning task may use management
 * interfaces, caching the verdict in inp_flags2 via the
 * INP2_MANAGEMENT_CHECKED / INP2_MANAGEMENT_ALLOWED bits.
 */
__attribute__((noinline))
void
in_pcb_check_management_entitled(struct inpcb *inp)
{
	/* Already evaluated for this PCB; the cached bits stand. */
	if (inp->inp_flags2 & INP2_MANAGEMENT_CHECKED) {
		return;
	}

	if (management_data_unrestricted) {
		/* Global override: management data is unrestricted. */
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
	} else if (if_management_interface_check_needed == true) {
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
		/*
		 * Note that soopt_cred_check checks both intcoproc entitlements.
		 * We check MANAGEMENT_DATA_ENTITLEMENT directly as there is no
		 * corresponding PRIV value for it.
		 */
		if (soopt_cred_check(inp->inp_socket, PRIV_NET_RESTRICTED_INTCOPROC, false, false) == 0
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		} else {
			/* Denied: optionally log the offending process and tuple. */
			if (__improbable(if_management_verbose > 1)) {
				char buf[128];

				os_log(OS_LOG_DEFAULT, "in_pcb_check_management_entitled %s:%d not management entitled %s",
				    proc_best_name(current_proc()),
				    proc_selfpid(),
				    inp_snprintf_tuple(inp, buf, sizeof(buf)));
			}
		}
	}
}
664 
665 __attribute__((noinline))
666 void
in_pcb_check_ultra_constrained_entitled(struct inpcb * inp)667 in_pcb_check_ultra_constrained_entitled(struct inpcb *inp)
668 {
669 	if (inp->inp_flags2 & INP2_ULTRA_CONSTRAINED_CHECKED) {
670 		return;
671 	}
672 
673 	if (if_ultra_constrained_check_needed) {
674 		inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;
675 		if (IOCurrentTaskHasEntitlement(ULTRA_CONSTRAINED_ENTITLEMENT)) {
676 			inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
677 		}
678 	}
679 }
680 
681 /*
682  * Allocate a PCB and associate it with the socket.
683  *
684  * Returns:	0			Success
685  *		ENOBUFS
686  *		ENOMEM
687  */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		/* Fresh allocation from this protocol's inpcb zone. */
		void *__unsafe_indexable addr = __zalloc_flags(pcbinfo->ipi_zone,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
		__builtin_assume(addr != NULL);
		/*
		 * N.B: the allocation above may actually be inp_tp
		 * which is a structure that includes inpcb, but for
		 * the purposes of this function we just touch
		 * struct inpcb.
		 */
		inp = __unsafe_forge_single(struct inpcb *, addr);
	} else {
		/*
		 * Reuse the PCB cached in the socket layer: zero it but
		 * preserve the saved per-protocol PCB pointer.
		 */
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof(*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	/*
	 * Round _ptr up to the natural alignment of *_type and cast.
	 * Used below to carve 64-bit-aligned stat structures out of the
	 * over-sized *_store byte arrays embedded in struct inpcb.
	 */
#define INP_ALIGN_AND_CAST(_type, _ptr) ({                                \
	typeof((_type)(void *__header_bidi_indexable)NULL) __roundup_type;\
	const volatile char *__roundup_align_ptr = (const volatile char *)(_ptr); \
	__roundup_align_ptr += P2ROUNDUP((uintptr_t)__roundup_align_ptr,  \
	                                 _Alignof(typeof(*__roundup_type))) - (uintptr_t)__roundup_align_ptr; \
	__DEQUALIFY(_type, __roundup_align_ptr);                          \
})
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_stat_store);
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_cstat_store);
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_wstat_store);
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_Wstat_store);
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_btstat is always 64-bit aligned */
	inp->inp_btstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_btstat_store);
	if (((uintptr_t)inp->inp_btstat - (uintptr_t)inp->inp_btstat_store) +
	    sizeof(*inp->inp_btstat) > sizeof(inp->inp_btstat_store)) {
		panic("%s: insufficient space to align inp_btstat", __func__);
		/* NOTREACHED */
	}
#undef INP_ALIGN_AND_CAST
	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    &pcbinfo->ipi_lock_attr);
	}

	/* Default to v6-only unless mapped v4 addresses are enabled. */
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

	(void) inp_update_policy(inp);

	/* Publish the new PCB on the protocol's global list. */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(&pcbinfo->ipi_lock);
	return 0;
}
790 
791 /*
792  * in_pcblookup_local_and_cleanup does everything
793  * in_pcblookup_local does but it checks for a socket
794  * that's going away. Since we know that the lock is
795  * held read+write when this function is called, we
796  * can safely dispose of this socket like the slow
797  * timer would usually do and return NULL. This is
798  * great for bind.
799  */
800 struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo * pcbinfo,struct in_addr laddr,u_int lport_arg,int wild_okay)801 in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
802     u_int lport_arg, int wild_okay)
803 {
804 	struct inpcb *inp;
805 
806 	/* Perform normal lookup */
807 	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
808 
809 	/* Check if we found a match but it's waiting to be disposed */
810 	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
811 		struct socket *so = inp->inp_socket;
812 
813 		socket_lock(so, 0);
814 
815 		if (so->so_usecount == 0) {
816 			if (inp->inp_state != INPCB_STATE_DEAD) {
817 				in_pcbdetach(inp);
818 			}
819 			in_pcbdispose(inp);     /* will unlock & destroy */
820 			inp = NULL;
821 		} else {
822 			socket_unlock(so, 0);
823 		}
824 	}
825 
826 	return inp;
827 }
828 
829 static void
in_pcb_conflict_post_msg(u_int16_t port)830 in_pcb_conflict_post_msg(u_int16_t port)
831 {
832 	/*
833 	 * Radar 5523020 send a kernel event notification if a
834 	 * non-participating socket tries to bind the port a socket
835 	 * who has set SOF_NOTIFYCONFLICT owns.
836 	 */
837 	struct kev_msg ev_msg;
838 	struct kev_in_portinuse in_portinuse;
839 
840 	bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
841 	bzero(&ev_msg, sizeof(struct kev_msg));
842 	in_portinuse.port = ntohs(port);        /* port in host order */
843 	in_portinuse.req_pid = proc_selfpid();
844 	ev_msg.vendor_code = KEV_VENDOR_APPLE;
845 	ev_msg.kev_class = KEV_NETWORK_CLASS;
846 	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
847 	ev_msg.event_code = KEV_INET_PORTINUSE;
848 	ev_msg.dv[0].data_ptr = &in_portinuse;
849 	ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
850 	ev_msg.dv[1].data_length = 0;
851 	dlil_post_complete_msg(NULL, &ev_msg);
852 }
853 
854 /*
855  * Bind an INPCB to an address and/or port.  This routine should not alter
856  * the caller-supplied local address "nam" or remote address "remote".
857  *
858  * Returns:	0			Success
859  *		EADDRNOTAVAIL		Address not available.
860  *		EINVAL			Invalid argument
861  *		EAFNOSUPPORT		Address family not supported [notdef]
862  *		EACCES			Permission denied
863  *		EADDRINUSE		Address in use
864  *		EAGAIN			Resource unavailable, try again
865  *		priv_check_cred:EPERM	Operation not permitted
866  */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct sockaddr *remote, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0;
	int reuseport = (so->so_options & SO_REUSEPORT);
	int error = 0;
	int randomport;
	int conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	/*
	 * Only one bind may be in progress on a PCB at a time; once the
	 * flag is set it is cleared at the "done" label on every exit path.
	 */
	if (inp->inp_flags2 & INP2_BIND_IN_PROGRESS) {
		return EINVAL;
	}
	inp->inp_flags2 |= INP2_BIND_IN_PROGRESS;

	if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
		error = EADDRNOTAVAIL;
		goto done;
	}
	if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
		wild = 1;
	}

	bzero(&laddr, sizeof(laddr));

	/*
	 * Drop the socket lock while acquiring the PCB-info lock; the bind
	 * state is re-checked immediately after, since another thread may
	 * have completed a bind on this PCB in the meantime.
	 */
	socket_unlock(so, 0); /* keep reference on socket */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		/* another thread completed the bind */
		lck_rw_done(&pcbinfo->ipi_lock);
		socket_lock(so, 0);
		error = EINVAL;
		goto done;
	}

	if (nam != NULL) {
		if (nam->sa_len != sizeof(struct sockaddr_in)) {
			lck_rw_done(&pcbinfo->ipi_lock);
			socket_lock(so, 0);
			error = EINVAL;
			goto done;
		}
#if 0
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(&pcbinfo->ipi_lock);
			socket_lock(so, 0);
			error = EAFNOSUPPORT;
			goto done;
		}
#endif /* 0 */
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR) {
				reuseport = SO_REUSEADDR | SO_REUSEPORT;
			}
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			SOCKADDR_ZERO(&sin, sizeof(sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof(struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			/* A specific local address must belong to an interface */
			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				error = EADDRNOTAVAIL;
				goto done;
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				ifa_remref(ifa);
			}
		}

#if SKYWALK
		if (inp->inp_flags2 & INP2_EXTERNAL_PORT) {
			// Extract the external flow info
			struct ns_flow_info nfi = {};
			error = necp_client_get_netns_flow_info(inp->necp_client_uuid,
			    &nfi);
			if (error != 0) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				goto done;
			}

			// Extract the reserved port
			u_int16_t reserved_lport = 0;
			if (nfi.nfi_laddr.sa.sa_family == AF_INET) {
				reserved_lport = nfi.nfi_laddr.sin.sin_port;
			} else if (nfi.nfi_laddr.sa.sa_family == AF_INET6) {
				reserved_lport = nfi.nfi_laddr.sin6.sin6_port;
			} else {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				error = EINVAL;
				goto done;
			}

			// Validate or use the reserved port
			if (lport == 0) {
				lport = reserved_lport;
			} else if (lport != reserved_lport) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				error = EINVAL;
				goto done;
			}
		}

		/* Do not allow reserving a UDP port if remaining UDP port count is below 4096 */
		if (SOCK_PROTO(so) == IPPROTO_UDP && !allow_udp_port_exhaustion) {
			uint32_t current_reservations = 0;
			if (inp->inp_vflag & INP_IPV6) {
				current_reservations = netns_lookup_reservations_count_in6(inp->in6p_laddr, IPPROTO_UDP);
			} else {
				current_reservations = netns_lookup_reservations_count_in(inp->inp_laddr, IPPROTO_UDP);
			}
			if (USHRT_MAX - UDP_RANDOM_PORT_RESERVE < current_reservations) {
				log(LOG_ERR, "UDP port not available, less than 4096 UDP ports left");
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				error = EADDRNOTAVAIL;
				goto done;
			}
		}

#endif /* SKYWALK */

		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

#if XNU_TARGET_OS_OSX
			/* Binding to a low port on a specific address needs privilege */
			if (ntohs(lport) < IPPORT_RESERVED &&
			    SIN(nam)->sin_addr.s_addr != 0 &&
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					error = EACCES;
					goto done;
				}
			}
#endif /* XNU_TARGET_OS_OSX */
			/*
			 * Check whether the process is allowed to bind to a restricted port
			 */
			if (!current_task_can_use_restricted_in_port(lport,
			    (uint8_t)SOCK_PROTO(so), PORT_FLAGS_BSD)) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				error = EADDRINUSE;
				goto done;
			}

			/*
			 * Cross-user check: reject when a non-root caller
			 * would take a port already bound by a different
			 * user and none of the sharing escapes below apply
			 * (SO_REUSEPORT wildcard, SOF_REUSESHAREUID, or a
			 * matching NECP external-port reservation).
			 */
			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
				    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
				    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY) &&
			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
					conflict = 1;
				}

				lck_rw_done(&pcbinfo->ipi_lock);

				if (conflict) {
					in_pcb_conflict_post_msg(lport);
				}

				socket_lock(so, 0);
				error = EADDRINUSE;
				goto done;
			}
			/*
			 * General in-use check, honoring the SO_REUSEADDR /
			 * SO_REUSEPORT semantics captured in "wild" and
			 * "reuseport" above.
			 */
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0 &&
			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6) {
					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
						conflict = 1;
					}

					lck_rw_done(&pcbinfo->ipi_lock);

					if (conflict) {
						in_pcb_conflict_post_msg(lport);
					}
					socket_lock(so, 0);
					error = EADDRINUSE;
					goto done;
				}
			}
#if SKYWALK
			/* Reserve the explicit port in the shared namespace */
			if ((SOCK_PROTO(so) == IPPROTO_TCP ||
			    SOCK_PROTO(so) == IPPROTO_UDP) &&
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
				int res_err = 0;
				if (inp->inp_vflag & INP_IPV6) {
					res_err = netns_reserve_in6(
						&inp->inp_netns_token,
						SIN6(nam)->sin6_addr,
						(uint8_t)SOCK_PROTO(so), lport, NETNS_BSD,
						NULL);
				} else {
					res_err = netns_reserve_in(
						&inp->inp_netns_token,
						SIN(nam)->sin_addr, (uint8_t)SOCK_PROTO(so),
						lport, NETNS_BSD, NULL);
				}
				if (res_err != 0) {
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					error = EADDRINUSE;
					goto done;
				}
			}
#endif /* SKYWALK */
		}
		laddr = SIN(nam)->sin_addr;
	}
	/* No port requested (or reserved): pick an ephemeral one */
	if (lport == 0) {
		u_short first, last;
		int count;
		bool found;

		/*
		 * Override wild = 1 for implicit bind (mainly used by connect)
		 * For implicit bind (lport == 0), we always use an unused port,
		 * so REUSEADDR|REUSEPORT don't apply
		 */
		wild = 1;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = (u_short)ipport_hifirstauto;     /* sysctl */
			last  = (u_short)ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				goto done;
			}
			first = (u_short)ipport_lowfirstauto;    /* 1023 */
			last  = (u_short)ipport_lowlastauto;     /* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = (u_short)ipport_firstauto;       /* sysctl */
			last  = (u_short)ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */

		if (first == last) {
			randomport = 0;
		}
		/*
		 * Simple check to ensure all ports are not used up causing
		 * a deadlock here.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			struct in_addr lookup_addr;

			/*
			 * counting down
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof(rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {      /* completely used? */
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					error = EADDRNOTAVAIL;
					goto done;
				}
				--*lastport;
				if (*lastport > first || *lastport < last) {
					*lastport = first;
				}
				lport = htons(*lastport);

				/*
				 * Skip if this is a restricted port as we do not want to
				 * use restricted ports as ephemeral
				 */
				if (IS_RESTRICTED_IN_PORT(lport)) {
					continue;
				}

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
#if SKYWALK
				/* The port is only usable if it can also be reserved */
				if (found &&
				    (SOCK_PROTO(so) == IPPROTO_TCP ||
				    SOCK_PROTO(so) == IPPROTO_UDP) &&
				    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
					int res_err;
					if (inp->inp_vflag & INP_IPV6) {
						res_err = netns_reserve_in6(
							&inp->inp_netns_token,
							inp->in6p_laddr,
							(uint8_t)SOCK_PROTO(so), lport,
							NETNS_BSD, NULL);
					} else {
						res_err = netns_reserve_in(
							&inp->inp_netns_token,
							lookup_addr, (uint8_t)SOCK_PROTO(so),
							lport, NETNS_BSD, NULL);
					}
					found = res_err == 0;
				}
#endif /* SKYWALK */
			} while (!found);
		} else {
			struct in_addr lookup_addr;

			/*
			 * counting up
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof(rand_port));
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {      /* completely used? */
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					error = EADDRNOTAVAIL;
					goto done;
				}
				++*lastport;
				if (*lastport < first || *lastport > last) {
					*lastport = first;
				}
				lport = htons(*lastport);

				/*
				 * Skip if this is a restricted port as we do not want to
				 * use restricted ports as ephemeral
				 */
				if (IS_RESTRICTED_IN_PORT(lport)) {
					continue;
				}

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
#if SKYWALK
				/* The port is only usable if it can also be reserved */
				if (found &&
				    (SOCK_PROTO(so) == IPPROTO_TCP ||
				    SOCK_PROTO(so) == IPPROTO_UDP) &&
				    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
					int res_err;
					if (inp->inp_vflag & INP_IPV6) {
						res_err = netns_reserve_in6(
							&inp->inp_netns_token,
							inp->in6p_laddr,
							(uint8_t)SOCK_PROTO(so), lport,
							NETNS_BSD, NULL);
					} else {
						res_err = netns_reserve_in(
							&inp->inp_netns_token,
							lookup_addr, (uint8_t)SOCK_PROTO(so),
							lport, NETNS_BSD, NULL);
					}
					found = res_err == 0;
				}
#endif /* SKYWALK */
			} while (!found);
		}
	}
	socket_lock(so, 0);

	/*
	 * We unlocked socket's protocol lock for a long time.
	 * The socket might have been dropped/defuncted.
	 * Checking if world has changed since.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
#if SKYWALK
		netns_release(&inp->inp_netns_token);
#endif /* SKYWALK */
		lck_rw_done(&pcbinfo->ipi_lock);
		error = ECONNABORTED;
		goto done;
	}

	/* Re-check for a racing bind now that the socket lock is back */
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
#if SKYWALK
		netns_release(&inp->inp_netns_token);
#endif /* SKYWALK */
		lck_rw_done(&pcbinfo->ipi_lock);
		error = EINVAL;
		goto done;
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, outif);
		}
#endif /* SKYWALK */
	}
	inp->inp_lport = lport;
	if (anonport) {
		inp->inp_flags |= INP_ANONPORT;
	}

	/* Insert into the hash; on failure roll back the partial bind */
	if (in_pcbinshash(inp, remote, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

#if SKYWALK
		netns_release(&inp->inp_netns_token);
#endif /* SKYWALK */
		inp->inp_lport = 0;
		if (anonport) {
			inp->inp_flags &= ~INP_ANONPORT;
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		error = EAGAIN;
		goto done;
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);

	in_pcb_check_management_entitled(inp);
	in_pcb_check_ultra_constrained_entitled(inp);
done:
	inp->inp_flags2 &= ~INP2_BIND_IN_PROGRESS;
	return error;
}
1389 
/*
 * True when the destination is link-local, loopback, zeronet, multicast
 * or a private address — destinations for which the APN fallback
 * notification is suppressed (see apn_fallback_required).
 */
#define APN_FALLBACK_IP_FILTER(a)       \
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

/* Minimum seconds between two fallback notifications (throttle) */
#define APN_FALLBACK_NOTIF_INTERVAL     2 /* Magic Number */
/* net_uptime() of the most recently issued fallback notification */
static uint64_t last_apn_fallback = 0;
1399 
/*
 * Decide whether an APN fallback notification should be posted for this
 * process/socket/destination.  All of the following must hold: the
 * feature is enabled, the caller is not the kernel, the socket did not
 * opt out (SO_NOAPNFALLBK), the throttle interval has elapsed, the
 * destination is not filtered (link-local/loopback/zeronet/multicast/
 * private), there is an unscoped IPv6 default route over cellular but
 * no IPv4 default route, the binary is a third-party app (bundle ID
 * with a dot, not com.apple.*), and the binary predates the App Store
 * IPv6 requirement date.
 */
static boolean_t
apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	if (apn_fallbk_enabled == FALSE) {
		return FALSE;
	}

	if (proc == kernproc) {
		return FALSE;
	}

	if (so && (so->so_options & SO_NOAPNFALLBK)) {
		return FALSE;
	}

	/* Throttle: at most one notification per interval */
	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
		return FALSE;
	}

	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
		return FALSE;
	}

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1(SA(&lookup_default_addr), 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		rtfree(rt);
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route, ensure that
	 * we do not have IPv4 default route before triggering
	 * the event
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1(SA(&lookup_default_addr), 0, 0);

	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party app.
		 * Note that platform daemons use their process name as a
		 * bundle ID so we filter out bundle IDs without dots.
		 */
		const char *__null_terminated bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strlcmp("com.apple.", bundle_id, sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than that.
		 * We check both atime and birthtime since birthtime is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
		vfs_context_t __single context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
			    sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}
1518 
1519 static void
apn_fallback_trigger(proc_t proc,struct socket * so)1520 apn_fallback_trigger(proc_t proc, struct socket *so)
1521 {
1522 	pid_t pid = 0;
1523 	struct kev_msg ev_msg;
1524 	struct kev_netevent_apnfallbk_data apnfallbk_data;
1525 
1526 	last_apn_fallback = net_uptime();
1527 	pid = proc_pid(proc);
1528 	uuid_t application_uuid;
1529 	uuid_clear(application_uuid);
1530 	proc_getexecutableuuid(proc, application_uuid,
1531 	    sizeof(application_uuid));
1532 
1533 	bzero(&ev_msg, sizeof(struct kev_msg));
1534 	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1535 	ev_msg.kev_class        = KEV_NETWORK_CLASS;
1536 	ev_msg.kev_subclass     = KEV_NETEVENT_SUBCLASS;
1537 	ev_msg.event_code       = KEV_NETEVENT_APNFALLBACK;
1538 
1539 	bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1540 
1541 	if (so->so_flags & SOF_DELEGATED) {
1542 		apnfallbk_data.epid = so->e_pid;
1543 		uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1544 	} else {
1545 		apnfallbk_data.epid = so->last_pid;
1546 		uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1547 	}
1548 
1549 	ev_msg.dv[0].data_ptr   = &apnfallbk_data;
1550 	ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1551 	kev_post_msg(&ev_msg);
1552 	apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1553 }
1554 
1555 /*
1556  * Transform old in_pcbconnect() into an inner subroutine for new
1557  * in_pcbconnect(); do some validity-checking on the remote address
1558  * (in "nam") and then determine local host address (i.e., which
1559  * interface) to use to access that remote host.
1560  *
1561  * This routine may alter the caller-supplied remote address "nam".
1562  *
1563  * The caller may override the bound-to-interface setting of the socket
1564  * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1565  *
1566  * This routine might return an ifp with a reference held if the caller
1567  * provides a non-NULL outif, even in the error case.  The caller is
1568  * responsible for releasing its reference.
1569  *
1570  * Returns:	0			Success
1571  *		EINVAL			Invalid argument
1572  *		EAFNOSUPPORT		Address family not supported
1573  *		EADDRNOTAVAIL		Address not available
1574  */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL) {
		*outif = NULL;
	}
	/* Validate the remote address before anything else */
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	in_pcb_check_management_entitled(inp);
	in_pcb_check_ultra_constrained_entitled(inp);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(&in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(&in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Invalidate the cached route if it no longer matches this destination */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		SOCKADDR_ZERO(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	SOCKADDR_ZERO(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/* nam was validated above to be a sockaddr_in */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam)) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			ifa_addref(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed off.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		/* Fall back to the address attached to the route itself */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			ifa_addref(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL) {
				ifa_remref(&ia->ia_ifa);
			}
			/* Find an IPv4 address on the multicast interface */
			lck_rw_lock_shared(&in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				ifa_addref(&ia->ia_ifa);
			}
			lck_rw_done(&in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);   /* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		ifa_remref(&ia->ia_ifa);
		ia = NULL;
	}

	/* Tell interested filters that the interface was denied */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}
1859 
1860 /*
1861  * Outer subroutine:
1862  * Connect from a socket to a specified address.
1863  * Both address and port must be specified in argument sin.
1864  * If don't have a local address for this socket yet,
1865  * then pick one.
1866  *
1867  * The caller may override the bound-to-interface setting of the socket
1868  * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1869  */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = SIN(nam);
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	/* Let content filters observe the connection state transition. */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	/*
	 *   Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
		return error;
	}

	/*
	 * Drop the socket lock around the hash lookup: in_pcblookup_hash()
	 * takes the pcbinfo lock, and holding both in the wrong order would
	 * invert the locking hierarchy.
	 */
	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return ECONNREFUSED;
	}

	if (pcb != NULL) {
		/* The 4-tuple is already in use (possibly by ourselves). */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return EADDRINUSE;
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* No local port yet; bind to pick an ephemeral one. */
			error = in_pcbbind(inp, NULL, nam, p);
			if (error) {
				return error;
			}
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token,
			    inp->inp_last_outifp);
		}
#endif /* SKYWALK */
		/* Remember that the local address was chosen implicitly. */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routines can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0) {
			return EINVAL;
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* Commit the foreign endpoint and rehash while holding ipi_lock. */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	return 0;
}
1969 
/*
 * Disconnect a PCB: clear the foreign endpoint, rehash onto the
 * wildcard bucket, and detach the PCB if the socket is already closed.
 */
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* Let NetworkStatistics cache the UDP flow before it goes away. */
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_cache(inp);
	}

	/* Revert to the unconnected state. */
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

#if CONTENT_FILTER
	/* Let content filters observe the state transition. */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	/* Move the PCB to the hash bucket for the wildcard foreign address. */
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
		in_pcbdetach(inp);
	}
}
2006 
/*
 * Detach a PCB from its socket: tear down per-PCB policy and cached
 * state, mark the PCB dead, and schedule it for garbage collection.
 * The socket itself is disposed of later via in_pcbdispose().
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	/* Drop any per-PCB IPsec policy. */
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/* Account for UDP sockets that never exchanged any data. */
	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
		nstat_pcb_detach(inp);
	}

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		kfree_data_counted_by(inp->inp_keepalive_data, inp->inp_keepalive_datalen);
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if SKYWALK
	/* Free up the port in the namespace registrar if not in TIME_WAIT */
	if (!(inp->inp_flags2 & INP2_TIMEWAIT)) {
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
	}
#endif /* SKYWALK */

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		/* Release cached IP options, route, and multicast state. */
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		if (imo != NULL) {
			IMO_REMREF(imo);
		}
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_stat != NULL &&
			    (inp->inp_stat->txbytes != 0 ||
			    inp->inp_stat->rxbytes != 0)) {
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->e_pid,
						so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->last_pid,
						so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		/* Have the PCB garbage collector reclaim this PCB soon. */
		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
2111 
2112 
/*
 * Final disposal of a PCB (and its socket, if any): remove it from the
 * global lists and free the memory.  The PCB must already be in the
 * WNT_STOPUSING state and the pcbinfo lock must be held exclusively.
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	/* Sanity-check invariants before tearing anything down. */
	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(&ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		/* Sever the socket <-> PCB linkage in both directions. */
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			/* PCB not embedded in the socket; free it separately. */
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
2190 
2191 /*
2192  * The calling convention of in_getsockaddr() and in_getpeeraddr() was
2193  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
2195  * without the need for a wrapper function.
2196  */
2197 int
in_getsockaddr(struct socket * so,struct sockaddr ** nam)2198 in_getsockaddr(struct socket *so, struct sockaddr **nam)
2199 {
2200 	struct inpcb *inp;
2201 	struct sockaddr_in *sin;
2202 
2203 	/*
2204 	 * Do the malloc first in case it blocks.
2205 	 */
2206 	sin = SIN(alloc_sockaddr(sizeof(*sin),
2207 	    Z_WAITOK | Z_NOFAIL));
2208 
2209 	sin->sin_family = AF_INET;
2210 
2211 	if ((inp = sotoinpcb(so)) == NULL) {
2212 		free_sockaddr(sin);
2213 		return EINVAL;
2214 	}
2215 	sin->sin_port = inp->inp_lport;
2216 	sin->sin_addr = inp->inp_laddr;
2217 
2218 	*nam = SA(sin);
2219 	return 0;
2220 }
2221 
2222 int
in_getsockaddr_s(struct socket * so,struct sockaddr_in * ss)2223 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
2224 {
2225 	struct sockaddr_in *sin = ss;
2226 	struct inpcb *inp;
2227 
2228 	VERIFY(ss != NULL);
2229 	SOCKADDR_ZERO(ss, sizeof(*ss));
2230 
2231 	sin->sin_family = AF_INET;
2232 	sin->sin_len = sizeof(*sin);
2233 
2234 	if ((inp = sotoinpcb(so)) == NULL) {
2235 		return EINVAL;
2236 	}
2237 
2238 	sin->sin_port = inp->inp_lport;
2239 	sin->sin_addr = inp->inp_laddr;
2240 	return 0;
2241 }
2242 
2243 int
in_getpeeraddr(struct socket * so,struct sockaddr ** nam)2244 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
2245 {
2246 	struct inpcb *inp;
2247 	struct sockaddr_in *sin;
2248 
2249 	/*
2250 	 * Do the malloc first in case it blocks.
2251 	 */
2252 	sin = SIN(alloc_sockaddr(sizeof(*sin),
2253 	    Z_WAITOK | Z_NOFAIL));
2254 
2255 	sin->sin_family = AF_INET;
2256 
2257 	if ((inp = sotoinpcb(so)) == NULL) {
2258 		free_sockaddr(sin);
2259 		return EINVAL;
2260 	}
2261 	sin->sin_port = inp->inp_fport;
2262 	sin->sin_addr = inp->inp_faddr;
2263 
2264 	*nam = SA(sin);
2265 	return 0;
2266 }
2267 
/*
 * Invoke the notify callback, with the given errno, on every IPv4 PCB
 * in this protocol's list that is connected to the foreign address
 * `faddr'.  PCBs already being torn down are skipped.
 */
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		/* Only IPv4 PCBs connected to faddr are of interest. */
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL) {
			continue;
		}
		/* Skip PCBs that are already on their way out. */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}
		/* Hold the socket lock across the callback. */
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
}
2294 
2295 /*
2296  * Check for alternatives when higher level complains
2297  * about service problems.  For now, invalidate cached
2298  * routing information.  If the route was created dynamically
2299  * (by a redirect), time to try a default gateway again.
2300  */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		/* Routes created dynamically (via redirect) get deleted. */
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route.  A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		/* ifa_foraddr() returned a held reference; drop it. */
		if (ia != NULL) {
			ifa_remref(&ia->ia_ifa);
		}
	}
	/* Flush the cached route unless we decided to keep it above. */
	if (rt == NULL || release) {
		ROUTE_RELEASE(&inp->inp_route);
	}
}
2341 
2342 /*
2343  * After a routing change, flush old routing
2344  * and allocate a (hopefully) better one.
2345  */
2346 void
in_rtchange(struct inpcb * inp,int errno)2347 in_rtchange(struct inpcb *inp, int errno)
2348 {
2349 #pragma unused(errno)
2350 	boolean_t release = FALSE;
2351 	struct rtentry *rt;
2352 
2353 	if ((rt = inp->inp_route.ro_rt) != NULL) {
2354 		struct in_ifaddr *ia = NULL;
2355 
2356 		/* if address is gone, keep the old route */
2357 		if (inp->inp_laddr.s_addr != INADDR_ANY &&
2358 		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2359 			/*
2360 			 * Address is around; ditch the route.  A new route
2361 			 * can be allocated the next time output is attempted.
2362 			 */
2363 			release = TRUE;
2364 		}
2365 		if (ia != NULL) {
2366 			ifa_remref(&ia->ia_ifa);
2367 		}
2368 	}
2369 	if (rt == NULL || release) {
2370 		ROUTE_RELEASE(&inp->inp_route);
2371 	}
2372 }
2373 
2374 /*
2375  * Lookup a PCB based on the local address and port.
2376  */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	/*
	 * matchwild is the best (lowest) wildcard score seen so far;
	 * 3 exceeds any reachable score, so any candidate beats it.
	 */
	int matchwild = 3, wildcard;
	u_short lport = (u_short)lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
			if (!(inp->inp_vflag & INP_IPV4)) {
				continue;
			}
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return inp;
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return NULL;
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport) {
				break;
			}
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.  Each address that is wildcarded on one side
			 * but not the other adds one to the score; the PCB
			 * with the lowest score wins (0 = exact match).
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
				if (!(inp->inp_vflag & INP_IPV4)) {
					continue;
				}
				/* Connected PCBs are a weaker match here. */
				if (inp->inp_faddr.s_addr != INADDR_ANY) {
					wildcard++;
				}
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY) {
						wildcard++;
					} else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr) {
						/* Bound elsewhere; no match. */
						continue;
					}
				} else {
					if (laddr.s_addr != INADDR_ANY) {
						wildcard++;
					}
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						/* Perfect match; stop early. */
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return match;
	}
}
2469 
2470 /*
2471  * Check if PCB exists in hash list.
2472  */
/*
 * Check whether a PCB matching { faddr, fport, laddr, lport } exists,
 * and if so copy the owning socket's credentials to *uid / *gid.
 * Returns nonzero when a match with an attached socket is found.
 * Unlike in_pcblookup_hash(), no reference is taken on the PCB; the
 * credentials are copied out while the pcbinfo lock is held.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	/* Defaults reported when no matching socket is found. */
	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* Honor interface-based receive restrictions. */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
					inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					inp->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}

	/*
	 * Second pass: look for an unconnected PCB bound to the local
	 * port, preferring a specific local address over INADDR_ANY,
	 * and a plain IPv4 wildcard over a v4-mapped IPv6 one.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
						inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
						inp->inp_socket->so_cred);
				}
				lck_rw_done(&pcbinfo->ipi_lock);
				return found;
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		/* Fall back to the v4-mapped IPv6 wildcard, if any. */
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
					local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
			local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
			local_wild->inp_socket->so_cred);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	return found;
}
2599 
2600 /*
2601  * Lookup PCB in hash list.
2602  */
2603 struct inpcb *
in_pcblookup_hash(struct inpcbinfo * pcbinfo,struct in_addr faddr,u_int fport_arg,struct in_addr laddr,u_int lport_arg,int wildcard,struct ifnet * ifp)2604 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2605     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2606     struct ifnet *ifp)
2607 {
2608 	struct inpcbhead *head;
2609 	struct inpcb *inp;
2610 	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2611 	struct inpcb *local_wild = NULL;
2612 	struct inpcb *local_wild_mapped = NULL;
2613 
2614 	/*
2615 	 * We may have found the pcb in the last lookup - check this first.
2616 	 */
2617 
2618 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
2619 
2620 	/*
2621 	 * First look for an exact match.
2622 	 */
2623 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2624 	    pcbinfo->ipi_hashmask)];
2625 	LIST_FOREACH(inp, head, inp_hash) {
2626 		if (!(inp->inp_vflag & INP_IPV4)) {
2627 			continue;
2628 		}
2629 		if (inp_restricted_recv(inp, ifp)) {
2630 			continue;
2631 		}
2632 
2633 #if NECP
2634 		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2635 			continue;
2636 		}
2637 #endif /* NECP */
2638 
2639 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
2640 		    inp->inp_laddr.s_addr == laddr.s_addr &&
2641 		    inp->inp_fport == fport &&
2642 		    inp->inp_lport == lport) {
2643 			/*
2644 			 * Found.
2645 			 */
2646 			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2647 			    WNT_STOPUSING) {
2648 				lck_rw_done(&pcbinfo->ipi_lock);
2649 				return inp;
2650 			} else {
2651 				/* it's there but dead, say it isn't found */
2652 				lck_rw_done(&pcbinfo->ipi_lock);
2653 				return NULL;
2654 			}
2655 		}
2656 	}
2657 
2658 	if (!wildcard) {
2659 		/*
2660 		 * Not found.
2661 		 */
2662 		lck_rw_done(&pcbinfo->ipi_lock);
2663 		return NULL;
2664 	}
2665 
2666 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2667 	    pcbinfo->ipi_hashmask)];
2668 	LIST_FOREACH(inp, head, inp_hash) {
2669 		if (!(inp->inp_vflag & INP_IPV4)) {
2670 			continue;
2671 		}
2672 		if (inp_restricted_recv(inp, ifp)) {
2673 			continue;
2674 		}
2675 
2676 #if NECP
2677 		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2678 			continue;
2679 		}
2680 #endif /* NECP */
2681 
2682 		if (inp->inp_faddr.s_addr == INADDR_ANY &&
2683 		    inp->inp_lport == lport) {
2684 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
2685 				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2686 				    WNT_STOPUSING) {
2687 					lck_rw_done(&pcbinfo->ipi_lock);
2688 					return inp;
2689 				} else {
2690 					/* it's dead; say it isn't found */
2691 					lck_rw_done(&pcbinfo->ipi_lock);
2692 					return NULL;
2693 				}
2694 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2695 				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2696 					local_wild_mapped = inp;
2697 				} else {
2698 					local_wild = inp;
2699 				}
2700 			}
2701 		}
2702 	}
2703 	if (local_wild == NULL) {
2704 		if (local_wild_mapped != NULL) {
2705 			if (in_pcb_checkstate(local_wild_mapped,
2706 			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2707 				lck_rw_done(&pcbinfo->ipi_lock);
2708 				return local_wild_mapped;
2709 			} else {
2710 				/* it's dead; say it isn't found */
2711 				lck_rw_done(&pcbinfo->ipi_lock);
2712 				return NULL;
2713 			}
2714 		}
2715 		lck_rw_done(&pcbinfo->ipi_lock);
2716 		return NULL;
2717 	}
2718 	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2719 		lck_rw_done(&pcbinfo->ipi_lock);
2720 		return local_wild;
2721 	}
2722 	/*
2723 	 * It's either not found or is already dead.
2724 	 */
2725 	lck_rw_done(&pcbinfo->ipi_lock);
2726 	return NULL;
2727 }
2728 
2729 /*
2730  * @brief	Insert PCB onto various hash lists.
2731  *
2732  * @param	inp Pointer to internet protocol control block
2733  * @param	remote Pointer to remote address sockaddr for policy evaluation
2734  * @param	locked	Implies if ipi_lock (protecting pcb list)
2735  *              is already locked or not.
2736  *
2737  * @return	int error on failure and 0 on success
2738  */
int
in_pcbinshash(struct inpcb *inp, struct sockaddr *remote, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}

	/*
	 * This routine or its caller may have given up
	 * socket's protocol lock briefly.
	 * During that time the socket may have been dropped.
	 * Safe-guarding against that.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		if (!locked) {
			lck_rw_done(&pcbinfo->ipi_lock);
		}
		return ECONNABORTED;
	}


	/* For IPv6, hash on the embedded low 32 bits of the address. */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport) {
			break;
		}
	}

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		phd = kalloc_type(struct inpcbport, Z_WAITOK | Z_NOFAIL);
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	/* The PCB must not be on the hash lists yet. */
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

#if SKYWALK
	/* Reserve the <proto, laddr, lport> tuple in the namespace registrar. */
	int err;
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		if (inp->inp_vflag & INP_IPV6) {
			err = netns_reserve_in6(&inp->inp_netns_token,
			    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		} else {
			err = netns_reserve_in(&inp->inp_netns_token,
			    inp->inp_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		}
		if (err) {
			/* Reservation failed; undo nothing and bail out. */
			if (!locked) {
				lck_rw_done(&pcbinfo->ipi_lock);
			}
			return err;
		}
		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */

	/* Link the PCB onto the port list and the address hash. */
	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked) {
		lck_rw_done(&pcbinfo->ipi_lock);
	}

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, remote, 0);
#endif /* NECP */

	return 0;
}
2850 
2851 /*
2852  * Move PCB to the proper hash bucket when { faddr, fport } have  been
2853  * changed. NOTE: This does not handle the case of the lport changing (the
2854  * hashed port list would have to be updated as well), so the lport must
2855  * not change after in_pcbinshash() has been called.
2856  */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if SKYWALK
	/* Keep the namespace registrar in sync with the new local address. */
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		int err;
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			/* Already reserved: just move it to the new address. */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_change_addr_in6(
					&inp->inp_netns_token, inp->in6p_laddr);
			} else {
				err = netns_change_addr_in(
					&inp->inp_netns_token, inp->inp_laddr);
			}
		} else {
			/* No token yet: make a fresh reservation. */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_reserve_in6(&inp->inp_netns_token,
				    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			} else {
				err = netns_reserve_in(&inp->inp_netns_token,
				    inp->inp_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			}
		}
		/* We are assuming that whatever code paths result in a rehash
		 * did their due diligence and ensured that the given
		 * <proto, laddr, lport> tuple was free ahead of time. Just
		 * reserving the lport on INADDR_ANY should be enough, since
		 * that will block Skywalk from trying to reserve that same
		 * port. Given this assumption, the above netns calls should
		 * never fail*/
		VERIFY(err == 0);

		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */
	/* For IPv6, hash on the embedded low 32 bits of the address. */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	/* Recompute the bucket from the (possibly new) faddr/fport. */
	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	/* Unlink from the old bucket before re-inserting. */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}
2924 
2925 /*
2926  * Remove PCB from various lists.
2927  * Must be called pcbinfo lock is held in exclusive mode.
2928  */
void
in_pcbremlists(struct inpcb *inp)
{
	/* Bump the generation count so stale sysctl snapshots can be detected. */
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * its local port gets assigned. So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		/* Last PCB on this port head: unhash and free the head. */
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			kfree_type(struct inpcbport, phd);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
#if SKYWALK
		/* Free up the port in the namespace registrar */
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
#endif /* SKYWALK */
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		/* Detach from the flow-control tree; this clears INP2_IN_FCTREE. */
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}
2983 
2984 /*
2985  * Mechanism used to defer the memory release of PCBs
2986  * The pcb list will contain the pcb until the reaper can clean it up if
2987  * the following conditions are met:
2988  *	1) state "DEAD",
2989  *	2) wantcnt is STOPUSING
2990  *	3) usecount is 0
2991  * This function will be called to either mark the pcb as
2992  */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	/*
	 * inp_wantcnt is manipulated lock-free with compare-and-swap.
	 * The low 16 bits hold the use count; the sentinel value 0xffff
	 * (WNT_STOPUSING) marks the PCB as dead and not to be acquired.
	 */
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling.  CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}

		/* Ask the GC thread to come around soon and reclaim this PCB. */
		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) { /* should stop using */
			return WNT_STOPUSING;
		}
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return WNT_STOPUSING;

	case WNT_ACQUIRE:
		/*
		 * Try to increase reference to pcb.  If WNT_STOPUSING
		 * should bail out.  If socket state DEAD, try to set count
		 * to STOPUSING, return failed otherwise increase cnt.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return WNT_STOPUSING;
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return WNT_ACQUIRE;

	case WNT_RELEASE:
		/*
		 * Release reference.  If result is null and pcb state
		 * is DEAD, set wanted bit to STOPUSING
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				/* releasing a reference that was never taken */
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0) {
					socket_unlock(pcb->inp_socket, 1);
				}
				return WNT_STOPUSING;
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		/* Dead PCB: fall into the STOPUSING path to mark it unusable. */
		if (pcb->inp_state == INPCB_STATE_DEAD) {
			goto stopusing;
		}
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}
		return WNT_RELEASE;

	default:
		panic("%s: so=%p not a valid state =%x", __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return mode;
}
3100 
3101 /*
3102  * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
3103  * The inpcb_compat data structure is passed to user space and must
3104  * not change. We intentionally avoid copying pointers.
3105  */
3106 void
inpcb_to_compat(struct inpcb * inp,struct inpcb_compat * inp_compat)3107 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
3108 {
3109 	bzero(inp_compat, sizeof(*inp_compat));
3110 	inp_compat->inp_fport = inp->inp_fport;
3111 	inp_compat->inp_lport = inp->inp_lport;
3112 	inp_compat->nat_owner = 0;
3113 	inp_compat->nat_cookie = 0;
3114 	inp_compat->inp_gencnt = inp->inp_gencnt;
3115 	inp_compat->inp_flags = inp->inp_flags;
3116 	inp_compat->inp_flow = inp->inp_flow;
3117 	inp_compat->inp_vflag = inp->inp_vflag;
3118 	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
3119 	inp_compat->inp_ip_p = inp->inp_ip_p;
3120 	inp_compat->inp_dependfaddr.inp6_foreign =
3121 	    inp->inp_dependfaddr.inp6_foreign;
3122 	inp_compat->inp_dependladdr.inp6_local =
3123 	    inp->inp_dependladdr.inp6_local;
3124 	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
3125 	inp_compat->inp_depend6.inp6_hlim = 0;
3126 	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
3127 	inp_compat->inp_depend6.inp6_ifindex = 0;
3128 	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
3129 }
3130 
3131 #if XNU_TARGET_OS_OSX
3132 void
inpcb_to_xinpcb64(struct inpcb * inp,struct xinpcb64 * xinp)3133 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
3134 {
3135 	xinp->inp_fport = inp->inp_fport;
3136 	xinp->inp_lport = inp->inp_lport;
3137 	xinp->inp_gencnt = inp->inp_gencnt;
3138 	xinp->inp_flags = inp->inp_flags;
3139 	xinp->inp_flow = inp->inp_flow;
3140 	xinp->inp_vflag = inp->inp_vflag;
3141 	xinp->inp_ip_ttl = inp->inp_ip_ttl;
3142 	xinp->inp_ip_p = inp->inp_ip_p;
3143 	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
3144 	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
3145 	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
3146 	xinp->inp_depend6.inp6_hlim = 0;
3147 	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
3148 	xinp->inp_depend6.inp6_ifindex = 0;
3149 	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
3150 }
3151 #endif /* XNU_TARGET_OS_OSX */
3152 
3153 /*
3154  * The following routines implement this scheme:
3155  *
3156  * Callers of ip_output() that intend to cache the route in the inpcb pass
3157  * a local copy of the struct route to ip_output().  Using a local copy of
3158  * the cached route significantly simplifies things as IP no longer has to
3159  * worry about having exclusive access to the passed in struct route, since
3160  * it's defined in the caller's stack; in essence, this allows for a lock-
3161  * less operation when updating the struct route at the IP level and below,
3162  * whenever necessary. The scheme works as follows:
3163  *
3164  * Prior to dropping the socket's lock and calling ip_output(), the caller
3165  * copies the struct route from the inpcb into its stack, and adds a reference
3166  * to the cached route entry, if there was any.  The socket's lock is then
3167  * dropped and ip_output() is called with a pointer to the copy of struct
3168  * route defined on the stack (not to the one in the inpcb.)
3169  *
3170  * Upon returning from ip_output(), the caller then acquires the socket's
3171  * lock and synchronizes the cache; if there is no route cached in the inpcb,
3172  * it copies the local copy of struct route (which may or may not contain any
3173  * route) back into the cache; otherwise, if the inpcb has a route cached in
3174  * it, the one in the local copy will be freed, if there's any.  Trashing the
3175  * cached route in the inpcb can be avoided because ip_output() is single-
3176  * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
3177  * by the socket/transport layer.)
3178  */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the case of IPv4-mapped address case.
	 * Note: the short-circuit order matters here; rt_key() must not
	 * be evaluated when ROUTE_UNUSABLE() is true (ro_rt may be NULL).
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
		ROUTE_RELEASE(src);
	}

	/* Hand the caller a referenced copy of the cached route. */
	route_copyout(dst, src, sizeof(*dst));
}
3196 
void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
		panic("%s: wrong or corrupted route: %p", __func__, src);
	}

	/* Sync the caller's stack copy back into the PCB's route cache. */
	route_copyin(src, dst, sizeof(*src));
}
3211 
3212 /*
3213  * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
3214  */
3215 static void
inp_bindif_common(struct inpcb * inp,struct ifnet * ifp)3216 inp_bindif_common(struct inpcb *inp, struct ifnet *ifp)
3217 {
3218 	/*
3219 	 * A zero interface scope value indicates an "unbind".
3220 	 * Otherwise, take in whatever value the app desires;
3221 	 * the app may already know the scope (or force itself
3222 	 * to such a scope) ahead of time before the interface
3223 	 * gets attached.  It doesn't matter either way; any
3224 	 * route lookup from this point on will require an
3225 	 * exact match for the embedded interface scope.
3226 	 */
3227 	inp->inp_boundifp = ifp;
3228 	if (inp->inp_boundifp == NULL) {
3229 		inp->inp_flags &= ~INP_BOUND_IF;
3230 	} else {
3231 		inp->inp_flags |= INP_BOUND_IF;
3232 	}
3233 
3234 	/* Blow away any cached route in the PCB */
3235 	ROUTE_RELEASE(&inp->inp_route);
3236 }
3237 
3238 
3239 int
inp_bindif(struct inpcb * inp,unsigned int ifscope,struct ifnet ** pifp)3240 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
3241 {
3242 	struct ifnet *ifp = NULL;
3243 
3244 	ifnet_head_lock_shared();
3245 	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
3246 	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
3247 		ifnet_head_done();
3248 		return ENXIO;
3249 	}
3250 	ifnet_head_done();
3251 
3252 	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
3253 
3254 	inp_bindif_common(inp, ifp);
3255 
3256 	if (pifp != NULL) {
3257 		*pifp = ifp;
3258 	}
3259 
3260 	return 0;
3261 }
3262 
3263 int
inp_bindtodevice(struct inpcb * inp,const char * ifname)3264 inp_bindtodevice(struct inpcb *inp, const char *ifname)
3265 {
3266 	ifnet_ref_t ifp = NULL;
3267 
3268 	if (*ifname != 0) {
3269 		int error = ifnet_find_by_name(ifname, &ifp);
3270 		if (error != 0) {
3271 			return error;
3272 		}
3273 	}
3274 
3275 	inp_bindif_common(inp, ifp);
3276 
3277 	if (ifp != NULL) {
3278 		ifnet_release(ifp);
3279 	}
3280 	return 0;
3281 }
3282 
3283 /*
3284  * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3285  * as well as for setting PROC_UUID_NO_CELLULAR policy.
3286  */
/* Forbid this PCB from using cellular interfaces for routing. */
void
inp_set_nocellular(struct inpcb *inp)
{
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3295 
3296 /*
3297  * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3298  * as well as for clearing PROC_UUID_NO_CELLULAR policy.
3299  */
3300 void
inp_clear_nocellular(struct inpcb * inp)3301 inp_clear_nocellular(struct inpcb *inp)
3302 {
3303 	struct socket *so = inp->inp_socket;
3304 
3305 	/*
3306 	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
3307 	 * has a higher precendence than INP_NO_IFT_CELLULAR.  Clear the flag
3308 	 * if and only if the socket is unrestricted.
3309 	 */
3310 	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
3311 		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
3312 
3313 		/* Blow away any cached route in the PCB */
3314 		ROUTE_RELEASE(&inp->inp_route);
3315 	}
3316 }
3317 
/* Forbid this PCB from using interfaces marked expensive. */
void
inp_set_noexpensive(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3326 
/* Forbid this PCB from using interfaces marked constrained. */
void
inp_set_noconstrained(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3335 
/* Grant this PCB access to AWDL interfaces. */
void
inp_set_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3344 
3345 boolean_t
inp_get_awdl_unrestricted(struct inpcb * inp)3346 inp_get_awdl_unrestricted(struct inpcb *inp)
3347 {
3348 	return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
3349 }
3350 
/* Revoke this PCB's access to AWDL interfaces. */
void
inp_clear_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3359 
/* Grant this PCB access to interface-coprocessor interfaces. */
void
inp_set_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3368 
3369 boolean_t
inp_get_intcoproc_allowed(struct inpcb * inp)3370 inp_get_intcoproc_allowed(struct inpcb *inp)
3371 {
3372 	return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3373 }
3374 
/* Revoke this PCB's access to interface-coprocessor interfaces. */
void
inp_clear_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3383 
3384 void
inp_set_management_allowed(struct inpcb * inp)3385 inp_set_management_allowed(struct inpcb *inp)
3386 {
3387 	inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
3388 	inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
3389 
3390 	/* Blow away any cached route in the PCB */
3391 	ROUTE_RELEASE(&inp->inp_route);
3392 }
3393 
3394 boolean_t
inp_get_management_allowed(struct inpcb * inp)3395 inp_get_management_allowed(struct inpcb *inp)
3396 {
3397 	return (inp->inp_flags2 & INP2_MANAGEMENT_ALLOWED) ? TRUE : FALSE;
3398 }
3399 
/* Revoke this PCB's access to management interfaces. */
void
inp_clear_management_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_MANAGEMENT_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3408 
3409 void
inp_set_ultra_constrained_allowed(struct inpcb * inp)3410 inp_set_ultra_constrained_allowed(struct inpcb *inp)
3411 {
3412 	inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
3413 	inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;
3414 
3415 	/* Blow away any cached route in the PCB */
3416 	ROUTE_RELEASE(&inp->inp_route);
3417 }
3418 
3419 #if NECP
3420 /*
3421  * Called when PROC_UUID_NECP_APP_POLICY is set.
3422  */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	/* Mark the PCB so NECP app-level policy matching is applied. */
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}
3428 
3429 /*
3430  * Called when PROC_UUID_NECP_APP_POLICY is cleared.
3431  */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	/* Stop applying NECP app-level policy matching to this PCB. */
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
3437 #endif /* NECP */
3438 
3439 /*
3440  * Calculate flow hash for an inp, used by an interface to identify a
3441  * flow. When an interface provides flow control advisory, this flow
3442  * hash is used as an identifier.
3443  */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
#if SKYWALK
	/*
	 * With Skywalk, flow IDs come from the shared flow-ID namespace so
	 * they are unique across BSD and Skywalk flows; build the 5-tuple
	 * key and ask the allocator for an ID.
	 */
	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));

	if (inp->inp_vflag & INP_IPV4) {
		fk.ffk_af = AF_INET;
		fk.ffk_laddr_v4 = inp->inp_laddr;
		fk.ffk_raddr_v4 = inp->inp_faddr;
	} else {
		fk.ffk_af = AF_INET6;
		fk.ffk_laddr_v6 = inp->in6p_laddr;
		fk.ffk_raddr_v6 = inp->in6p_faddr;
		/* clear embedded scope ID */
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_laddr_v6)) {
			fk.ffk_laddr_v6.s6_addr16[1] = 0;
		}
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_raddr_v6)) {
			fk.ffk_raddr_v6.s6_addr16[1] = 0;
		}
	}

	fk.ffk_lport = inp->inp_lport;
	fk.ffk_rport = inp->inp_fport;
	/* fall back to the socket's protocol when inp_ip_p is unset */
	fk.ffk_proto = (inp->inp_ip_p != 0) ? inp->inp_ip_p :
	    (uint8_t)SOCK_PROTO(inp->inp_socket);
	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_INPCB, &fk, &flowid);
	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	ASSERT(inp->inp_flowhash == 0);
	ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) == 0);
	inp->inp_flowhash = flowid;
	/* namespace-allocated IDs are unique, so insertion cannot collide */
	VERIFY(RB_INSERT(inp_fc_tree, &inp_fc_tree, inp) == NULL);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowid;

#else /* !SKYWALK */

	/*
	 * Without Skywalk, hash the 5-tuple (plus random salt) and retry
	 * with a new seed until the hash is non-zero and unique in
	 * inp_fc_tree (zero is reserved to mean "no flowhash").
	 */
	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low.  Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowhash;

#endif /* !SKYWALK */
}
3543 
3544 void
inp_flowadv(uint32_t flowhash)3545 inp_flowadv(uint32_t flowhash)
3546 {
3547 	struct inpcb *inp;
3548 
3549 	inp = inp_fc_getinp(flowhash, 0);
3550 
3551 	if (inp == NULL) {
3552 		return;
3553 	}
3554 	inp_fc_feedback(inp);
3555 }
3556 
3557 /*
3558  * Function to compare inp_fc_entries in inp flow control tree
3559  */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	/*
	 * Byte-wise comparison of the two flowhash values.  The resulting
	 * order is byte-order dependent but consistent, which is all the
	 * red-black tree requires.
	 */
	return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	           sizeof(inp1->inp_flowhash));
}
3566 
/*
 * Look up a PCB by flowhash in inp_fc_tree.  With INPFC_REMOVE, detach
 * the PCB from the tree and return NULL; otherwise return the PCB with
 * a want-count reference held, or NULL if not found / stopped using.
 */
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	/* key_inp is a dummy PCB used purely as an RB_FIND search key */
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (flags & INPFC_REMOVE) {
		ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) != 0);
		/* removal may block; promote the spin lock to a full mutex */
		lck_mtx_convert_spin(&inp_fc_lck);
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		bzero(&(inp->infc_link), sizeof(inp->infc_link));
#if SKYWALK
		/* return the flow ID to the shared namespace */
		VERIFY(inp->inp_flowhash != 0);
		flowidns_release_flowid(inp->inp_flowhash);
		inp->inp_flowhash = 0;
#endif /* SKYWALK */
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	/* take a reference for the caller; bail if the PCB is going away */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
		inp = NULL;
	}
	lck_mtx_unlock(&inp_fc_lck);

	return inp;
}
3604 
/*
 * Deliver interface flow-control feedback to a PCB: clear the
 * flow-controlled/suspended state and wake up blocked writers.
 * Consumes the want-count reference taken by inp_fc_getinp().
 */
static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/*
	 * A send is in flight; flag it so a concurrent flow-advisory
	 * cannot immediately re-enter the flow-controlled state.
	 */
	if (inp->inp_sndinprog_cnt > 0) {
		inp->inp_flags |= INP_FC_FEEDBACK;
	}

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	/* let TCP ramp back up after the interface unthrottled us */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		inp_fc_unthrottle_tcp(inp);
	}

	socket_unlock(so, 1);
}
3640 
3641 static void
inp_reset_fc_timerstat(struct inpcb * inp)3642 inp_reset_fc_timerstat(struct inpcb *inp)
3643 {
3644 	uint64_t now;
3645 
3646 	if (inp->inp_fadv_start_time == 0) {
3647 		return;
3648 	}
3649 
3650 	now = net_uptime_us();
3651 	ASSERT(now >= inp->inp_fadv_start_time);
3652 
3653 	inp->inp_fadv_total_time += (now - inp->inp_fadv_start_time);
3654 	inp->inp_fadv_cnt++;
3655 
3656 	inp->inp_fadv_start_time = 0;
3657 }
3658 
3659 static void
inp_set_fc_timerstat(struct inpcb * inp)3660 inp_set_fc_timerstat(struct inpcb *inp)
3661 {
3662 	if (inp->inp_fadv_start_time != 0) {
3663 		return;
3664 	}
3665 
3666 	inp->inp_fadv_start_time = net_uptime_us();
3667 }
3668 
3669 void
inp_reset_fc_state(struct inpcb * inp)3670 inp_reset_fc_state(struct inpcb *inp)
3671 {
3672 	struct socket *so = inp->inp_socket;
3673 	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
3674 	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
3675 
3676 	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3677 
3678 	inp_reset_fc_timerstat(inp);
3679 
3680 	if (suspended) {
3681 		so->so_flags &= ~(SOF_SUSPENDED);
3682 		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
3683 	}
3684 
3685 	/* Give a write wakeup to unblock the socket */
3686 	if (needwakeup) {
3687 		sowwakeup(so);
3688 	}
3689 }
3690 
/*
 * Apply a flow advisory (FADV_FLOW_CONTROLLED or FADV_SUSPENDED) from
 * an interface to this PCB.  Returns 1 if the advisory was applied,
 * 0 if it was ignored (feedback race or PCB not found/stopping).
 */
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK) {
		return 0;
	}

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	/* re-look up through the fc tree to validate the PCB is still live */
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			goto exit_reset;
		}
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			inp_set_fc_timerstat(inp);
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			inp_set_fc_timerstat(inp);

			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}

		/* throttle TCP only on the transition into flow control */
		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
			inp_fc_throttle_tcp(inp);
		}
		return 1;
	}

exit_reset:
	inp_reset_fc_timerstat(inp);

	return 0;
}
3742 
3743 /*
3744  * Handler for SO_FLUSH socket option.
3745  */
3746 int
inp_flush(struct inpcb * inp,int optval)3747 inp_flush(struct inpcb *inp, int optval)
3748 {
3749 	u_int32_t flowhash = inp->inp_flowhash;
3750 	struct ifnet *rtifp, *oifp;
3751 
3752 	/* Either all classes or one of the valid ones */
3753 	if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
3754 		return EINVAL;
3755 	}
3756 
3757 	/* We need a flow hash for identification */
3758 	if (flowhash == 0) {
3759 		return 0;
3760 	}
3761 
3762 	/* Grab the interfaces from the route and pcb */
3763 	rtifp = ((inp->inp_route.ro_rt != NULL) ?
3764 	    inp->inp_route.ro_rt->rt_ifp : NULL);
3765 	oifp = inp->inp_last_outifp;
3766 
3767 	if (rtifp != NULL) {
3768 		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3769 	}
3770 	if (oifp != NULL && oifp != rtifp) {
3771 		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3772 	}
3773 
3774 	return 0;
3775 }
3776 
3777 /*
3778  * Clear the INP_INADDR_ANY flag (special case for PPP only)
3779  */
3780 void
inp_clear_INP_INADDR_ANY(struct socket * so)3781 inp_clear_INP_INADDR_ANY(struct socket *so)
3782 {
3783 	struct inpcb *inp = NULL;
3784 
3785 	socket_lock(so, 1);
3786 	inp = sotoinpcb(so);
3787 	if (inp) {
3788 		inp->inp_flags &= ~INP_INADDR_ANY;
3789 	}
3790 	socket_unlock(so, 1);
3791 }
3792 
/*
 * Fill in process-ownership info (pid/uuid/name, plus effective values
 * when the socket is delegated) for the socket backing this PCB.
 */
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	strbufcpy(soprocinfo->spi_proc_name, inp->inp_last_proc_name);
	if (so->last_pid != 0) {
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	}
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
	strbufcpy(soprocinfo->spi_e_proc_name, inp->inp_e_proc_name);
}
3816 
3817 int
inp_findinpcb_procinfo(struct inpcbinfo * pcbinfo,uint32_t flowhash,struct so_procinfo * soprocinfo)3818 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3819     struct so_procinfo *soprocinfo)
3820 {
3821 	struct inpcb *inp = NULL;
3822 	int found = 0;
3823 
3824 	bzero(soprocinfo, sizeof(struct so_procinfo));
3825 
3826 	if (!flowhash) {
3827 		return -1;
3828 	}
3829 
3830 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
3831 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3832 		if (inp->inp_state != INPCB_STATE_DEAD &&
3833 		    inp->inp_socket != NULL &&
3834 		    inp->inp_flowhash == flowhash) {
3835 			found = 1;
3836 			inp_get_soprocinfo(inp, soprocinfo);
3837 			break;
3838 		}
3839 	}
3840 	lck_rw_done(&pcbinfo->ipi_lock);
3841 
3842 	return found;
3843 }
3844 
3845 #if CONFIG_PROC_UUID_POLICY
/*
 * Apply or clear the per-process "no cellular" UUID policy on this PCB,
 * logging any resulting change in cellular accessibility.
 */
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		/* may be a no-op if SO_RESTRICT_DENY_CELLULAR is in force */
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* report the effective (delegated) identity when present */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}
3889 
3890 #if NECP
/*
 * Apply or clear the per-process NECP app-policy UUID policy on this
 * PCB, logging any resulting change.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* report the effective (delegated) identity when present */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
3931 #endif /* NECP */
3932 #endif /* !CONFIG_PROC_UUID_POLICY */
3933 
3934 #if NECP
3935 void
inp_update_necp_policy(struct inpcb * inp,struct sockaddr * override_local_addr,struct sockaddr * override_remote_addr,u_int override_bound_interface)3936 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3937 {
3938 	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3939 	if (necp_socket_should_rescope(inp) &&
3940 	    inp->inp_lport == 0 &&
3941 	    inp->inp_laddr.s_addr == INADDR_ANY &&
3942 	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3943 		// If we should rescope, and the socket is not yet bound
3944 		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3945 		inp->inp_flags2 |= INP2_SCOPED_BY_NECP;
3946 	}
3947 }
3948 #endif /* NECP */
3949 
/*
 * Refresh this socket's proc_uuid_policy state and apply the resulting
 * restrictions (cellular denial and, with NECP, the want-app-policy flag)
 * to the PCB.
 *
 * Returns 0 on success or when no policy entry exists (ENOENT is mapped
 * to 0); otherwise the error from proc_uuid_policy_lookup().
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;	/* policy generation count before the lookup */
	int err = 0;
	uint8_t *lookup_uuid = NULL;

	/* Nothing to do when UUID policy is disabled or the PCB is gone. */
	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return 0;
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return 0;
	}

#if defined(XNU_TARGET_OS_OSX)
	/* Prefer the responsible process' UUID when one is recorded. */
	if (so->so_rpid > 0) {
		lookup_uuid = so->so_ruuid;
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}
#endif
	/* Fall back to the delegated (or last) process' UUID. */
	if (lookup_uuid == NULL || err == ENOENT) {
		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	return (err == ENOENT) ? 0 : err;
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return 0;
#endif /* !CONFIG_PROC_UUID_POLICY */
}
4023 
/* When non-zero, log sockets that are denied send/receive by the policy checks below. */
unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
4029 
4030 
4031 /*
4032  * Called when we need to enforce policy restrictions in the input path.
4033  *
4034  * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
4035  */
static boolean_t
_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 * NOTE(review): the ordering of these checks is significant — the
	 * restricted-receive exemptions below only apply past the early gates.
	 */
	if (!sorestrictrecv) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	/* Socket has opted out of cellular interfaces */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	/* Socket has opted out of expensive interfaces */
	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	/* Socket has opted out of constrained interfaces */
	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	/* AWDL use requires the socket to be explicitly unrestricted */
	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* Interfaces not marked restricted-receive accept anything past here */
	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
		return FALSE;
	}

	/* Socket explicitly asked to receive on any interface */
	if (inp->inp_flags & INP_RECV_ANYIF) {
		return FALSE;
	}

	/*
	 * An entitled process can use the management interface without being bound
	 * to the interface
	 */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (INP_MANAGEMENT_ALLOWED(inp)) {
			return FALSE;
		}
		if (if_management_verbose > 1) {
			os_log(OS_LOG_DEFAULT, "_inp_restricted_recv %s:%d not allowed on management interface %s",
			    proc_best_name(current_proc()), proc_getpid(current_proc()),
			    ifp->if_xname);
		}
		return TRUE;
	}

	/* A socket bound to this restricted interface may receive on it */
	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
		return FALSE;
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}


	/* Restricted-receive interface and no exemption matched: deny */
	return TRUE;
}
4103 
4104 boolean_t
inp_restricted_recv(struct inpcb * inp,struct ifnet * ifp)4105 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
4106 {
4107 	boolean_t ret;
4108 
4109 	ret = _inp_restricted_recv(inp, ifp);
4110 	if (ret == TRUE && log_restricted) {
4111 		printf("pid %d (%s) is unable to receive packets on %s\n",
4112 		    proc_getpid(current_proc()), proc_best_name(current_proc()),
4113 		    ifp->if_xname);
4114 	}
4115 	return ret;
4116 }
4117 
4118 /*
4119  * Called when we need to enforce policy restrictions in the output path.
4120  *
4121  * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
4122  */
static boolean_t
_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Outbound restrictions.
	 */
	if (!sorestrictsend) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	/* Socket has opted out of cellular interfaces */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	/* Socket has opted out of expensive interfaces */
	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	/* Socket has opted out of constrained interfaces */
	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_ULTRA_CONSTRAINED(ifp) && uuid_is_null(inp->necp_client_uuid) &&
	    !INP_ULTRA_CONSTRAINED_ALLOWED(inp)) {
		// Non-NECP-aware sockets are not allowed to use ultra constrained interfaces
		// without an entitlement
		return TRUE;
	}

	/* AWDL use requires the socket to be explicitly unrestricted */
	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* Management interfaces require the management-data entitlement */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (!INP_MANAGEMENT_ALLOWED(inp)) {
			if (if_management_verbose > 1) {
				os_log(OS_LOG_DEFAULT, "_inp_restricted_send %s:%d not allowed on management interface %s",
				    proc_best_name(current_proc()), proc_getpid(current_proc()),
				    ifp->if_xname);
			}
			return TRUE;
		}
	}

	/* Intcoproc interfaces require explicit permission on the PCB */
	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	return FALSE;
}
4179 
4180 boolean_t
inp_restricted_send(struct inpcb * inp,struct ifnet * ifp)4181 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
4182 {
4183 	boolean_t ret;
4184 
4185 	ret = _inp_restricted_send(inp, ifp);
4186 	if (ret == TRUE && log_restricted) {
4187 		printf("pid %d (%s) is unable to transmit packets on %s\n",
4188 		    proc_getpid(current_proc()), proc_best_name(current_proc()),
4189 		    ifp->if_xname);
4190 	}
4191 	return ret;
4192 }
4193 
4194 inline void
inp_count_sndbytes(struct inpcb * inp,u_int32_t th_ack)4195 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
4196 {
4197 	struct ifnet *ifp = inp->inp_last_outifp;
4198 	struct socket *so = inp->inp_socket;
4199 	if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
4200 	    (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
4201 		int32_t unsent;
4202 
4203 		so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
4204 
4205 		/*
4206 		 * There can be data outstanding before the connection
4207 		 * becomes established -- TFO case
4208 		 */
4209 		if (so->so_snd.sb_cc > 0) {
4210 			inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
4211 		}
4212 
4213 		unsent = inp_get_sndbytes_allunsent(so, th_ack);
4214 		if (unsent > 0) {
4215 			inp_incr_sndbytes_unsent(so, unsent);
4216 		}
4217 	}
4218 }
4219 
4220 inline void
inp_incr_sndbytes_total(struct socket * so,int32_t len)4221 inp_incr_sndbytes_total(struct socket *so, int32_t len)
4222 {
4223 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4224 	struct ifnet *ifp = inp->inp_last_outifp;
4225 
4226 	if (ifp != NULL) {
4227 		VERIFY(ifp->if_sndbyte_total >= 0);
4228 		OSAddAtomic64(len, &ifp->if_sndbyte_total);
4229 	}
4230 }
4231 
4232 inline void
inp_decr_sndbytes_total(struct socket * so,int32_t len)4233 inp_decr_sndbytes_total(struct socket *so, int32_t len)
4234 {
4235 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4236 	struct ifnet *ifp = inp->inp_last_outifp;
4237 
4238 	if (ifp != NULL) {
4239 		if (ifp->if_sndbyte_total >= len) {
4240 			OSAddAtomic64(-len, &ifp->if_sndbyte_total);
4241 		} else {
4242 			ifp->if_sndbyte_total = 0;
4243 		}
4244 	}
4245 }
4246 
4247 inline void
inp_incr_sndbytes_unsent(struct socket * so,int32_t len)4248 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
4249 {
4250 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4251 	struct ifnet *ifp = inp->inp_last_outifp;
4252 
4253 	if (ifp != NULL) {
4254 		VERIFY(ifp->if_sndbyte_unsent >= 0);
4255 		OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
4256 	}
4257 }
4258 
4259 inline void
inp_decr_sndbytes_unsent(struct socket * so,int32_t len)4260 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
4261 {
4262 	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4263 		return;
4264 	}
4265 
4266 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4267 	struct ifnet *ifp = inp->inp_last_outifp;
4268 
4269 	if (ifp != NULL) {
4270 		if (ifp->if_sndbyte_unsent >= len) {
4271 			OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
4272 		} else {
4273 			ifp->if_sndbyte_unsent = 0;
4274 		}
4275 	}
4276 }
4277 
4278 inline void
inp_decr_sndbytes_allunsent(struct socket * so,u_int32_t th_ack)4279 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
4280 {
4281 	int32_t len;
4282 
4283 	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4284 		return;
4285 	}
4286 
4287 	len = inp_get_sndbytes_allunsent(so, th_ack);
4288 	inp_decr_sndbytes_unsent(so, len);
4289 }
4290 
4291 #if SKYWALK
4292 inline void
inp_update_netns_flags(struct socket * so)4293 inp_update_netns_flags(struct socket *so)
4294 {
4295 	struct inpcb *inp;
4296 	uint32_t set_flags = 0;
4297 	uint32_t clear_flags = 0;
4298 
4299 	if (!(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
4300 		return;
4301 	}
4302 
4303 	inp = sotoinpcb(so);
4304 
4305 	if (inp == NULL) {
4306 		return;
4307 	}
4308 
4309 	if (!NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
4310 		return;
4311 	}
4312 
4313 	if (so->so_options & SO_NOWAKEFROMSLEEP) {
4314 		set_flags |= NETNS_NOWAKEFROMSLEEP;
4315 	} else {
4316 		clear_flags |= NETNS_NOWAKEFROMSLEEP;
4317 	}
4318 
4319 	if (inp->inp_flags & INP_RECV_ANYIF) {
4320 		set_flags |= NETNS_RECVANYIF;
4321 	} else {
4322 		clear_flags |= NETNS_RECVANYIF;
4323 	}
4324 
4325 	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
4326 		set_flags |= NETNS_EXTBGIDLE;
4327 	} else {
4328 		clear_flags |= NETNS_EXTBGIDLE;
4329 	}
4330 
4331 	netns_change_flags(&inp->inp_netns_token, set_flags, clear_flags);
4332 }
4333 #endif /* SKYWALK */
4334 
/* Record network activity at the current uptime in the PCB's activity bitmap. */
inline void
inp_set_activity_bitmap(struct inpcb *inp)
{
	in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
}
4340 
/* Copy the PCB's network-activity bitmap into the caller-supplied buffer. */
inline void
inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
{
	bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
}
4346 
4347 void
inp_update_last_owner(struct socket * so,struct proc * p,struct proc * ep)4348 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
4349 {
4350 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4351 
4352 	if (inp == NULL) {
4353 		return;
4354 	}
4355 
4356 	if (p != NULL) {
4357 		strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
4358 	}
4359 	if (so->so_flags & SOF_DELEGATED) {
4360 		if (ep != NULL) {
4361 			strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
4362 		} else {
4363 			inp->inp_e_proc_name[0] = 0;
4364 		}
4365 	} else {
4366 		inp->inp_e_proc_name[0] = 0;
4367 	}
4368 }
4369 
/*
 * Copy the cached owner/delegate process-name strings from head's PCB
 * to so's PCB (head is presumably the listening socket — confirm with
 * callers).
 */
void
inp_copy_last_owner(struct socket *so, struct socket *head)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct inpcb *head_inp = (struct inpcb *)head->so_pcb;

	if (inp == NULL || head_inp == NULL) {
		return;
	}

	strbufcpy(inp->inp_last_proc_name, head_inp->inp_last_proc_name);
	strbufcpy(inp->inp_e_proc_name, head_inp->inp_e_proc_name);
}
4383 
/*
 * proc_iterate() callback: if the process holds one of the intcoproc /
 * management-data entitlements (or management data is globally
 * unrestricted), mark every INET/INET6 socket it has open as allowed
 * on management interfaces.
 */
static int
in_check_management_interface_proc_callout(proc_t proc, void *arg __unused)
{
	struct fileproc *fp = NULL;
	task_t __single task = proc_task(proc);
	bool allowed = false;

	if (IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
	    || IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT_DEVELOPMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
	    ) {
		allowed = true;
	}
	/* Skip processes that are neither entitled nor globally exempted. */
	if (allowed == false && management_data_unrestricted == false) {
		return PROC_RETURNED;
	}

	proc_fdlock(proc);
	fdt_foreach(fp, proc) {
		struct fileglob *fg = fp->fp_glob;
		struct socket *so;
		struct inpcb *inp;

		/* Only socket descriptors are of interest. */
		if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
			continue;
		}

		so = (struct socket *)fp_get_data(fp);
		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
			continue;
		}

		inp = (struct inpcb *)so->so_pcb;

		/* Take a use count so the PCB can't go away before we lock it. */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}

		socket_lock(so, 1);

		/* Drop the use count; bail if the PCB went defunct meanwhile. */
		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			socket_unlock(so, 1);
			continue;
		}
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;

		socket_unlock(so, 1);
	}
	proc_fdunlock(proc);

	return PROC_RETURNED;
}
4440 
/* Set once the one-shot management-interface entitlement scan has run. */
static bool in_management_interface_checked = false;
4442 
4443 static void
in_management_interface_event_callback(struct nwk_wq_entry * nwk_item)4444 in_management_interface_event_callback(struct nwk_wq_entry *nwk_item)
4445 {
4446 	kfree_type(struct nwk_wq_entry, nwk_item);
4447 
4448 	if (in_management_interface_checked == true) {
4449 		return;
4450 	}
4451 	in_management_interface_checked = true;
4452 
4453 	proc_iterate(PROC_ALLPROCLIST,
4454 	    in_check_management_interface_proc_callout,
4455 	    NULL, NULL, NULL);
4456 }
4457 
4458 void
in_management_interface_check(void)4459 in_management_interface_check(void)
4460 {
4461 	struct nwk_wq_entry *nwk_item;
4462 
4463 	if (if_management_interface_check_needed == false ||
4464 	    in_management_interface_checked == true) {
4465 		return;
4466 	}
4467 
4468 	nwk_item  = kalloc_type(struct nwk_wq_entry,
4469 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
4470 
4471 	nwk_item->func = in_management_interface_event_callback;
4472 
4473 	nwk_wq_enqueue(nwk_item);
4474 }
4475