xref: /xnu-8020.121.3/bsd/netinet/in_pcb.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>
#include <sys/kauth.h>
#include <sys/priv.h>
#include <sys/proc_uuid_policy.h>
#include <sys/syslog.h>
#include <net/dlil.h>

#include <libkern/OSAtomic.h>
#include <kern/locks.h>

#include <machine/limits.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/flowhash.h>
#include <net/flowadv.h>
#include <net/nat464_utils.h>
#include <net/ntstat.h>
#include <net/restricted_in_port.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <sys/kdebug.h>
#include <sys/random.h>

#include <dev/random/randomdev.h>
#include <mach/boolean.h>

#include <pexpert/pexpert.h>

#if NECP
#include <net/necp.h>
#endif

#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/vnode.h>

#include <os/log.h>

extern const char *proc_name_address(struct proc *);

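/*
 * inpcb_lock serializes updates to the global list of pcbinfos
 * (inpcb_head); inpcb_timeout_lock guards the timer bookkeeping
 * (inpcb_timeout_run, inpcb_garbage_collecting, inpcb_ticking,
 * inpcb_fast_timer_on) declared below.
 */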
static LCK_GRP_DECLARE(inpcb_lock_grp, "inpcb");
static LCK_ATTR_DECLARE(inpcb_lock_attr, 0, 0);
static LCK_MTX_DECLARE_ATTR(inpcb_lock, &inpcb_lock_grp, &inpcb_lock_attr);
static LCK_MTX_DECLARE_ATTR(inpcb_timeout_lock, &inpcb_lock_grp, &inpcb_lock_attr);

static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);

static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
static boolean_t inpcb_ticking = FALSE;         /* "slow" timer is scheduled */
static boolean_t inpcb_fast_timer_on = FALSE;

#define INPCB_GCREQ_THRESHOLD   50000

static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
static void inpcb_sched_timeout(void);
static void inpcb_sched_lazy_timeout(void);
static void _inpcb_sched_timeout(unsigned int);
static void inpcb_timeout(void *, void *);
const int inpcb_timeout_lazy = 10;      /* 10 seconds leeway for lazy timers */
extern int tvtohz(struct timeval *);

#if CONFIG_PROC_UUID_POLICY
static void inp_update_cellular_policy(struct inpcb *, boolean_t);
#if NECP
static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
#endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */

#define DBG_FNC_PCB_LOOKUP      NETDBG_CODE(DBG_NETTCP, (6 << 8))
#define DBG_FNC_PCB_HLOOKUP     NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))

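/*
 * When nonzero, allow new UDP port reservations to proceed even when
 * the pool of remaining UDP ports is nearly exhausted (see the check
 * in in_pcbbind()).
 */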
int allow_udp_port_exhaustion = 0;

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 */
int     ipport_lowfirstauto  = IPPORT_RESERVED - 1;     /* 1023 */
int     ipport_lowlastauto = IPPORT_RESERVEDSTART;      /* 600 */
int     ipport_firstauto = IPPORT_HIFIRSTAUTO;          /* 49152 */
int     ipport_lastauto  = IPPORT_HILASTAUTO;           /* 65535 */
int     ipport_hifirstauto = IPPORT_HIFIRSTAUTO;        /* 49152 */
int     ipport_hilastauto  = IPPORT_HILASTAUTO;         /* 65535 */

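/* Clamp "var" into the inclusive range [min, max]. */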
#define RANGECHK(var, min, max) \
	if ((var) < (min)) { (var) = (min); } \
	else if ((var) > (max)) { (var) = (max); }

static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	int new_value = *(int *)oidp->oid_arg1;
#if (DEBUG | DEVELOPMENT)
	int old_value = *(int *)oidp->oid_arg1;
	/*
	 * For unit testing, allow a non-superuser process with the
	 * proper entitlement to modify the variables.
	 */
	if (req->newptr) {
		if (proc_suser(current_proc()) != 0 &&
		    (error = priv_check_cred(kauth_cred_get(),
		    PRIV_NETINET_RESERVEDPORT, 0))) {
			return EPERM;
		}
	}
#endif /* (DEBUG | DEVELOPMENT) */

	error = sysctl_handle_int(oidp, &new_value, 0, req);
	if (!error) {
		if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
			RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
		} else {
			RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
		}
		*(int *)oidp->oid_arg1 = new_value;
	}

#if (DEBUG | DEVELOPMENT)
	os_log(OS_LOG_DEFAULT,
	    "%s:%u sysctl net.inet.ip.portrange: %d -> %d",
	    proc_best_name(current_proc()), proc_selfpid(),
	    old_value, *(int *)oidp->oid_arg1);
#endif /* (DEBUG | DEVELOPMENT) */

	return error;
}

#undef RANGECHK

SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");

#if (DEBUG | DEVELOPMENT)
#define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
#else
#define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
#endif /* (DEBUG | DEVELOPMENT) */

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
    CTLFAGS_IP_PORTRANGE,
    &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
    CTLFAGS_IP_PORTRANGE,
    &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
    CTLFAGS_IP_PORTRANGE,
    &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
    CTLFAGS_IP_PORTRANGE,
    &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
    CTLFAGS_IP_PORTRANGE,
    &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
    CTLFAGS_IP_PORTRANGE,
    &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
    CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");

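/*
 * APN fallback debug logging: apn_fallbk_log() fires only when
 * apn_fallbk_debug is raised above zero (via the
 * net.inet.apn_fallback.debug sysctl on non-macOS targets).
 */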
static uint32_t apn_fallbk_debug = 0;
#define apn_fallbk_log(x)       do { if (apn_fallbk_debug >= 1) log x; } while (0)

#if !XNU_TARGET_OS_OSX
static boolean_t apn_fallbk_enabled = TRUE;

SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &apn_fallbk_enabled, 0, "APN fallback enable");
SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &apn_fallbk_debug, 0, "APN fallback debug enable");
#else /* XNU_TARGET_OS_OSX */
static boolean_t apn_fallbk_enabled = FALSE;
#endif /* XNU_TARGET_OS_OSX */

extern int      udp_use_randomport;
extern int      tcp_use_randomport;

/* Structs used for flowhash computation */
struct inp_flowhash_key_addr {
	union {
		struct in_addr  v4;
		struct in6_addr v6;
		u_int8_t        addr8[16];
		u_int16_t       addr16[8];
		u_int32_t       addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr    infh_laddr;
	struct inp_flowhash_key_addr    infh_faddr;
	u_int32_t                       infh_lport;
	u_int32_t                       infh_fport;
	u_int32_t                       infh_af;
	u_int32_t                       infh_proto;
	u_int32_t                       infh_rand1;
	u_int32_t                       infh_rand2;
};

static u_int32_t inp_hash_seed = 0;

static int infc_cmp(const struct inpcb *, const struct inpcb *);

/* Flags used by inp_fc_getinp */
#define INPFC_SOLOCKED  0x1
#define INPFC_REMOVE    0x2
static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);

static void inp_fc_feedback(struct inpcb *);
extern void tcp_remove_from_time_wait(struct inpcb *inp);

static LCK_MTX_DECLARE_ATTR(inp_fc_lck, &inpcb_lock_grp, &inpcb_lock_attr);

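/*
 * Red-black tree of flow-controlled inpcbs, ordered by infc_cmp and
 * looked up by flow hash (see inp_fc_getinp).
 */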
RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);

/*
 * Use this inp as a key to find an inp in the flowhash tree.
 * Accesses to it are protected by inp_fc_lck.
 */
struct inpcb key_inp;

/*
 * in_pcb.c: manage the Protocol Control Blocks.
 */

void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	/* Give it an arg so that we know that this is the fast timer */
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
		inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
		panic("unable to alloc the inpcb thread call");
	}

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}

#define INPCB_HAVE_TIMER_REQ(req)       (((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	inpcb_timeout_run--;
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}

static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}

static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}

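/*
 * Arm the inpcb thread call: an offset of 0 requests the "fast" call
 * with a one-second deadline; a non-zero offset requests the lazy call
 * with that many seconds of leeway.  Called with inpcb_timeout_lock
 * held as a spin lock; converts it to a full mutex before scheduling.
 */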
static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
				inpcb_thread_call, NULL, deadline, leeway,
				THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}

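/*
 * Request a garbage-collect pass for ipi; the request is promoted to
 * INPCB_TIMER_FAST once the outstanding fast/nodelay request count
 * exceeds INPCB_GCREQ_THRESHOLD.
 */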
void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	u_int32_t gccnt;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;
	gccnt = ipi->ipi_gc_req.intimer_nodelay +
	    ipi->ipi_gc_req.intimer_fast;

	if (gccnt > INPCB_GCREQ_THRESHOLD) {
		type = INPCB_TIMER_FAST;
	}

	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

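/*
 * Request a protocol timer pass for ipi, scheduling the thread call
 * with the urgency implied by "type" (nodelay/fast/lazy).
 */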
void
inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_ticking = TRUE;
	switch (type) {
	case INPCB_TIMER_NODELAY:
		atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
		inpcb_sched_timeout();
		break;
	default:
		atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}

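/*
 * Register a protocol's pcbinfo on the global list so that its gc and
 * timer callbacks are invoked from inpcb_timeout().
 */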
void
in_pcbinfo_attach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			panic("%s: ipi %p already in the list",
			    __func__, ipi);
			/* NOTREACHED */
		}
	}
	TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
	lck_mtx_unlock(&inpcb_lock);
}

int
in_pcbinfo_detach(struct inpcbinfo *ipi)
{
	struct inpcbinfo *ipi0;
	int error = 0;

	lck_mtx_lock(&inpcb_lock);
	TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
		if (ipi0 == ipi) {
			break;
		}
	}
	if (ipi0 != NULL) {
		TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
	} else {
		error = ENXIO;
	}
	lck_mtx_unlock(&inpcb_lock);

	return error;
}

/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns:	0			Success
 *		ENOBUFS
 *		ENOMEM
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		inp = zalloc_flags(pcbinfo->ipi_zone,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	} else {
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof(*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    &pcbinfo->ipi_lock_attr);
	}

	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

	(void) inp_update_policy(inp);

	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(&pcbinfo->ipi_lock);
	return 0;
}

/*
 * in_pcblookup_local_and_cleanup does everything
 * in_pcblookup_local does but it checks for a socket
 * that's going away. Since we know that the lock is
 * held read+write when this function is called, we
 * can safely dispose of this socket like the slow
 * timer would usually do and return NULL. This is
 * great for bind.
 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		socket_lock(so, 0);

		if (so->so_usecount == 0) {
			if (inp->inp_state != INPCB_STATE_DEAD) {
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp);     /* will unlock & destroy */
			inp = NULL;
		} else {
			socket_unlock(so, 0);
		}
	}

	return inp;
}

static void
in_pcb_conflict_post_msg(u_int16_t port)
{
	/*
	 * Radar 5523020: send a kernel event notification if a
	 * non-participating socket tries to bind a port owned by a
	 * socket that has set SOF_NOTIFYCONFLICT.
	 */
	struct kev_msg ev_msg;
	struct kev_in_portinuse in_portinuse;

	bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
	bzero(&ev_msg, sizeof(struct kev_msg));
	in_portinuse.port = ntohs(port);        /* port in host order */
	in_portinuse.req_pid = proc_selfpid();
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_INET_SUBCLASS;
	ev_msg.event_code = KEV_INET_PORTINUSE;
	ev_msg.dv[0].data_ptr = &in_portinuse;
	ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
	ev_msg.dv[1].data_length = 0;
	dlil_post_complete_msg(NULL, &ev_msg);
}

/*
 * Bind an INPCB to an address and/or port.  This routine should not alter
 * the caller-supplied local address "nam".
 *
 * Returns:	0			Success
 *		EADDRNOTAVAIL		Address not available.
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported [notdef]
 *		EACCES			Permission denied
 *		EADDRINUSE		Address in use
 *		EAGAIN			Resource unavailable, try again
 *		priv_check_cred:EPERM	Operation not permitted
 */
int
in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
{
	struct socket *so = inp->inp_socket;
	unsigned short *lastport;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	u_short lport = 0, rand_port = 0;
	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
	int error, randomport, conflict = 0;
	boolean_t anonport = FALSE;
	kauth_cred_t cred;
	struct in_addr laddr;
	struct ifnet *outif = NULL;

	if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
		return EADDRNOTAVAIL;
	}
	if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
		wild = 1;
	}

	bzero(&laddr, sizeof(laddr));

	socket_unlock(so, 0); /* keep reference on socket */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
		/* another thread completed the bind */
		lck_rw_done(&pcbinfo->ipi_lock);
		socket_lock(so, 0);
		return EINVAL;
	}

	if (nam != NULL) {
		if (nam->sa_len != sizeof(struct sockaddr_in)) {
			lck_rw_done(&pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return EINVAL;
		}
#if 0
		/*
		 * We should check the family, but old programs
		 * incorrectly fail to initialize it.
		 */
		if (nam->sa_family != AF_INET) {
			lck_rw_done(&pcbinfo->ipi_lock);
			socket_lock(so, 0);
			return EAFNOSUPPORT;
		}
#endif /* 0 */
		lport = SIN(nam)->sin_port;

		if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
			/*
			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
			 * allow complete duplication of binding if
			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
			 * and a multicast address is bound on both
			 * new and duplicated sockets.
			 */
			if (so->so_options & SO_REUSEADDR) {
				reuseport = SO_REUSEADDR | SO_REUSEPORT;
			}
		} else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
			struct sockaddr_in sin;
			struct ifaddr *ifa;

			/* Sanitized for interface address searches */
			bzero(&sin, sizeof(sin));
			sin.sin_family = AF_INET;
			sin.sin_len = sizeof(struct sockaddr_in);
			sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;

			ifa = ifa_ifwithaddr(SA(&sin));
			if (ifa == NULL) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return EADDRNOTAVAIL;
			} else {
				/*
				 * Opportunistically determine the outbound
				 * interface that may be used; this may not
				 * hold true if we end up using a route
				 * going over a different interface, e.g.
				 * when sending to a local address.  This
				 * will get updated again after sending.
				 */
				IFA_LOCK(ifa);
				outif = ifa->ifa_ifp;
				IFA_UNLOCK(ifa);
				IFA_REMREF(ifa);
			}
		}

#if SKYWALK
		if (inp->inp_flags2 & INP2_EXTERNAL_PORT) {
			// Extract the external flow info
			struct ns_flow_info nfi = {};
			error = necp_client_get_netns_flow_info(inp->necp_client_uuid,
			    &nfi);
			if (error != 0) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return error;
			}

			// Extract the reserved port
			u_int16_t reserved_lport = 0;
			if (nfi.nfi_laddr.sa.sa_family == AF_INET) {
				reserved_lport = nfi.nfi_laddr.sin.sin_port;
			} else if (nfi.nfi_laddr.sa.sa_family == AF_INET6) {
				reserved_lport = nfi.nfi_laddr.sin6.sin6_port;
			} else {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return EINVAL;
			}

			// Validate or use the reserved port
			if (lport == 0) {
				lport = reserved_lport;
			} else if (lport != reserved_lport) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return EINVAL;
			}
		}

		/* Do not allow reserving a UDP port if remaining UDP port count is below 4096 */
		if (SOCK_PROTO(so) == IPPROTO_UDP && !allow_udp_port_exhaustion) {
			uint32_t current_reservations = 0;
			if (inp->inp_vflag & INP_IPV6) {
				current_reservations = netns_lookup_reservations_count_in6(inp->in6p_laddr, IPPROTO_UDP);
			} else {
				current_reservations = netns_lookup_reservations_count_in(inp->inp_laddr, IPPROTO_UDP);
			}
			if (USHRT_MAX - UDP_RANDOM_PORT_RESERVE < current_reservations) {
				log(LOG_ERR, "UDP port not available, less than 4096 UDP ports left");
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return EADDRNOTAVAIL;
			}
		}

#endif /* SKYWALK */

		if (lport != 0) {
			struct inpcb *t;
			uid_t u;

#if XNU_TARGET_OS_OSX
			if (ntohs(lport) < IPPORT_RESERVED &&
			    SIN(nam)->sin_addr.s_addr != 0 &&
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
				cred = kauth_cred_proc_ref(p);
				error = priv_check_cred(cred,
				    PRIV_NETINET_RESERVEDPORT, 0);
				kauth_cred_unref(&cred);
				if (error != 0) {
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return EACCES;
				}
			}
#endif /* XNU_TARGET_OS_OSX */
			/*
			 * Check whether the process is allowed to bind to a restricted port
			 */
			if (!current_task_can_use_restricted_in_port(lport,
			    (uint8_t)so->so_proto->pr_protocol, PORT_FLAGS_BSD)) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return EADDRINUSE;
			}

			if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
			    (u = kauth_cred_getuid(so->so_cred)) != 0 &&
			    (t = in_pcblookup_local_and_cleanup(
				    inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
				    INPLOOKUP_WILDCARD)) != NULL &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY ||
			    !(t->inp_socket->so_options & SO_REUSEPORT)) &&
			    (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
			    !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
			    (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
			    t->inp_laddr.s_addr != INADDR_ANY) &&
			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
				if ((t->inp_socket->so_flags &
				    SOF_NOTIFYCONFLICT) &&
				    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
					conflict = 1;
				}

				lck_rw_done(&pcbinfo->ipi_lock);

				if (conflict) {
					in_pcb_conflict_post_msg(lport);
				}

				socket_lock(so, 0);
				return EADDRINUSE;
			}
			t = in_pcblookup_local_and_cleanup(pcbinfo,
			    SIN(nam)->sin_addr, lport, wild);
			if (t != NULL &&
			    (reuseport & t->inp_socket->so_options) == 0 &&
			    (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
			    uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
				if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
				    t->inp_laddr.s_addr != INADDR_ANY ||
				    SOCK_DOM(so) != PF_INET6 ||
				    SOCK_DOM(t->inp_socket) != PF_INET6) {
					if ((t->inp_socket->so_flags &
					    SOF_NOTIFYCONFLICT) &&
					    !(so->so_flags & SOF_NOTIFYCONFLICT)) {
						conflict = 1;
					}

					lck_rw_done(&pcbinfo->ipi_lock);

					if (conflict) {
						in_pcb_conflict_post_msg(lport);
					}
					socket_lock(so, 0);
					return EADDRINUSE;
				}
			}
#if SKYWALK
			if ((SOCK_PROTO(so) == IPPROTO_TCP ||
			    SOCK_PROTO(so) == IPPROTO_UDP) &&
			    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
				int res_err = 0;
				if (inp->inp_vflag & INP_IPV6) {
					res_err = netns_reserve_in6(
						&inp->inp_netns_token,
						SIN6(nam)->sin6_addr,
						(uint8_t)SOCK_PROTO(so), lport, NETNS_BSD,
						NULL);
				} else {
					res_err = netns_reserve_in(
						&inp->inp_netns_token,
						SIN(nam)->sin_addr, (uint8_t)SOCK_PROTO(so),
						lport, NETNS_BSD, NULL);
				}
				if (res_err != 0) {
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return EADDRINUSE;
				}
			}
#endif /* SKYWALK */
		}
		laddr = SIN(nam)->sin_addr;
	}
	if (lport == 0) {
		u_short first, last;
		int count;
		bool found;

		/*
		 * Override wild = 1 for implicit bind (mainly used by connect)
		 * For implicit bind (lport == 0), we always use an unused port,
		 * so REUSEADDR|REUSEPORT don't apply
		 */
		wild = 1;

		randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
		    (so->so_type == SOCK_STREAM ? tcp_use_randomport :
		    udp_use_randomport);

		/*
		 * Even though this looks similar to the code in
		 * in6_pcbsetport, the v6 vs v4 checks are different.
		 */
		anonport = TRUE;
		if (inp->inp_flags & INP_HIGHPORT) {
			first = (u_short)ipport_hifirstauto;     /* sysctl */
			last  = (u_short)ipport_hilastauto;
			lastport = &pcbinfo->ipi_lasthi;
		} else if (inp->inp_flags & INP_LOWPORT) {
			cred = kauth_cred_proc_ref(p);
			error = priv_check_cred(cred,
			    PRIV_NETINET_RESERVEDPORT, 0);
			kauth_cred_unref(&cred);
			if (error != 0) {
				lck_rw_done(&pcbinfo->ipi_lock);
				socket_lock(so, 0);
				return error;
			}
			first = (u_short)ipport_lowfirstauto;    /* 1023 */
			last  = (u_short)ipport_lowlastauto;     /* 600 */
			lastport = &pcbinfo->ipi_lastlow;
		} else {
			first = (u_short)ipport_firstauto;       /* sysctl */
			last  = (u_short)ipport_lastauto;
			lastport = &pcbinfo->ipi_lastport;
		}
		/* No point in randomizing if only one port is available */

		if (first == last) {
			randomport = 0;
		}
		/*
		 * Simple check to ensure that not all ports are used up,
		 * which would cause a deadlock here.
		 *
		 * We split the two cases (up and down) so that the direction
		 * is not being tested on each round of the loop.
		 */
		if (first > last) {
			struct in_addr lookup_addr;

			/*
			 * counting down
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof(rand_port));
				*lastport =
				    first - (rand_port % (first - last));
			}
			count = first - last;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {      /* completely used? */
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return EADDRNOTAVAIL;
				}
				--*lastport;
				if (*lastport > first || *lastport < last) {
					*lastport = first;
				}
				lport = htons(*lastport);

				/*
				 * Skip if this is a restricted port, as we do not
				 * want to use restricted ports as ephemeral ports.
				 */
				if (IS_RESTRICTED_IN_PORT(lport)) {
					continue;
				}

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
#if SKYWALK
				if (found &&
				    (SOCK_PROTO(so) == IPPROTO_TCP ||
				    SOCK_PROTO(so) == IPPROTO_UDP) &&
				    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
					int res_err;
					if (inp->inp_vflag & INP_IPV6) {
						res_err = netns_reserve_in6(
							&inp->inp_netns_token,
							inp->in6p_laddr,
							(uint8_t)SOCK_PROTO(so), lport,
							NETNS_BSD, NULL);
					} else {
						res_err = netns_reserve_in(
							&inp->inp_netns_token,
							lookup_addr, (uint8_t)SOCK_PROTO(so),
							lport, NETNS_BSD, NULL);
					}
					found = res_err == 0;
				}
#endif /* SKYWALK */
			} while (!found);
		} else {
			struct in_addr lookup_addr;

			/*
			 * counting up
			 */
			if (randomport) {
				read_frandom(&rand_port, sizeof(rand_port));
				*lastport =
				    first + (rand_port % (first - last));
			}
			count = last - first;

			lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
			    inp->inp_laddr;

			found = false;
			do {
				if (count-- < 0) {      /* completely used? */
					lck_rw_done(&pcbinfo->ipi_lock);
					socket_lock(so, 0);
					return EADDRNOTAVAIL;
				}
				++*lastport;
				if (*lastport < first || *lastport > last) {
					*lastport = first;
				}
				lport = htons(*lastport);

				/*
				 * Skip if this is a restricted port, as we do not
				 * want to use restricted ports as ephemeral ports.
				 */
				if (IS_RESTRICTED_IN_PORT(lport)) {
					continue;
				}

				found = in_pcblookup_local_and_cleanup(pcbinfo,
				    lookup_addr, lport, wild) == NULL;
#if SKYWALK
				if (found &&
				    (SOCK_PROTO(so) == IPPROTO_TCP ||
				    SOCK_PROTO(so) == IPPROTO_UDP) &&
				    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
					int res_err;
					if (inp->inp_vflag & INP_IPV6) {
						res_err = netns_reserve_in6(
							&inp->inp_netns_token,
							inp->in6p_laddr,
							(uint8_t)SOCK_PROTO(so), lport,
							NETNS_BSD, NULL);
					} else {
						res_err = netns_reserve_in(
							&inp->inp_netns_token,
							lookup_addr, (uint8_t)SOCK_PROTO(so),
							lport, NETNS_BSD, NULL);
					}
					found = res_err == 0;
				}
#endif /* SKYWALK */
			} while (!found);
		}
	}
	socket_lock(so, 0);

	/*
	 * We unlocked the socket's protocol lock for a long time.
	 * The socket might have been dropped/defuncted.
	 * Check whether the world has changed since.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
#if SKYWALK
		netns_release(&inp->inp_netns_token);
#endif /* SKYWALK */
		lck_rw_done(&pcbinfo->ipi_lock);
		return ECONNABORTED;
	}

	if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
#if SKYWALK
		netns_release(&inp->inp_netns_token);
#endif /* SKYWALK */
		lck_rw_done(&pcbinfo->ipi_lock);
		return EINVAL;
	}

	if (laddr.s_addr != INADDR_ANY) {
		inp->inp_laddr = laddr;
		inp->inp_last_outifp = outif;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token, outif);
		}
#endif /* SKYWALK */
	}
	inp->inp_lport = lport;
	if (anonport) {
		inp->inp_flags |= INP_ANONPORT;
	}

	if (in_pcbinshash(inp, 1) != 0) {
		inp->inp_laddr.s_addr = INADDR_ANY;
		inp->inp_last_outifp = NULL;

#if SKYWALK
		netns_release(&inp->inp_netns_token);
#endif /* SKYWALK */
		inp->inp_lport = 0;
		if (anonport) {
			inp->inp_flags &= ~INP_ANONPORT;
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return EAGAIN;
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	sflt_notify(so, sock_evt_bound, NULL);
	return 0;
}

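/*
 * True for destinations that can never warrant APN fallback:
 * link-local, loopback, zeronet, multicast and private (RFC 1918)
 * addresses.
 */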
#define APN_FALLBACK_IP_FILTER(a)       \
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

#define APN_FALLBACK_NOTIF_INTERVAL     2 /* Magic Number */
static uint64_t last_apn_fallback = 0;

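/*
 * Decide whether an APN fallback notification should be posted for
 * this connect attempt: fallback must be enabled and not throttled,
 * the destination must pass APN_FALLBACK_IP_FILTER, the only default
 * route must be an IPv6 route over cellular, and the calling binary
 * must be a third-party app built before the App Store IPv6
 * requirement took effect.
 */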
static boolean_t
apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	if (apn_fallbk_enabled == FALSE) {
		return FALSE;
	}

	if (proc == kernproc) {
		return FALSE;
	}

	if (so && (so->so_options & SO_NOAPNFALLBK)) {
		return FALSE;
	}

	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
		return FALSE;
	}

	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
		return FALSE;
	}

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		rtfree(rt);
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route; make sure that there is no
	 * IPv4 default route before triggering the event.
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);

	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party app.
		 * Note that platform daemons use their process name as a
		 * bundle ID so we filter out bundle IDs without dots.
		 */
		const char *bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than that.
		 * We check both atime and birthtime since birthtime is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
		vfs_context_t context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
			    sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}

static void
apn_fallback_trigger(proc_t proc, struct socket *so)
{
	pid_t pid = 0;
	struct kev_msg ev_msg;
	struct kev_netevent_apnfallbk_data apnfallbk_data;

	last_apn_fallback = net_uptime();
	pid = proc_pid(proc);
	uuid_t application_uuid;
	uuid_clear(application_uuid);
	proc_getexecutableuuid(proc, application_uuid,
	    sizeof(application_uuid));

	bzero(&ev_msg, sizeof(struct kev_msg));
	ev_msg.vendor_code      = KEV_VENDOR_APPLE;
	ev_msg.kev_class        = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass     = KEV_NETEVENT_SUBCLASS;
	ev_msg.event_code       = KEV_NETEVENT_APNFALLBACK;

	bzero(&apnfallbk_data, sizeof(apnfallbk_data));

	if (so->so_flags & SOF_DELEGATED) {
		apnfallbk_data.epid = so->e_pid;
		uuid_copy(apnfallbk_data.euuid, so->e_uuid);
	} else {
		apnfallbk_data.epid = so->last_pid;
		uuid_copy(apnfallbk_data.euuid, so->last_uuid);
	}

	ev_msg.dv[0].data_ptr   = &apnfallbk_data;
	ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
	kev_post_msg(&ev_msg);
	apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
}

/*
 * Transform old in_pcbconnect() into an inner subroutine for new
 * in_pcbconnect(); do some validity-checking on the remote address
 * (in "nam") and then determine local host address (i.e., which
 * interface) to use to access that remote host.
 *
 * This routine may alter the caller-supplied remote address "nam".
 *
 * The caller may override the bound-to-interface setting of the socket
 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
 *
 * This routine might return an ifp with a reference held if the caller
 * provides a non-NULL outif, even in the error case.  The caller is
 * responsible for releasing its reference.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAFNOSUPPORT		Address family not supported
 *		EADDRNOTAVAIL		Address not available
 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	if (outif != NULL) {
		*outif = NULL;
	}
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(&in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(&in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam)) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision until the inpcb is being
			 * disposed of, because we only want to send the
			 * notification if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so we have
	 * a route to ourselves.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			IFA_ADDREF(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL) {
				IFA_REMREF(&ia->ia_ifa);
			}
			lck_rw_lock_shared(&in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				IFA_ADDREF(&ia->ia_ifa);
			}
			lck_rw_done(&in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);   /* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}

1703 /*
1704  * Outer subroutine:
1705  * Connect from a socket to a specified address.
1706  * Both address and port must be specified in argument sin.
1707  * If don't have a local address for this socket yet,
1708  * then pick one.
1709  *
1710  * The caller may override the bound-to-interface setting of the socket
1711  * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1712  */
1713 int
1714 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
1715     unsigned int ifscope, struct ifnet **outif)
1716 {
1717 	struct in_addr laddr;
1718 	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
1719 	struct inpcb *pcb;
1720 	int error;
1721 	struct socket *so = inp->inp_socket;
1722 
1723 #if CONTENT_FILTER
1724 	if (so) {
1725 		so->so_state_change_cnt++;
1726 	}
1727 #endif
1728 
1729 	/*
1730 	 * Call the inner routine to assign the local interface address.
1731 	 */
1732 	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
1733 		return error;
1734 	}
1735 
1736 	socket_unlock(so, 0);
1737 	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
1738 	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
1739 	    inp->inp_lport, 0, NULL);
1740 	socket_lock(so, 0);
1741 
1742 	/*
1743 	 * Check if the socket is still in a valid state. When we unlock this
1744 	 * embryonic socket, it can get aborted if another thread is closing
1745 	 * the listener (radar 7947600).
1746 	 */
1747 	if ((so->so_flags & SOF_ABORTED) != 0) {
1748 		return ECONNREFUSED;
1749 	}
1750 
1751 	if (pcb != NULL) {
1752 		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
1753 		return EADDRINUSE;
1754 	}
1755 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
1756 		if (inp->inp_lport == 0) {
1757 			error = in_pcbbind(inp, NULL, p);
1758 			if (error) {
1759 				return error;
1760 			}
1761 		}
1762 		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
1763 			/*
1764 			 * Lock inversion issue, mostly with udp
1765 			 * multicast packets.
1766 			 */
1767 			socket_unlock(so, 0);
1768 			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
1769 			socket_lock(so, 0);
1770 		}
1771 		inp->inp_laddr = laddr;
1772 		/* no reference needed */
1773 		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
1774 #if SKYWALK
1775 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
1776 			netns_set_ifnet(&inp->inp_netns_token,
1777 			    inp->inp_last_outifp);
1778 		}
1779 #endif /* SKYWALK */
1780 		inp->inp_flags |= INP_INADDR_ANY;
1781 	} else {
1782 		/*
1783 		 * Usage of IP_PKTINFO without a local port already
1784 		 * specified will cause the kernel to panic,
1785 		 * see rdar://problem/18508185.
1786 		 * For now return an error to avoid the panic.
1787 		 * This routine can be refactored to handle this better
1788 		 * in the future.
1789 		 */
1790 		if (inp->inp_lport == 0) {
1791 			return EINVAL;
1792 		}
1793 		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
1794 			/*
1795 			 * Lock inversion issue, mostly with udp
1796 			 * multicast packets.
1797 			 */
1798 			socket_unlock(so, 0);
1799 			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
1800 			socket_lock(so, 0);
1801 		}
1802 	}
1803 	inp->inp_faddr = sin->sin_addr;
1804 	inp->inp_fport = sin->sin_port;
1805 	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
1806 		nstat_pcb_invalidate_cache(inp);
1807 	}
1808 	in_pcbrehash(inp);
1809 	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
1810 	return 0;
1811 }
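
/*
 * Userspace sketch (illustrative, not part of this file): a per-datagram
 * outgoing-interface override supplied via an IP_PKTINFO control message
 * reaches in_pcbconnect() above as the "ifscope" parameter.  The helper
 * name and its arguments are hypothetical.
 */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t
send_via_ifindex(int s, const void *buf, size_t len,
    const struct sockaddr_in *dst, unsigned int ifindex)
{
	char cbuf[CMSG_SPACE(sizeof(struct in_pktinfo))];
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct in_pktinfo pi;
	struct msghdr msg;
	struct cmsghdr *cm;

	memset(&msg, 0, sizeof(msg));
	memset(&pi, 0, sizeof(pi));
	pi.ipi_ifindex = ifindex;	/* becomes ifscope in the kernel */

	msg.msg_name = (void *)dst;
	msg.msg_namelen = sizeof(*dst);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = IPPROTO_IP;
	cm->cmsg_type = IP_PKTINFO;
	cm->cmsg_len = CMSG_LEN(sizeof(pi));
	memcpy(CMSG_DATA(cm), &pi, sizeof(pi));

	return sendmsg(s, &msg, 0);
}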
1812 
1813 void
1814 in_pcbdisconnect(struct inpcb *inp)
1815 {
1816 	struct socket *so = inp->inp_socket;
1817 
1818 	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
1819 		nstat_pcb_cache(inp);
1820 	}
1821 
1822 	inp->inp_faddr.s_addr = INADDR_ANY;
1823 	inp->inp_fport = 0;
1824 
1825 #if CONTENT_FILTER
1826 	if (so) {
1827 		so->so_state_change_cnt++;
1828 	}
1829 #endif
1830 
1831 	if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
1832 		/* lock inversion issue, mostly with udp multicast packets */
1833 		socket_unlock(so, 0);
1834 		lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
1835 		socket_lock(so, 0);
1836 	}
1837 
1838 	in_pcbrehash(inp);
1839 	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
1840 	/*
1841 	 * A multipath subflow socket would have its SS_NOFDREF set by default,
1842 	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
1843 	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1844 	 */
1845 	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
1846 		in_pcbdetach(inp);
1847 	}
1848 }
1849 
1850 void
1851 in_pcbdetach(struct inpcb *inp)
1852 {
1853 	struct socket *so = inp->inp_socket;
1854 
1855 	if (so->so_pcb == NULL) {
1856 		/* PCB has been disposed */
1857 		panic("%s: inp=%p so=%p proto=%d so_pcb is null!", __func__,
1858 		    inp, so, SOCK_PROTO(so));
1859 		/* NOTREACHED */
1860 	}
1861 
1862 #if IPSEC
1863 	if (inp->inp_sp != NULL) {
1864 		(void) ipsec4_delete_pcbpolicy(inp);
1865 	}
1866 #endif /* IPSEC */
1867 
1868 	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
1869 		if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
1870 			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
1871 		}
1872 	}
1873 
1874 	/*
1875 	 * Let NetworkStatistics know this PCB is going away
1876 	 * before we detach it.
1877 	 */
1878 	if (nstat_collect &&
1879 	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
1880 		nstat_pcb_detach(inp);
1881 	}
1882 
1883 	/* Free memory buffer held for generating keep alives */
1884 	if (inp->inp_keepalive_data != NULL) {
1885 		kfree_data(inp->inp_keepalive_data, inp->inp_keepalive_datalen);
1886 		inp->inp_keepalive_data = NULL;
1887 	}
1888 
1889 	/* mark socket state as dead */
1890 	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
1891 		panic("%s: so=%p proto=%d couldn't set to STOPUSING",
1892 		    __func__, so, SOCK_PROTO(so));
1893 		/* NOTREACHED */
1894 	}
1895 
1896 	if (!(so->so_flags & SOF_PCBCLEARING)) {
1897 		struct ip_moptions *imo;
1898 
1899 		inp->inp_vflag = 0;
1900 		if (inp->inp_options != NULL) {
1901 			(void) m_free(inp->inp_options);
1902 			inp->inp_options = NULL;
1903 		}
1904 		ROUTE_RELEASE(&inp->inp_route);
1905 		imo = inp->inp_moptions;
1906 		if (imo != NULL) {
1907 			IMO_REMREF(imo);
1908 		}
1909 		inp->inp_moptions = NULL;
1910 		sofreelastref(so, 0);
1911 		inp->inp_state = INPCB_STATE_DEAD;
1912 
1913 		/*
1914 		 * Enqueue an event to send kernel event notification
1915 		 * if the flow had to use CLAT46 for data packets.
1916 		 */
1917 		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
1918 			/*
1919 			 * If there has been any exchange of data bytes
1920 			 * over this flow, schedule a notification to
1921 			 * report that the flow is using client-side
1922 			 * translation.
1923 			 */
1924 			if (inp->inp_stat != NULL &&
1925 			    (inp->inp_stat->txbytes != 0 ||
1926 			    inp->inp_stat->rxbytes != 0)) {
1927 				if (so->so_flags & SOF_DELEGATED) {
1928 					in6_clat46_event_enqueue_nwk_wq_entry(
1929 						IN6_CLAT46_EVENT_V4_FLOW,
1930 						so->e_pid,
1931 						so->e_uuid);
1932 				} else {
1933 					in6_clat46_event_enqueue_nwk_wq_entry(
1934 						IN6_CLAT46_EVENT_V4_FLOW,
1935 						so->last_pid,
1936 						so->last_uuid);
1937 				}
1938 			}
1939 		}
1940 
1941 		/* makes sure we're not called twice from so_close */
1942 		so->so_flags |= SOF_PCBCLEARING;
1943 
1944 		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
1945 	}
1946 }
1947 
1948 
1949 void
1950 in_pcbdispose(struct inpcb *inp)
1951 {
1952 	struct socket *so = inp->inp_socket;
1953 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
1954 
1955 	if (so != NULL && so->so_usecount != 0) {
1956 		panic("%s: so %p [%d,%d] usecount %d lockhistory %s",
1957 		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
1958 		    solockhistory_nr(so));
1959 		/* NOTREACHED */
1960 	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
1961 		if (so != NULL) {
1962 			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
1963 			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
1964 			    "flags 0x%x lockhistory %s\n", __func__, inp,
1965 			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
1966 			    so->so_usecount, so->so_retaincnt, so->so_state,
1967 			    so->so_flags, solockhistory_nr(so));
1968 			/* NOTREACHED */
1969 		} else {
1970 			panic("%s: inp %p invalid wantcnt %d no socket",
1971 			    __func__, inp, inp->inp_wantcnt);
1972 			/* NOTREACHED */
1973 		}
1974 	}
1975 
1976 	LCK_RW_ASSERT(&ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);
1977 
1978 	inp->inp_gencnt = ++ipi->ipi_gencnt;
1979 	/* access ipi in in_pcbremlists */
1980 	in_pcbremlists(inp);
1981 
1982 	if (so != NULL) {
1983 		if (so->so_proto->pr_flags & PR_PCBLOCK) {
1984 			sofreelastref(so, 0);
1985 			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
1986 				/*
1987 				 * selthreadclear() already called
1988 				 * during sofreelastref() above.
1989 				 */
1990 				sbrelease(&so->so_rcv);
1991 				sbrelease(&so->so_snd);
1992 			}
1993 			if (so->so_head != NULL) {
1994 				panic("%s: so=%p head still exist",
1995 				    __func__, so);
1996 				/* NOTREACHED */
1997 			}
1998 			lck_mtx_unlock(&inp->inpcb_mtx);
1999 
2000 #if NECP
2001 			necp_inpcb_remove_cb(inp);
2002 #endif /* NECP */
2003 
2004 			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
2005 		}
2006 		/* makes sure we're not called twice from so_close */
2007 		so->so_flags |= SOF_PCBCLEARING;
2008 		so->so_saved_pcb = (caddr_t)inp;
2009 		so->so_pcb = NULL;
2010 		inp->inp_socket = NULL;
2011 #if NECP
2012 		necp_inpcb_dispose(inp);
2013 #endif /* NECP */
2014 		/*
2015 		 * In case there is a route cached after a detach (possible
2016 		 * in the tcp case), make sure that it is freed before
2017 		 * we deallocate the structure.
2018 		 */
2019 		ROUTE_RELEASE(&inp->inp_route);
2020 		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
2021 			zfree(ipi->ipi_zone, inp);
2022 		}
2023 		sodealloc(so);
2024 	}
2025 }
2026 
2027 /*
2028  * The calling convention of in_getsockaddr() and in_getpeeraddr() was
2029  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
2030  * in struct pr_usrreqs, so that protocols can just reference them directly
2031  * without the need for a wrapper function.
2032  */
2033 int
2034 in_getsockaddr(struct socket *so, struct sockaddr **nam)
2035 {
2036 	struct inpcb *inp;
2037 	struct sockaddr_in *sin;
2038 
2039 	/*
2040 	 * Do the malloc first in case it blocks.
2041 	 */
2042 	sin = (struct sockaddr_in *)alloc_sockaddr(sizeof(*sin),
2043 	    Z_WAITOK | Z_NOFAIL);
2044 
2045 	sin->sin_family = AF_INET;
2046 
2047 	if ((inp = sotoinpcb(so)) == NULL) {
2048 		free_sockaddr(sin);
2049 		return EINVAL;
2050 	}
2051 	sin->sin_port = inp->inp_lport;
2052 	sin->sin_addr = inp->inp_laddr;
2053 
2054 	*nam = (struct sockaddr *)sin;
2055 	return 0;
2056 }
2057 
2058 int
2059 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
2060 {
2061 	struct sockaddr_in *sin = ss;
2062 	struct inpcb *inp;
2063 
2064 	VERIFY(ss != NULL);
2065 	bzero(ss, sizeof(*ss));
2066 
2067 	sin->sin_family = AF_INET;
2068 	sin->sin_len = sizeof(*sin);
2069 
2070 	if ((inp = sotoinpcb(so)) == NULL) {
2071 		return EINVAL;
2072 	}
2073 
2074 	sin->sin_port = inp->inp_lport;
2075 	sin->sin_addr = inp->inp_laddr;
2076 	return 0;
2077 }
2078 
2079 int
2080 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
2081 {
2082 	struct inpcb *inp;
2083 	struct sockaddr_in *sin;
2084 
2085 	/*
2086 	 * Do the malloc first in case it blocks.
2087 	 */
2088 	sin = (struct sockaddr_in *)alloc_sockaddr(sizeof(*sin),
2089 	    Z_WAITOK | Z_NOFAIL);
2090 
2091 	sin->sin_family = AF_INET;
2092 
2093 	if ((inp = sotoinpcb(so)) == NULL) {
2094 		free_sockaddr(sin);
2095 		return EINVAL;
2096 	}
2097 	sin->sin_port = inp->inp_fport;
2098 	sin->sin_addr = inp->inp_faddr;
2099 
2100 	*nam = (struct sockaddr *)sin;
2101 	return 0;
2102 }
2103 
2104 void
2105 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2106     int errno, void (*notify)(struct inpcb *, int))
2107 {
2108 	struct inpcb *inp;
2109 
2110 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
2111 
2112 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
2113 		if (!(inp->inp_vflag & INP_IPV4)) {
2114 			continue;
2115 		}
2116 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
2117 		    inp->inp_socket == NULL) {
2118 			continue;
2119 		}
2120 		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
2121 			continue;
2122 		}
2123 		socket_lock(inp->inp_socket, 1);
2124 		(*notify)(inp, errno);
2125 		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
2126 		socket_unlock(inp->inp_socket, 1);
2127 	}
2128 	lck_rw_done(&pcbinfo->ipi_lock);
2129 }
2130 
2131 /*
2132  * Check for alternatives when higher level complains
2133  * about service problems.  For now, invalidate cached
2134  * routing information.  If the route was created dynamically
2135  * (by a redirect), time to try a default gateway again.
2136  */
2137 void
2138 in_losing(struct inpcb *inp)
2139 {
2140 	boolean_t release = FALSE;
2141 	struct rtentry *rt;
2142 
2143 	if ((rt = inp->inp_route.ro_rt) != NULL) {
2144 		struct in_ifaddr *ia = NULL;
2145 
2146 		RT_LOCK(rt);
2147 		if (rt->rt_flags & RTF_DYNAMIC) {
2148 			/*
2149 			 * Prevent another thread from modifying rt_key,
2150 			 * rt_gateway via rt_setgate() after rt_lock is
2151 			 * dropped by marking the route as defunct.
2152 			 */
2153 			rt->rt_flags |= RTF_CONDEMNED;
2154 			RT_UNLOCK(rt);
2155 			(void) rtrequest(RTM_DELETE, rt_key(rt),
2156 			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
2157 		} else {
2158 			RT_UNLOCK(rt);
2159 		}
2160 		/* if the address is gone keep the old route in the pcb */
2161 		if (inp->inp_laddr.s_addr != INADDR_ANY &&
2162 		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2163 			/*
2164 			 * Address is around; ditch the route.  A new route
2165 			 * can be allocated the next time output is attempted.
2166 			 */
2167 			release = TRUE;
2168 		}
2169 		if (ia != NULL) {
2170 			IFA_REMREF(&ia->ia_ifa);
2171 		}
2172 	}
2173 	if (rt == NULL || release) {
2174 		ROUTE_RELEASE(&inp->inp_route);
2175 	}
2176 }
2177 
2178 /*
2179  * After a routing change, flush old routing
2180  * and allocate a (hopefully) better one.
2181  */
2182 void
2183 in_rtchange(struct inpcb *inp, int errno)
2184 {
2185 #pragma unused(errno)
2186 	boolean_t release = FALSE;
2187 	struct rtentry *rt;
2188 
2189 	if ((rt = inp->inp_route.ro_rt) != NULL) {
2190 		struct in_ifaddr *ia = NULL;
2191 
2192 		/* if address is gone, keep the old route */
2193 		if (inp->inp_laddr.s_addr != INADDR_ANY &&
2194 		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2195 			/*
2196 			 * Address is around; ditch the route.  A new route
2197 			 * can be allocated the next time output is attempted.
2198 			 */
2199 			release = TRUE;
2200 		}
2201 		if (ia != NULL) {
2202 			IFA_REMREF(&ia->ia_ifa);
2203 		}
2204 	}
2205 	if (rt == NULL || release) {
2206 		ROUTE_RELEASE(&inp->inp_route);
2207 	}
2208 }
2209 
2210 /*
2211  * Lookup a PCB based on the local address and port.
2212  */
2213 struct inpcb *
2214 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
2215     unsigned int lport_arg, int wild_okay)
2216 {
2217 	struct inpcb *inp;
2218 	int matchwild = 3, wildcard;
2219 	u_short lport = (u_short)lport_arg;
2220 
2221 	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
2222 
2223 	if (!wild_okay) {
2224 		struct inpcbhead *head;
2225 		/*
2226 		 * Look for an unconnected (wildcard foreign addr) PCB that
2227 		 * matches the local address and port we're looking for.
2228 		 */
2229 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2230 		    pcbinfo->ipi_hashmask)];
2231 		LIST_FOREACH(inp, head, inp_hash) {
2232 			if (!(inp->inp_vflag & INP_IPV4)) {
2233 				continue;
2234 			}
2235 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
2236 			    inp->inp_laddr.s_addr == laddr.s_addr &&
2237 			    inp->inp_lport == lport) {
2238 				/*
2239 				 * Found.
2240 				 */
2241 				return inp;
2242 			}
2243 		}
2244 		/*
2245 		 * Not found.
2246 		 */
2247 		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2248 		return NULL;
2249 	} else {
2250 		struct inpcbporthead *porthash;
2251 		struct inpcbport *phd;
2252 		struct inpcb *match = NULL;
2253 		/*
2254 		 * Best fit PCB lookup.
2255 		 *
2256 		 * First see if this local port is in use by looking on the
2257 		 * port hash list.
2258 		 */
2259 		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
2260 		    pcbinfo->ipi_porthashmask)];
2261 		LIST_FOREACH(phd, porthash, phd_hash) {
2262 			if (phd->phd_port == lport) {
2263 				break;
2264 			}
2265 		}
2266 		if (phd != NULL) {
2267 			/*
2268 			 * Port is in use by one or more PCBs. Look for best
2269 			 * fit.
2270 			 */
2271 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
2272 				wildcard = 0;
2273 				if (!(inp->inp_vflag & INP_IPV4)) {
2274 					continue;
2275 				}
2276 				if (inp->inp_faddr.s_addr != INADDR_ANY) {
2277 					wildcard++;
2278 				}
2279 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
2280 					if (laddr.s_addr == INADDR_ANY) {
2281 						wildcard++;
2282 					} else if (inp->inp_laddr.s_addr !=
2283 					    laddr.s_addr) {
2284 						continue;
2285 					}
2286 				} else {
2287 					if (laddr.s_addr != INADDR_ANY) {
2288 						wildcard++;
2289 					}
2290 				}
2291 				if (wildcard < matchwild) {
2292 					match = inp;
2293 					matchwild = wildcard;
2294 					if (matchwild == 0) {
2295 						break;
2296 					}
2297 				}
2298 			}
2299 		}
2300 		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
2301 		    0, 0, 0, 0);
2302 		return match;
2303 	}
2304 }
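
/*
 * Scoring sketch for the best-fit lookup above (illustrative): a lookup
 * for laddr 10.0.0.1 against an unconnected pcb bound to 10.0.0.1 scores
 * wildcard == 0 (an exact fit, which ends the scan early), while a pcb
 * bound to INADDR_ANY on the same port scores wildcard == 1; the lowest
 * score wins.
 */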
2305 
2306 /*
2307  * Check if PCB exists in hash list.
2308  */
2309 int
2310 in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2311     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2312     uid_t *uid, gid_t *gid, struct ifnet *ifp)
2313 {
2314 	struct inpcbhead *head;
2315 	struct inpcb *inp;
2316 	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2317 	int found = 0;
2318 	struct inpcb *local_wild = NULL;
2319 	struct inpcb *local_wild_mapped = NULL;
2320 
2321 	*uid = UID_MAX;
2322 	*gid = GID_MAX;
2323 
2324 	/*
2325 	 * We may have found the pcb in the last lookup - check this first.
2326 	 */
2327 
2328 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
2329 
2330 	/*
2331 	 * First look for an exact match.
2332 	 */
2333 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2334 	    pcbinfo->ipi_hashmask)];
2335 	LIST_FOREACH(inp, head, inp_hash) {
2336 		if (!(inp->inp_vflag & INP_IPV4)) {
2337 			continue;
2338 		}
2339 		if (inp_restricted_recv(inp, ifp)) {
2340 			continue;
2341 		}
2342 
2343 #if NECP
2344 		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2345 			continue;
2346 		}
2347 #endif /* NECP */
2348 
2349 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
2350 		    inp->inp_laddr.s_addr == laddr.s_addr &&
2351 		    inp->inp_fport == fport &&
2352 		    inp->inp_lport == lport) {
2353 			if ((found = (inp->inp_socket != NULL))) {
2354 				/*
2355 				 * Found.
2356 				 */
2357 				*uid = kauth_cred_getuid(
2358 					inp->inp_socket->so_cred);
2359 				*gid = kauth_cred_getgid(
2360 					inp->inp_socket->so_cred);
2361 			}
2362 			lck_rw_done(&pcbinfo->ipi_lock);
2363 			return found;
2364 		}
2365 	}
2366 
2367 	if (!wildcard) {
2368 		/*
2369 		 * Not found.
2370 		 */
2371 		lck_rw_done(&pcbinfo->ipi_lock);
2372 		return 0;
2373 	}
2374 
2375 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2376 	    pcbinfo->ipi_hashmask)];
2377 	LIST_FOREACH(inp, head, inp_hash) {
2378 		if (!(inp->inp_vflag & INP_IPV4)) {
2379 			continue;
2380 		}
2381 		if (inp_restricted_recv(inp, ifp)) {
2382 			continue;
2383 		}
2384 
2385 #if NECP
2386 		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2387 			continue;
2388 		}
2389 #endif /* NECP */
2390 
2391 		if (inp->inp_faddr.s_addr == INADDR_ANY &&
2392 		    inp->inp_lport == lport) {
2393 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
2394 				if ((found = (inp->inp_socket != NULL))) {
2395 					*uid = kauth_cred_getuid(
2396 						inp->inp_socket->so_cred);
2397 					*gid = kauth_cred_getgid(
2398 						inp->inp_socket->so_cred);
2399 				}
2400 				lck_rw_done(&pcbinfo->ipi_lock);
2401 				return found;
2402 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2403 				if (inp->inp_socket &&
2404 				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2405 					local_wild_mapped = inp;
2406 				} else {
2407 					local_wild = inp;
2408 				}
2409 			}
2410 		}
2411 	}
2412 	if (local_wild == NULL) {
2413 		if (local_wild_mapped != NULL) {
2414 			if ((found = (local_wild_mapped->inp_socket != NULL))) {
2415 				*uid = kauth_cred_getuid(
2416 					local_wild_mapped->inp_socket->so_cred);
2417 				*gid = kauth_cred_getgid(
2418 					local_wild_mapped->inp_socket->so_cred);
2419 			}
2420 			lck_rw_done(&pcbinfo->ipi_lock);
2421 			return found;
2422 		}
2423 		lck_rw_done(&pcbinfo->ipi_lock);
2424 		return 0;
2425 	}
2426 	if ((found = (local_wild->inp_socket != NULL))) {
2427 		*uid = kauth_cred_getuid(
2428 			local_wild->inp_socket->so_cred);
2429 		*gid = kauth_cred_getgid(
2430 			local_wild->inp_socket->so_cred);
2431 	}
2432 	lck_rw_done(&pcbinfo->ipi_lock);
2433 	return found;
2434 }
2435 
2436 /*
2437  * Lookup PCB in hash list.
2438  */
2439 struct inpcb *
2440 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2441     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2442     struct ifnet *ifp)
2443 {
2444 	struct inpcbhead *head;
2445 	struct inpcb *inp;
2446 	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2447 	struct inpcb *local_wild = NULL;
2448 	struct inpcb *local_wild_mapped = NULL;
2449 
2450 	/*
2451 	 * We may have found the pcb in the last lookup - check this first.
2452 	 */
2453 
2454 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
2455 
2456 	/*
2457 	 * First look for an exact match.
2458 	 */
2459 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2460 	    pcbinfo->ipi_hashmask)];
2461 	LIST_FOREACH(inp, head, inp_hash) {
2462 		if (!(inp->inp_vflag & INP_IPV4)) {
2463 			continue;
2464 		}
2465 		if (inp_restricted_recv(inp, ifp)) {
2466 			continue;
2467 		}
2468 
2469 #if NECP
2470 		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2471 			continue;
2472 		}
2473 #endif /* NECP */
2474 
2475 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
2476 		    inp->inp_laddr.s_addr == laddr.s_addr &&
2477 		    inp->inp_fport == fport &&
2478 		    inp->inp_lport == lport) {
2479 			/*
2480 			 * Found.
2481 			 */
2482 			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2483 			    WNT_STOPUSING) {
2484 				lck_rw_done(&pcbinfo->ipi_lock);
2485 				return inp;
2486 			} else {
2487 				/* it's there but dead, say it isn't found */
2488 				lck_rw_done(&pcbinfo->ipi_lock);
2489 				return NULL;
2490 			}
2491 		}
2492 	}
2493 
2494 	if (!wildcard) {
2495 		/*
2496 		 * Not found.
2497 		 */
2498 		lck_rw_done(&pcbinfo->ipi_lock);
2499 		return NULL;
2500 	}
2501 
2502 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2503 	    pcbinfo->ipi_hashmask)];
2504 	LIST_FOREACH(inp, head, inp_hash) {
2505 		if (!(inp->inp_vflag & INP_IPV4)) {
2506 			continue;
2507 		}
2508 		if (inp_restricted_recv(inp, ifp)) {
2509 			continue;
2510 		}
2511 
2512 #if NECP
2513 		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2514 			continue;
2515 		}
2516 #endif /* NECP */
2517 
2518 		if (inp->inp_faddr.s_addr == INADDR_ANY &&
2519 		    inp->inp_lport == lport) {
2520 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
2521 				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2522 				    WNT_STOPUSING) {
2523 					lck_rw_done(&pcbinfo->ipi_lock);
2524 					return inp;
2525 				} else {
2526 					/* it's dead; say it isn't found */
2527 					lck_rw_done(&pcbinfo->ipi_lock);
2528 					return NULL;
2529 				}
2530 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2531 				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2532 					local_wild_mapped = inp;
2533 				} else {
2534 					local_wild = inp;
2535 				}
2536 			}
2537 		}
2538 	}
2539 	if (local_wild == NULL) {
2540 		if (local_wild_mapped != NULL) {
2541 			if (in_pcb_checkstate(local_wild_mapped,
2542 			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2543 				lck_rw_done(&pcbinfo->ipi_lock);
2544 				return local_wild_mapped;
2545 			} else {
2546 				/* it's dead; say it isn't found */
2547 				lck_rw_done(&pcbinfo->ipi_lock);
2548 				return NULL;
2549 			}
2550 		}
2551 		lck_rw_done(&pcbinfo->ipi_lock);
2552 		return NULL;
2553 	}
2554 	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2555 		lck_rw_done(&pcbinfo->ipi_lock);
2556 		return local_wild;
2557 	}
2558 	/*
2559 	 * It's either not found or is already dead.
2560 	 */
2561 	lck_rw_done(&pcbinfo->ipi_lock);
2562 	return NULL;
2563 }
2564 
2565 /*
2566  * @brief	Insert PCB onto various hash lists.
2567  *
2568  * @param	inp Pointer to internet protocol control block
2569  * @param	locked	Indicates whether ipi_lock (protecting the pcb list)
2570  *              is already held by the caller.
2571  *
2572  * @return	int error on failure and 0 on success
2573  */
2574 int
2575 in_pcbinshash(struct inpcb *inp, int locked)
2576 {
2577 	struct inpcbhead *pcbhash;
2578 	struct inpcbporthead *pcbporthash;
2579 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2580 	struct inpcbport *phd;
2581 	u_int32_t hashkey_faddr;
2582 
2583 	if (!locked) {
2584 		if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
2585 			/*
2586 			 * Lock inversion issue, mostly with udp
2587 			 * multicast packets
2588 			 */
2589 			socket_unlock(inp->inp_socket, 0);
2590 			lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
2591 			socket_lock(inp->inp_socket, 0);
2592 		}
2593 	}
2594 
2595 	/*
2596 	 * This routine or its caller may have briefly given up the
2597 	 * socket's protocol lock.
2598 	 * During that time the socket may have been dropped.
2599 	 * Guard against that here.
2600 	 */
2601 	if (inp->inp_state == INPCB_STATE_DEAD) {
2602 		if (!locked) {
2603 			lck_rw_done(&pcbinfo->ipi_lock);
2604 		}
2605 		return ECONNABORTED;
2606 	}
2607 
2608 
2609 	if (inp->inp_vflag & INP_IPV6) {
2610 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2611 	} else {
2612 		hashkey_faddr = inp->inp_faddr.s_addr;
2613 	}
2614 
2615 	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2616 	    inp->inp_fport, pcbinfo->ipi_hashmask);
2617 
2618 	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2619 
2620 	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2621 	    pcbinfo->ipi_porthashmask)];
2622 
2623 	/*
2624 	 * Go through port list and look for a head for this lport.
2625 	 */
2626 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
2627 		if (phd->phd_port == inp->inp_lport) {
2628 			break;
2629 		}
2630 	}
2631 
2632 	/*
2633 	 * If none exists, malloc one and tack it on.
2634 	 */
2635 	if (phd == NULL) {
2636 		phd = kalloc_type(struct inpcbport, Z_WAITOK | Z_NOFAIL);
2637 		phd->phd_port = inp->inp_lport;
2638 		LIST_INIT(&phd->phd_pcblist);
2639 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2640 	}
2641 
2642 	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2643 
2644 #if SKYWALK
2645 	int err;
2646 	struct socket *so = inp->inp_socket;
2647 	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
2648 	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
2649 		if (inp->inp_vflag & INP_IPV6) {
2650 			err = netns_reserve_in6(&inp->inp_netns_token,
2651 			    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
2652 			    NETNS_BSD | NETNS_PRERESERVED, NULL);
2653 		} else {
2654 			err = netns_reserve_in(&inp->inp_netns_token,
2655 			    inp->inp_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
2656 			    NETNS_BSD | NETNS_PRERESERVED, NULL);
2657 		}
2658 		if (err) {
2659 			if (!locked) {
2660 				lck_rw_done(&pcbinfo->ipi_lock);
2661 			}
2662 			return err;
2663 		}
2664 		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
2665 		inp_update_netns_flags(so);
2666 	}
2667 #endif /* SKYWALK */
2668 
2669 	inp->inp_phd = phd;
2670 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2671 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2672 	inp->inp_flags2 |= INP2_INHASHLIST;
2673 
2674 	if (!locked) {
2675 		lck_rw_done(&pcbinfo->ipi_lock);
2676 	}
2677 
2678 #if NECP
2679 	// This call catches the original setting of the local address
2680 	inp_update_necp_policy(inp, NULL, NULL, 0);
2681 #endif /* NECP */
2682 
2683 	return 0;
2684 }
2685 
2686 /*
2687  * Move PCB to the proper hash bucket when { faddr, fport } have been
2688  * changed. NOTE: This does not handle the case of the lport changing (the
2689  * hashed port list would have to be updated as well), so the lport must
2690  * not change after in_pcbinshash() has been called.
2691  */
2692 void
2693 in_pcbrehash(struct inpcb *inp)
2694 {
2695 	struct inpcbhead *head;
2696 	u_int32_t hashkey_faddr;
2697 
2698 #if SKYWALK
2699 	struct socket *so = inp->inp_socket;
2700 	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
2701 	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
2702 		int err;
2703 		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2704 			if (inp->inp_vflag & INP_IPV6) {
2705 				err = netns_change_addr_in6(
2706 					&inp->inp_netns_token, inp->in6p_laddr);
2707 			} else {
2708 				err = netns_change_addr_in(
2709 					&inp->inp_netns_token, inp->inp_laddr);
2710 			}
2711 		} else {
2712 			if (inp->inp_vflag & INP_IPV6) {
2713 				err = netns_reserve_in6(&inp->inp_netns_token,
2714 				    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so),
2715 				    inp->inp_lport, NETNS_BSD, NULL);
2716 			} else {
2717 				err = netns_reserve_in(&inp->inp_netns_token,
2718 				    inp->inp_laddr, (uint8_t)SOCK_PROTO(so),
2719 				    inp->inp_lport, NETNS_BSD, NULL);
2720 			}
2721 		}
2722 		/* We are assuming that whatever code paths result in a rehash
2723 		 * did their due diligence and ensured that the given
2724 		 * <proto, laddr, lport> tuple was free ahead of time. Just
2725 		 * reserving the lport on INADDR_ANY should be enough, since
2726 		 * that will block Skywalk from trying to reserve that same
2727 		 * port. Given this assumption, the above netns calls should
2728 		 * never fail. */
2729 		VERIFY(err == 0);
2730 
2731 		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
2732 		inp_update_netns_flags(so);
2733 	}
2734 #endif /* SKYWALK */
2735 	if (inp->inp_vflag & INP_IPV6) {
2736 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2737 	} else {
2738 		hashkey_faddr = inp->inp_faddr.s_addr;
2739 	}
2740 
2741 	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2742 	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2743 	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2744 
2745 	if (inp->inp_flags2 & INP2_INHASHLIST) {
2746 		LIST_REMOVE(inp, inp_hash);
2747 		inp->inp_flags2 &= ~INP2_INHASHLIST;
2748 	}
2749 
2750 	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2751 	LIST_INSERT_HEAD(head, inp, inp_hash);
2752 	inp->inp_flags2 |= INP2_INHASHLIST;
2753 
2754 #if NECP
2755 	// This call catches updates to the remote addresses
2756 	inp_update_necp_policy(inp, NULL, NULL, 0);
2757 #endif /* NECP */
2758 }
2759 
2760 /*
2761  * Remove PCB from various lists.
2762  * Must be called with the pcbinfo lock held in exclusive mode.
2763  */
2764 void
2765 in_pcbremlists(struct inpcb *inp)
2766 {
2767 	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2768 
2769 	/*
2770 	 * Check if it's in hashlist -- an inp is placed in hashlist when
2771  * its local port gets assigned. So it should also be present
2772 	 * in the port list.
2773 	 */
2774 	if (inp->inp_flags2 & INP2_INHASHLIST) {
2775 		struct inpcbport *phd = inp->inp_phd;
2776 
2777 		VERIFY(phd != NULL && inp->inp_lport > 0);
2778 
2779 		LIST_REMOVE(inp, inp_hash);
2780 		inp->inp_hash.le_next = NULL;
2781 		inp->inp_hash.le_prev = NULL;
2782 
2783 		LIST_REMOVE(inp, inp_portlist);
2784 		inp->inp_portlist.le_next = NULL;
2785 		inp->inp_portlist.le_prev = NULL;
2786 		if (LIST_EMPTY(&phd->phd_pcblist)) {
2787 			LIST_REMOVE(phd, phd_hash);
2788 			kfree_type(struct inpcbport, phd);
2789 		}
2790 		inp->inp_phd = NULL;
2791 		inp->inp_flags2 &= ~INP2_INHASHLIST;
2792 #if SKYWALK
2793 		/* Free up the port in the namespace registrar */
2794 		netns_release(&inp->inp_netns_token);
2795 		netns_release(&inp->inp_wildcard_netns_token);
2796 #endif /* SKYWALK */
2797 	}
2798 	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2799 
2800 	if (inp->inp_flags2 & INP2_TIMEWAIT) {
2801 		/* Remove from time-wait queue */
2802 		tcp_remove_from_time_wait(inp);
2803 		inp->inp_flags2 &= ~INP2_TIMEWAIT;
2804 		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2805 		inp->inp_pcbinfo->ipi_twcount--;
2806 	} else {
2807 		/* Remove from global inp list if it is not time-wait */
2808 		LIST_REMOVE(inp, inp_list);
2809 	}
2810 
2811 	if (inp->inp_flags2 & INP2_IN_FCTREE) {
2812 		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
2813 		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2814 	}
2815 
2816 	inp->inp_pcbinfo->ipi_count--;
2817 }
2818 
2819 /*
2820  * Mechanism used to defer the memory release of PCBs.
2821  * The pcb list will contain the pcb until the reaper can clean it
2822  * up if the following conditions are met: 1) state is "DEAD",
2823  * 2) wantcnt is STOPUSING, and 3) usecount is 0.
2824  * This function is called to either mark the pcb ready for
2825  * recycling (WNT_STOPUSING), or to acquire (WNT_ACQUIRE) or
2826  * release (WNT_RELEASE) a "want" reference on it.
2827  */
2828 int
2829 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2830 {
2831 	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2832 	UInt32 origwant;
2833 	UInt32 newwant;
2834 
2835 	switch (mode) {
2836 	case WNT_STOPUSING:
2837 		/*
2838 		 * Try to mark the pcb as ready for recycling.  CAS with
2839 		 * STOPUSING; on success we're done, otherwise it's in use
2840 		 * and will be marked later.
2841 		 */
2842 		if (locked == 0) {
2843 			socket_lock(pcb->inp_socket, 1);
2844 		}
2845 		pcb->inp_state = INPCB_STATE_DEAD;
2846 
2847 stopusing:
2848 		if (pcb->inp_socket->so_usecount < 0) {
2849 			panic("%s: pcb=%p so=%p usecount is negative",
2850 			    __func__, pcb, pcb->inp_socket);
2851 			/* NOTREACHED */
2852 		}
2853 		if (locked == 0) {
2854 			socket_unlock(pcb->inp_socket, 1);
2855 		}
2856 
2857 		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2858 
2859 		origwant = *wantcnt;
2860 		if ((UInt16) origwant == 0xffff) { /* should stop using */
2861 			return WNT_STOPUSING;
2862 		}
2863 		newwant = 0xffff;
2864 		if ((UInt16) origwant == 0) {
2865 			/* try to mark it as unusable now */
2866 			OSCompareAndSwap(origwant, newwant, wantcnt);
2867 		}
2868 		return WNT_STOPUSING;
2869 
2870 	case WNT_ACQUIRE:
2871 		/*
2872 		 * Try to take a reference on the pcb.  If it is already
2873 		 * marked WNT_STOPUSING, bail out; otherwise increase the
2874 		 * count and return WNT_ACQUIRE.
2875 		 */
2876 		do {
2877 			origwant = *wantcnt;
2878 			if ((UInt16) origwant == 0xffff) {
2879 				/* should stop using */
2880 				return WNT_STOPUSING;
2881 			}
2882 			newwant = origwant + 1;
2883 		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2884 		return WNT_ACQUIRE;
2885 
2886 	case WNT_RELEASE:
2887 		/*
2888 		 * Release a reference.  If the count drops to zero and the
2889 		 * pcb state is DEAD, mark the pcb as WNT_STOPUSING.
2890 		 */
2891 		if (locked == 0) {
2892 			socket_lock(pcb->inp_socket, 1);
2893 		}
2894 
2895 		do {
2896 			origwant = *wantcnt;
2897 			if ((UInt16) origwant == 0x0) {
2898 				panic("%s: pcb=%p release with zero count",
2899 				    __func__, pcb);
2900 				/* NOTREACHED */
2901 			}
2902 			if ((UInt16) origwant == 0xffff) {
2903 				/* should stop using */
2904 				if (locked == 0) {
2905 					socket_unlock(pcb->inp_socket, 1);
2906 				}
2907 				return WNT_STOPUSING;
2908 			}
2909 			newwant = origwant - 1;
2910 		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
2911 
2912 		if (pcb->inp_state == INPCB_STATE_DEAD) {
2913 			goto stopusing;
2914 		}
2915 		if (pcb->inp_socket->so_usecount < 0) {
2916 			panic("%s: RELEASE pcb=%p so=%p usecount is negative",
2917 			    __func__, pcb, pcb->inp_socket);
2918 			/* NOTREACHED */
2919 		}
2920 
2921 		if (locked == 0) {
2922 			socket_unlock(pcb->inp_socket, 1);
2923 		}
2924 		return WNT_RELEASE;
2925 
2926 	default:
2927 		panic("%s: so=%p not a valid state =%x", __func__,
2928 		    pcb->inp_socket, mode);
2929 		/* NOTREACHED */
2930 	}
2931 
2932 	/* NOTREACHED */
2933 	return mode;
2934 }
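
/*
 * Usage sketch (illustrative; cf. in_pcbnotifyall() above).  The helper
 * below is hypothetical and not part of this file: take a "want"
 * reference before touching a pcb found on a global list, and drop it
 * once done.
 */
static void
inp_apply_if_usable(struct inpcb *inp, void (*fn)(struct inpcb *, int),
    int arg)
{
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
		return;		/* pcb is dead or being recycled; skip it */
	}
	socket_lock(inp->inp_socket, 1);
	fn(inp, arg);
	(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
	socket_unlock(inp->inp_socket, 1);
}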
2935 
2936 /*
2937  * inpcb_to_compat copies specific bits of an inpcb to an inpcb_compat.
2938  * The inpcb_compat data structure is passed to user space and must
2939  * not change. We intentionally avoid copying pointers.
2940  */
2941 void
2942 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
2943 {
2944 	bzero(inp_compat, sizeof(*inp_compat));
2945 	inp_compat->inp_fport = inp->inp_fport;
2946 	inp_compat->inp_lport = inp->inp_lport;
2947 	inp_compat->nat_owner = 0;
2948 	inp_compat->nat_cookie = 0;
2949 	inp_compat->inp_gencnt = inp->inp_gencnt;
2950 	inp_compat->inp_flags = inp->inp_flags;
2951 	inp_compat->inp_flow = inp->inp_flow;
2952 	inp_compat->inp_vflag = inp->inp_vflag;
2953 	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
2954 	inp_compat->inp_ip_p = inp->inp_ip_p;
2955 	inp_compat->inp_dependfaddr.inp6_foreign =
2956 	    inp->inp_dependfaddr.inp6_foreign;
2957 	inp_compat->inp_dependladdr.inp6_local =
2958 	    inp->inp_dependladdr.inp6_local;
2959 	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2960 	inp_compat->inp_depend6.inp6_hlim = 0;
2961 	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2962 	inp_compat->inp_depend6.inp6_ifindex = 0;
2963 	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2964 }
2965 
2966 #if XNU_TARGET_OS_OSX
2967 void
2968 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
2969 {
2970 	xinp->inp_fport = inp->inp_fport;
2971 	xinp->inp_lport = inp->inp_lport;
2972 	xinp->inp_gencnt = inp->inp_gencnt;
2973 	xinp->inp_flags = inp->inp_flags;
2974 	xinp->inp_flow = inp->inp_flow;
2975 	xinp->inp_vflag = inp->inp_vflag;
2976 	xinp->inp_ip_ttl = inp->inp_ip_ttl;
2977 	xinp->inp_ip_p = inp->inp_ip_p;
2978 	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
2979 	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
2980 	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
2981 	xinp->inp_depend6.inp6_hlim = 0;
2982 	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
2983 	xinp->inp_depend6.inp6_ifindex = 0;
2984 	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
2985 }
2986 #endif /* XNU_TARGET_OS_OSX */
2987 
2988 /*
2989  * The following routines implement this scheme:
2990  *
2991  * Callers of ip_output() that intend to cache the route in the inpcb pass
2992  * a local copy of the struct route to ip_output().  Using a local copy of
2993  * the cached route significantly simplifies things as IP no longer has to
2994  * worry about having exclusive access to the passed in struct route, since
2995  * it's defined in the caller's stack; in essence, this allows for a lock-
2996  * less operation when updating the struct route at the IP level and below,
2997  * whenever necessary. The scheme works as follows:
2998  *
2999  * Prior to dropping the socket's lock and calling ip_output(), the caller
3000  * copies the struct route from the inpcb into its stack, and adds a reference
3001  * to the cached route entry, if there was any.  The socket's lock is then
3002  * dropped and ip_output() is called with a pointer to the copy of struct
3003  * route defined on the stack (not to the one in the inpcb.)
3004  *
3005  * Upon returning from ip_output(), the caller then acquires the socket's
3006  * lock and synchronizes the cache; if there is no route cached in the inpcb,
3007  * it copies the local copy of struct route (which may or may not contain any
3008  * route) back into the cache; otherwise, if the inpcb has a route cached in
3009  * it, the one in the local copy will be freed, if there's any.  Trashing the
3010  * cached route in the inpcb can be avoided because ip_output() is single-
3011  * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
3012  * by the socket/transport layer.)
3013  */
3014 void
3015 inp_route_copyout(struct inpcb *inp, struct route *dst)
3016 {
3017 	struct route *src = &inp->inp_route;
3018 
3019 	socket_lock_assert_owned(inp->inp_socket);
3020 
3021 	/*
3022 	 * If the route in the PCB is stale or not for IPv4, blow it away;
3023 	 * this is possible in the IPv4-mapped address case.
3024 	 */
3025 	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
3026 		ROUTE_RELEASE(src);
3027 	}
3028 
3029 	route_copyout(dst, src, sizeof(*dst));
3030 }
3031 
3032 void
3033 inp_route_copyin(struct inpcb *inp, struct route *src)
3034 {
3035 	struct route *dst = &inp->inp_route;
3036 
3037 	socket_lock_assert_owned(inp->inp_socket);
3038 
3039 	/* Minor sanity check */
3040 	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
3041 		panic("%s: wrong or corrupted route: %p", __func__, src);
3042 	}
3043 
3044 	route_copyin(src, dst, sizeof(*src));
3045 }
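
/*
 * Sketch of the scheme described above (hypothetical helper, not part of
 * this file): copy the cached route onto the stack, transmit without the
 * socket lock, then synchronize the pcb's route cache on return.
 */
static int
inp_transmit_with_route_cache(struct inpcb *inp, struct mbuf *m,
    struct ip_moptions *imo, struct ip_out_args *ipoa)
{
	struct route ro;
	int error;

	/* caller holds the socket lock; copy the cached route to the stack */
	inp_route_copyout(inp, &ro);
	socket_unlock(inp->inp_socket, 0);

	/* ip_output() operates lock-free on the local copy */
	error = ip_output(m, inp->inp_options, &ro, IP_OUTARGS, imo, ipoa);

	socket_lock(inp->inp_socket, 0);
	/* synchronize the cache; the redundant route reference is freed */
	inp_route_copyin(inp, &ro);
	return error;
}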
3046 
3047 /*
3048  * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
3049  */
3050 int
3051 inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
3052 {
3053 	struct ifnet *ifp = NULL;
3054 
3055 	ifnet_head_lock_shared();
3056 	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
3057 	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
3058 		ifnet_head_done();
3059 		return ENXIO;
3060 	}
3061 	ifnet_head_done();
3062 
3063 	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);
3064 
3065 	/*
3066 	 * A zero interface scope value indicates an "unbind".
3067 	 * Otherwise, take in whatever value the app desires;
3068 	 * the app may already know the scope (or force itself
3069 	 * to such a scope) ahead of time before the interface
3070 	 * gets attached.  It doesn't matter either way; any
3071 	 * route lookup from this point on will require an
3072 	 * exact match for the embedded interface scope.
3073 	 */
3074 	inp->inp_boundifp = ifp;
3075 	if (inp->inp_boundifp == NULL) {
3076 		inp->inp_flags &= ~INP_BOUND_IF;
3077 	} else {
3078 		inp->inp_flags |= INP_BOUND_IF;
3079 	}
3080 
3081 	/* Blow away any cached route in the PCB */
3082 	ROUTE_RELEASE(&inp->inp_route);
3083 
3084 	if (pifp != NULL) {
3085 		*pifp = ifp;
3086 	}
3087 
3088 	return 0;
3089 }
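
/*
 * Userspace sketch (illustrative, not part of this file): scoping a socket
 * to an interface with the IP_BOUND_IF socket option, which is handled by
 * inp_bindif() above.  The interface name "en0" is only an example.
 */
#include <net/if.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int
bind_socket_to_en0(int s)
{
	unsigned int idx = if_nametoindex("en0");

	if (idx == 0) {
		return -1;	/* no such interface */
	}
	/* an index of 0 (IFSCOPE_NONE) would instead "unbind" the socket */
	return setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof(idx));
}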
3090 
3091 /*
3092  * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3093  * as well as for setting PROC_UUID_NO_CELLULAR policy.
3094  */
3095 void
3096 inp_set_nocellular(struct inpcb *inp)
3097 {
3098 	inp->inp_flags |= INP_NO_IFT_CELLULAR;
3099 
3100 	/* Blow away any cached route in the PCB */
3101 	ROUTE_RELEASE(&inp->inp_route);
3102 }
3103 
3104 /*
3105  * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3106  * as well as for clearing PROC_UUID_NO_CELLULAR policy.
3107  */
3108 void
3109 inp_clear_nocellular(struct inpcb *inp)
3110 {
3111 	struct socket *so = inp->inp_socket;
3112 
3113 	/*
3114 	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
3115  * has a higher precedence than INP_NO_IFT_CELLULAR.  Clear the flag
3116 	 * if and only if the socket is unrestricted.
3117 	 */
3118 	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
3119 		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
3120 
3121 		/* Blow away any cached route in the PCB */
3122 		ROUTE_RELEASE(&inp->inp_route);
3123 	}
3124 }
3125 
3126 void
3127 inp_set_noexpensive(struct inpcb *inp)
3128 {
3129 	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
3130 
3131 	/* Blow away any cached route in the PCB */
3132 	ROUTE_RELEASE(&inp->inp_route);
3133 }
3134 
3135 void
3136 inp_set_noconstrained(struct inpcb *inp)
3137 {
3138 	inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;
3139 
3140 	/* Blow away any cached route in the PCB */
3141 	ROUTE_RELEASE(&inp->inp_route);
3142 }
3143 
3144 void
3145 inp_set_awdl_unrestricted(struct inpcb *inp)
3146 {
3147 	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
3148 
3149 	/* Blow away any cached route in the PCB */
3150 	ROUTE_RELEASE(&inp->inp_route);
3151 }
3152 
3153 boolean_t
3154 inp_get_awdl_unrestricted(struct inpcb *inp)
3155 {
3156 	return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
3157 }
3158 
3159 void
3160 inp_clear_awdl_unrestricted(struct inpcb *inp)
3161 {
3162 	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
3163 
3164 	/* Blow away any cached route in the PCB */
3165 	ROUTE_RELEASE(&inp->inp_route);
3166 }
3167 
3168 void
3169 inp_set_intcoproc_allowed(struct inpcb *inp)
3170 {
3171 	inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
3172 
3173 	/* Blow away any cached route in the PCB */
3174 	ROUTE_RELEASE(&inp->inp_route);
3175 }
3176 
3177 boolean_t
3178 inp_get_intcoproc_allowed(struct inpcb *inp)
3179 {
3180 	return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3181 }
3182 
3183 void
3184 inp_clear_intcoproc_allowed(struct inpcb *inp)
3185 {
3186 	inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
3187 
3188 	/* Blow away any cached route in the PCB */
3189 	ROUTE_RELEASE(&inp->inp_route);
3190 }
3191 
3192 #if NECP
3193 /*
3194  * Called when PROC_UUID_NECP_APP_POLICY is set.
3195  */
3196 void
3197 inp_set_want_app_policy(struct inpcb *inp)
3198 {
3199 	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
3200 }
3201 
3202 /*
3203  * Called when PROC_UUID_NECP_APP_POLICY is cleared.
3204  */
3205 void
3206 inp_clear_want_app_policy(struct inpcb *inp)
3207 {
3208 	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
3209 }
3210 #endif /* NECP */
3211 
3212 /*
3213  * Calculate flow hash for an inp, used by an interface to identify a
3214  * flow. When an interface provides flow control advisory, this flow
3215  * hash is used as an identifier.
3216  */
3217 u_int32_t
3218 inp_calc_flowhash(struct inpcb *inp)
3219 {
3220 	struct inp_flowhash_key fh __attribute__((aligned(8)));
3221 	u_int32_t flowhash = 0;
3222 	struct inpcb *tmp_inp = NULL;
3223 
3224 	if (inp_hash_seed == 0) {
3225 		inp_hash_seed = RandomULong();
3226 	}
3227 
3228 	bzero(&fh, sizeof(fh));
3229 
3230 	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
3231 	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));
3232 
3233 	fh.infh_lport = inp->inp_lport;
3234 	fh.infh_fport = inp->inp_fport;
3235 	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
3236 	fh.infh_proto = inp->inp_ip_p;
3237 	fh.infh_rand1 = RandomULong();
3238 	fh.infh_rand2 = RandomULong();
3239 
3240 try_again:
3241 	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
3242 	if (flowhash == 0) {
3243 		/* try to get a non-zero flowhash */
3244 		inp_hash_seed = RandomULong();
3245 		goto try_again;
3246 	}
3247 
3248 	inp->inp_flowhash = flowhash;
3249 
3250 	/* Insert the inp into inp_fc_tree */
3251 	lck_mtx_lock_spin(&inp_fc_lck);
3252 	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
3253 	if (tmp_inp != NULL) {
3254 		/*
3255 		 * There is a different inp with the same flowhash.
3256 		 * There can be a collision on flow hash but the
3257 		 * probability is low.  Let's recompute the
3258 		 * flowhash.
3259 		 */
3260 		lck_mtx_unlock(&inp_fc_lck);
3261 		/* recompute hash seed */
3262 		inp_hash_seed = RandomULong();
3263 		goto try_again;
3264 	}
3265 
3266 	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
3267 	inp->inp_flags2 |= INP2_IN_FCTREE;
3268 	lck_mtx_unlock(&inp_fc_lck);
3269 
3270 	return flowhash;
3271 }
3272 
3273 void
3274 inp_flowadv(uint32_t flowhash)
3275 {
3276 	struct inpcb *inp;
3277 
3278 	inp = inp_fc_getinp(flowhash, 0);
3279 
3280 	if (inp == NULL) {
3281 		return;
3282 	}
3283 	inp_fc_feedback(inp);
3284 }
3285 
3286 /*
3287  * Function to compare inp_fc_entries in inp flow control tree
3288  */
3289 static inline int
3290 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
3291 {
3292 	return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
3293 	           sizeof(inp1->inp_flowhash));
3294 }
3295 
3296 static struct inpcb *
3297 inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
3298 {
3299 	struct inpcb *inp = NULL;
3300 	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;
3301 
3302 	lck_mtx_lock_spin(&inp_fc_lck);
3303 	key_inp.inp_flowhash = flowhash;
3304 	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
3305 	if (inp == NULL) {
3306 		/* inp is not present, return */
3307 		lck_mtx_unlock(&inp_fc_lck);
3308 		return NULL;
3309 	}
3310 
3311 	if (flags & INPFC_REMOVE) {
3312 		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
3313 		lck_mtx_unlock(&inp_fc_lck);
3314 
3315 		bzero(&(inp->infc_link), sizeof(inp->infc_link));
3316 		inp->inp_flags2 &= ~INP2_IN_FCTREE;
3317 		return NULL;
3318 	}
3319 
3320 	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
3321 		inp = NULL;
3322 	}
3323 	lck_mtx_unlock(&inp_fc_lck);
3324 
3325 	return inp;
3326 }
3327 
3328 static void
3329 inp_fc_feedback(struct inpcb *inp)
3330 {
3331 	struct socket *so = inp->inp_socket;
3332 
3333 	/* we already hold a want_cnt on this inp, socket can't be null */
3334 	VERIFY(so != NULL);
3335 	socket_lock(so, 1);
3336 
3337 	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
3338 		socket_unlock(so, 1);
3339 		return;
3340 	}
3341 
3342 	if (inp->inp_sndinprog_cnt > 0) {
3343 		inp->inp_flags |= INP_FC_FEEDBACK;
3344 	}
3345 
3346 	/*
3347 	 * Return if the connection is not in flow-controlled state.
3348 	 * This can happen if the connection experienced
3349 	 * loss while it was in the flow-controlled state.
3350 	 */
3351 	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
3352 		socket_unlock(so, 1);
3353 		return;
3354 	}
3355 	inp_reset_fc_state(inp);
3356 
3357 	if (SOCK_TYPE(so) == SOCK_STREAM) {
3358 		inp_fc_unthrottle_tcp(inp);
3359 	}
3360 
3361 	socket_unlock(so, 1);
3362 }
3363 
3364 void
3365 inp_reset_fc_state(struct inpcb *inp)
3366 {
3367 	struct socket *so = inp->inp_socket;
3368 	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
3369 	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
3370 
3371 	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3372 
3373 	if (suspended) {
3374 		so->so_flags &= ~(SOF_SUSPENDED);
3375 		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
3376 	}
3377 
3378 	/* Give a write wakeup to unblock the socket */
3379 	if (needwakeup) {
3380 		sowwakeup(so);
3381 	}
3382 }
3383 
3384 int
3385 inp_set_fc_state(struct inpcb *inp, int advcode)
3386 {
3387 	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
3388 	struct inpcb *tmp_inp = NULL;
3389 	/*
3390 	 * If there was feedback from the interface while a
3391 	 * send operation was in progress, ignore this flow
3392 	 * advisory to avoid a race between setting the
3393 	 * flow-controlled state and receiving feedback from
3394 	 * the interface.
3395 	 */
3396 	if (inp->inp_flags & INP_FC_FEEDBACK) {
3397 		return 0;
3398 	}
3399 
3400 	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3401 	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
3402 	    INPFC_SOLOCKED)) != NULL) {
3403 		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
3404 			return 0;
3405 		}
3406 		VERIFY(tmp_inp == inp);
3407 		switch (advcode) {
3408 		case FADV_FLOW_CONTROLLED:
3409 			inp->inp_flags |= INP_FLOW_CONTROLLED;
3410 			break;
3411 		case FADV_SUSPENDED:
3412 			inp->inp_flags |= INP_FLOW_SUSPENDED;
3413 			soevent(inp->inp_socket,
3414 			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));
3415 
3416 			/* Record the fact that suspend event was sent */
3417 			inp->inp_socket->so_flags |= SOF_SUSPENDED;
3418 			break;
3419 		}
3420 
3421 		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
3422 			inp_fc_throttle_tcp(inp);
3423 		}
3424 		return 1;
3425 	}
3426 	return 0;
3427 }
3428 
3429 /*
3430  * Handler for SO_FLUSH socket option.
3431  */
3432 int
3433 inp_flush(struct inpcb *inp, int optval)
3434 {
3435 	u_int32_t flowhash = inp->inp_flowhash;
3436 	struct ifnet *rtifp, *oifp;
3437 
3438 	/* Either all classes or one of the valid ones */
3439 	if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
3440 		return EINVAL;
3441 	}
3442 
3443 	/* We need a flow hash for identification */
3444 	if (flowhash == 0) {
3445 		return 0;
3446 	}
3447 
3448 	/* Grab the interfaces from the route and pcb */
3449 	rtifp = ((inp->inp_route.ro_rt != NULL) ?
3450 	    inp->inp_route.ro_rt->rt_ifp : NULL);
3451 	oifp = inp->inp_last_outifp;
3452 
3453 	if (rtifp != NULL) {
3454 		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3455 	}
3456 	if (oifp != NULL && oifp != rtifp) {
3457 		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3458 	}
3459 
3460 	return 0;
3461 }
3462 
3463 /*
3464  * Clear the INP_INADDR_ANY flag (special case for PPP only)
3465  */
3466 void
3467 inp_clear_INP_INADDR_ANY(struct socket *so)
3468 {
3469 	struct inpcb *inp = NULL;
3470 
3471 	socket_lock(so, 1);
3472 	inp = sotoinpcb(so);
3473 	if (inp) {
3474 		inp->inp_flags &= ~INP_INADDR_ANY;
3475 	}
3476 	socket_unlock(so, 1);
3477 }
3478 
3479 void
3480 inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
3481 {
3482 	struct socket *so = inp->inp_socket;
3483 
3484 	soprocinfo->spi_pid = so->last_pid;
3485 	strlcpy(&soprocinfo->spi_proc_name[0], &inp->inp_last_proc_name[0],
3486 	    sizeof(soprocinfo->spi_proc_name));
3487 	if (so->last_pid != 0) {
3488 		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
3489 	}
3490 	/*
3491 	 * When not delegated, the effective pid is the same as the real pid
3492 	 */
3493 	if (so->so_flags & SOF_DELEGATED) {
3494 		soprocinfo->spi_delegated = 1;
3495 		soprocinfo->spi_epid = so->e_pid;
3496 		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
3497 	} else {
3498 		soprocinfo->spi_delegated = 0;
3499 		soprocinfo->spi_epid = so->last_pid;
3500 	}
3501 	strlcpy(&soprocinfo->spi_e_proc_name[0], &inp->inp_e_proc_name[0],
3502 	    sizeof(soprocinfo->spi_e_proc_name));
3503 }
3504 
3505 int
3506 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3507     struct so_procinfo *soprocinfo)
3508 {
3509 	struct inpcb *inp = NULL;
3510 	int found = 0;
3511 
3512 	bzero(soprocinfo, sizeof(struct so_procinfo));
3513 
3514 	if (!flowhash) {
3515 		return -1;
3516 	}
3517 
3518 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
3519 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3520 		if (inp->inp_state != INPCB_STATE_DEAD &&
3521 		    inp->inp_socket != NULL &&
3522 		    inp->inp_flowhash == flowhash) {
3523 			found = 1;
3524 			inp_get_soprocinfo(inp, soprocinfo);
3525 			break;
3526 		}
3527 	}
3528 	lck_rw_done(&pcbinfo->ipi_lock);
3529 
3530 	return found;
3531 }
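
/*
 * A hypothetical in-kernel caller of inp_findinpcb_procinfo(), of the
 * kind used to attribute a flow (identified by its flow hash) back to
 * the owning process.  The choice of tcbinfo and the log message are
 * illustrative assumptions, not taken from this file.
 */
#if 0	/* illustrative sketch only; not part of the kernel build */
static void
example_report_flow_owner(uint32_t flowhash)
{
	struct so_procinfo spi;

	if (inp_findinpcb_procinfo(&tcbinfo, flowhash, &spi) == 1) {
		printf("flow 0x%x belongs to pid %d (%s)\n",
		    flowhash, spi.spi_pid, spi.spi_proc_name);
	}
}
#endif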
3532 
3533 #if CONFIG_PROC_UUID_POLICY
3534 static void
3535 inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
3536 {
3537 	struct socket *so = inp->inp_socket;
3538 	int before, after;
3539 
3540 	VERIFY(so != NULL);
3541 	VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3542 
3543 	before = INP_NO_CELLULAR(inp);
3544 	if (set) {
3545 		inp_set_nocellular(inp);
3546 	} else {
3547 		inp_clear_nocellular(inp);
3548 	}
3549 	after = INP_NO_CELLULAR(inp);
3550 	if (net_io_policy_log && (before != after)) {
3551 		static const char *ok = "OK";
3552 		static const char *nok = "NOACCESS";
3553 		uuid_string_t euuid_buf;
3554 		pid_t epid;
3555 
3556 		if (so->so_flags & SOF_DELEGATED) {
3557 			uuid_unparse(so->e_uuid, euuid_buf);
3558 			epid = so->e_pid;
3559 		} else {
3560 			uuid_unparse(so->last_uuid, euuid_buf);
3561 			epid = so->last_pid;
3562 		}
3563 
3564 		/* allow this socket to generate another notification event */
3565 		so->so_ifdenied_notifies = 0;
3566 
3567 		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3568 		    "euuid %s%s %s->%s\n", __func__,
3569 		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3570 		    SOCK_TYPE(so), epid, euuid_buf,
3571 		    (so->so_flags & SOF_DELEGATED) ?
3572 		    " [delegated]" : "",
3573 		    ((before < after) ? ok : nok),
3574 		    ((before < after) ? nok : ok));
3575 	}
3576 }
3577 
3578 #if NECP
3579 static void
3580 inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
3581 {
3582 	struct socket *so = inp->inp_socket;
3583 	int before, after;
3584 
3585 	VERIFY(so != NULL);
3586 	VERIFY(inp->inp_state != INPCB_STATE_DEAD);
3587 
3588 	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3589 	if (set) {
3590 		inp_set_want_app_policy(inp);
3591 	} else {
3592 		inp_clear_want_app_policy(inp);
3593 	}
3594 	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
3595 	if (net_io_policy_log && (before != after)) {
3596 		static const char *wanted = "WANTED";
3597 		static const char *unwanted = "UNWANTED";
3598 		uuid_string_t euuid_buf;
3599 		pid_t epid;
3600 
3601 		if (so->so_flags & SOF_DELEGATED) {
3602 			uuid_unparse(so->e_uuid, euuid_buf);
3603 			epid = so->e_pid;
3604 		} else {
3605 			uuid_unparse(so->last_uuid, euuid_buf);
3606 			epid = so->last_pid;
3607 		}
3608 
3609 		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
3610 		    "euuid %s%s %s->%s\n", __func__,
3611 		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
3612 		    SOCK_TYPE(so), epid, euuid_buf,
3613 		    (so->so_flags & SOF_DELEGATED) ?
3614 		    " [delegated]" : "",
3615 		    ((before < after) ? unwanted : wanted),
3616 		    ((before < after) ? wanted : unwanted));
3617 	}
3618 }
3619 #endif /* NECP */
3620 #endif /* CONFIG_PROC_UUID_POLICY */
3621 
3622 #if NECP
3623 void
3624 inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
3625 {
3626 	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
3627 	if (necp_socket_should_rescope(inp) &&
3628 	    inp->inp_lport == 0 &&
3629 	    inp->inp_laddr.s_addr == INADDR_ANY &&
3630 	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
3631 		// If we should rescope and the socket is not yet bound
3632 		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
3633 		inp->inp_flags2 |= INP2_SCOPED_BY_NECP;
3634 	}
3635 }
3636 #endif /* NECP */
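
/*
 * The rescoping gate above, restated as a standalone predicate: NECP
 * may pin a socket to a specific interface only while the socket is
 * still fully unbound (no local port, no IPv4 address, no IPv6
 * address).  example_should_rescope_unbound() is a hypothetical name
 * used for this sketch.
 */
#if 0	/* illustrative sketch only; not part of the kernel build */
static boolean_t
example_should_rescope_unbound(struct inpcb *inp)
{
	return necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr);
}
#endif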
3637 
3638 int
3639 inp_update_policy(struct inpcb *inp)
3640 {
3641 #if CONFIG_PROC_UUID_POLICY
3642 	struct socket *so = inp->inp_socket;
3643 	uint32_t pflags = 0;
3644 	int32_t ogencnt;
3645 	int err = 0;
3646 	uint8_t *lookup_uuid = NULL;
3647 
3648 	if (!net_io_policy_uuid ||
3649 	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
3650 		return 0;
3651 	}
3652 
3653 	/*
3654 	 * Kernel-created sockets that aren't acting on behalf of another
3655 	 * process (not delegated) are currently exempt from UUID policy checks.
3656 	 */
3657 	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
3658 		return 0;
3659 	}
3660 
3661 #if defined(XNU_TARGET_OS_OSX)
3662 	if (so->so_rpid > 0) {
3663 		lookup_uuid = so->so_ruuid;
3664 		ogencnt = so->so_policy_gencnt;
3665 		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
3666 	}
3667 #endif
3668 	if (lookup_uuid == NULL || err == ENOENT) {
3669 		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
3670 		ogencnt = so->so_policy_gencnt;
3671 		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
3672 	}
3673 
3674 	/*
3675 	 * Discard the cached generation count if the entry is gone (ENOENT),
3676 	 * so that we go through the checks below.
3677 	 */
3678 	if (err == ENOENT && ogencnt != 0) {
3679 		so->so_policy_gencnt = 0;
3680 	}
3681 
3682 	/*
3683 	 * If the generation count has changed, inspect the policy flags
3684 	 * and act accordingly.  If a policy flag was previously set and
3685 	 * the UUID is no longer present in the table (ENOENT), treat it
3686 	 * as if the flag has been cleared.
3687 	 */
3688 	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
3689 		/* update cellular policy for this socket */
3690 		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
3691 			inp_update_cellular_policy(inp, TRUE);
3692 		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
3693 			inp_update_cellular_policy(inp, FALSE);
3694 		}
3695 #if NECP
3696 		/* update necp want app policy for this socket */
3697 		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
3698 			inp_update_necp_want_app_policy(inp, TRUE);
3699 		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
3700 			inp_update_necp_want_app_policy(inp, FALSE);
3701 		}
3702 #endif /* NECP */
3703 	}
3704 
3705 	return (err == ENOENT) ? 0 : err;
3706 #else /* !CONFIG_PROC_UUID_POLICY */
3707 #pragma unused(inp)
3708 	return 0;
3709 #endif /* !CONFIG_PROC_UUID_POLICY */
3710 }
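
/*
 * A toy model of the cellular decision made above, assuming only the
 * lookup result and the PROC_UUID_NO_CELLULAR bit matter: a successful
 * lookup with the bit set denies cellular, any result without the bit
 * restores access, and a vanished entry with the bit reported leaves
 * the state untouched.  TOY_UUID_NO_CELLULAR and
 * model_cellular_denied() are hypothetical stand-ins, not kernel
 * interfaces.
 */
#if 0	/* illustrative sketch only; not part of the kernel build */
#include <assert.h>
#include <errno.h>
#include <stdint.h>

#define TOY_UUID_NO_CELLULAR	0x1	/* models PROC_UUID_NO_CELLULAR */

static int
model_cellular_denied(int err, uint32_t pflags)
{
	if (err == 0 && (pflags & TOY_UUID_NO_CELLULAR)) {
		return 1;	/* inp_update_cellular_policy(inp, TRUE) */
	}
	if (!(pflags & TOY_UUID_NO_CELLULAR)) {
		return 0;	/* inp_update_cellular_policy(inp, FALSE) */
	}
	return -1;		/* entry gone, bit set: left unchanged */
}

int
main(void)
{
	assert(model_cellular_denied(0, TOY_UUID_NO_CELLULAR) == 1);
	assert(model_cellular_denied(0, 0) == 0);
	assert(model_cellular_denied(ENOENT, 0) == 0);
	return 0;
}
#endif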
3711 
3712 static unsigned int log_restricted;
3713 SYSCTL_DECL(_net_inet);
3714 SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
3715     CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
3716     "Log network restrictions");
3717 /*
3718  * Called when we need to enforce policy restrictions in the input path.
3719  *
3720  * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
3721  */
3722 static boolean_t
3723 _inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3724 {
3725 	VERIFY(inp != NULL);
3726 
3727 	/*
3728 	 * Inbound restrictions.
3729 	 */
3730 	if (!sorestrictrecv) {
3731 		return FALSE;
3732 	}
3733 
3734 	if (ifp == NULL) {
3735 		return FALSE;
3736 	}
3737 
3738 	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
3739 		return TRUE;
3740 	}
3741 
3742 	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
3743 		return TRUE;
3744 	}
3745 
3746 	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
3747 		return TRUE;
3748 	}
3749 
3750 	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
3751 		return TRUE;
3752 	}
3753 
3754 	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
3755 		return FALSE;
3756 	}
3757 
3758 	if (inp->inp_flags & INP_RECV_ANYIF) {
3759 		return FALSE;
3760 	}
3761 
3762 	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
3763 		return FALSE;
3764 	}
3765 
3766 	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
3767 		return TRUE;
3768 	}
3769 
3770 	return TRUE;
3771 }
3772 
3773 boolean_t
3774 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3775 {
3776 	boolean_t ret;
3777 
3778 	ret = _inp_restricted_recv(inp, ifp);
3779 	if (ret == TRUE && log_restricted) {
3780 		printf("pid %d (%s) is unable to receive packets on %s\n",
3781 		    proc_getpid(current_proc()), proc_best_name(current_proc()),
3782 		    ifp->if_xname);
3783 	}
3784 	return ret;
3785 }
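
/*
 * A hypothetical input-path caller, showing how the check above is
 * meant to be used: consult the policy against the mbuf's receive
 * interface and drop the packet when receiving is restricted.
 * example_input_check() is an illustrative name, not a function in
 * this file.
 */
#if 0	/* illustrative sketch only; not part of the kernel build */
static int
example_input_check(struct inpcb *inp, struct mbuf *m)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;

	if (inp_restricted_recv(inp, ifp)) {
		m_freem(m);	/* policy forbids receiving on ifp */
		return EPERM;
	}
	return 0;
}
#endif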
3786 
3787 /*
3788  * Called when we need to enforce policy restrictions in the output path.
3789  *
3790  * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
3791  */
3792 static boolean_t
3793 _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3794 {
3795 	VERIFY(inp != NULL);
3796 
3797 	/*
3798 	 * Outbound restrictions.
3799 	 */
3800 	if (!sorestrictsend) {
3801 		return FALSE;
3802 	}
3803 
3804 	if (ifp == NULL) {
3805 		return FALSE;
3806 	}
3807 
3808 	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
3809 		return TRUE;
3810 	}
3811 
3812 	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
3813 		return TRUE;
3814 	}
3815 
3816 	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
3817 		return TRUE;
3818 	}
3819 
3820 	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
3821 		return TRUE;
3822 	}
3823 
3824 	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
3825 		return TRUE;
3826 	}
3827 
3828 	return FALSE;
3829 }
3830 
3831 boolean_t
3832 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
3833 {
3834 	boolean_t ret;
3835 
3836 	ret = _inp_restricted_send(inp, ifp);
3837 	if (ret == TRUE && log_restricted) {
3838 		printf("pid %d (%s) is unable to transmit packets on %s\n",
3839 		    proc_getpid(current_proc()), proc_best_name(current_proc()),
3840 		    ifp->if_xname);
3841 	}
3842 	return ret;
3843 }
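
/*
 * The log_restricted knob declared above is exported as
 * net.inet.log_restricted; a minimal userspace sketch of turning it on
 * so that the printf()s in inp_restricted_recv() and
 * inp_restricted_send() fire:
 */
#if 0	/* illustrative sketch only; not part of the kernel build */
#include <sys/types.h>
#include <sys/sysctl.h>

static int
enable_restriction_logging(void)
{
	int on = 1;

	return sysctlbyname("net.inet.log_restricted",
	    NULL, NULL, &on, sizeof(on));
}
#endif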
3844 
3845 inline void
3846 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
3847 {
3848 	struct ifnet *ifp = inp->inp_last_outifp;
3849 	struct socket *so = inp->inp_socket;
3850 	if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
3851 	    (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
3852 		int32_t unsent;
3853 
3854 		so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
3855 
3856 		/*
3857 		 * There can be data outstanding before the connection
3858 		 * becomes established -- the TCP Fast Open (TFO) case.
3859 		 */
3860 		if (so->so_snd.sb_cc > 0) {
3861 			inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
3862 		}
3863 
3864 		unsent = inp_get_sndbytes_allunsent(so, th_ack);
3865 		if (unsent > 0) {
3866 			inp_incr_sndbytes_unsent(so, unsent);
3867 		}
3868 	}
3869 }
3870 
3871 inline void
3872 inp_incr_sndbytes_total(struct socket *so, int32_t len)
3873 {
3874 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
3875 	struct ifnet *ifp = inp->inp_last_outifp;
3876 
3877 	if (ifp != NULL) {
3878 		VERIFY(ifp->if_sndbyte_total >= 0);
3879 		OSAddAtomic64(len, &ifp->if_sndbyte_total);
3880 	}
3881 }
3882 
3883 inline void
3884 inp_decr_sndbytes_total(struct socket *so, int32_t len)
3885 {
3886 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
3887 	struct ifnet *ifp = inp->inp_last_outifp;
3888 
3889 	if (ifp != NULL) {
3890 		if (ifp->if_sndbyte_total >= len) {
3891 			OSAddAtomic64(-len, &ifp->if_sndbyte_total);
3892 		} else {
3893 			ifp->if_sndbyte_total = 0;
3894 		}
3895 	}
3896 }
3897 
3898 inline void
3899 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
3900 {
3901 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
3902 	struct ifnet *ifp = inp->inp_last_outifp;
3903 
3904 	if (ifp != NULL) {
3905 		VERIFY(ifp->if_sndbyte_unsent >= 0);
3906 		OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
3907 	}
3908 }
3909 
3910 inline void
3911 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
3912 {
3913 	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
3914 		return;
3915 	}
3916 
3917 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
3918 	struct ifnet *ifp = inp->inp_last_outifp;
3919 
3920 	if (ifp != NULL) {
3921 		if (ifp->if_sndbyte_unsent >= len) {
3922 			OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
3923 		} else {
3924 			ifp->if_sndbyte_unsent = 0;
3925 		}
3926 	}
3927 }
3928 
3929 inline void
3930 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
3931 {
3932 	int32_t len;
3933 
3934 	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
3935 		return;
3936 	}
3937 
3938 	len = inp_get_sndbytes_allunsent(so, th_ack);
3939 	inp_decr_sndbytes_unsent(so, len);
3940 }
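
/*
 * A toy model of the per-interface send-byte accounting above, assuming
 * a single flow: both counters grow as data is queued, and the unsent
 * counter is clamped at zero on the way down exactly as the inp_decr_*
 * helpers do.  The toy_* names are hypothetical and only illustrate how
 * the increment/decrement calls pair up.
 */
#if 0	/* illustrative sketch only; not part of the kernel build */
#include <assert.h>
#include <stdint.h>

struct toy_ifnet {
	int64_t sndbyte_total;
	int64_t sndbyte_unsent;
};

static void
toy_enqueue(struct toy_ifnet *ifp, int32_t len)
{
	ifp->sndbyte_total += len;	/* inp_incr_sndbytes_total() */
	ifp->sndbyte_unsent += len;	/* inp_incr_sndbytes_unsent() */
}

static void
toy_mark_sent(struct toy_ifnet *ifp, int32_t len)
{
	/* clamp at zero, as inp_decr_sndbytes_unsent() does */
	ifp->sndbyte_unsent =
	    (ifp->sndbyte_unsent >= len) ? ifp->sndbyte_unsent - len : 0;
}

int
main(void)
{
	struct toy_ifnet ifp = { 0, 0 };

	toy_enqueue(&ifp, 1000);
	toy_mark_sent(&ifp, 400);
	assert(ifp.sndbyte_total == 1000 && ifp.sndbyte_unsent == 600);
	return 0;
}
#endif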
3941 
3942 #if SKYWALK
3943 inline void
3944 inp_update_netns_flags(struct socket *so)
3945 {
3946 	struct inpcb *inp;
3947 	uint32_t set_flags = 0;
3948 	uint32_t clear_flags = 0;
3949 
3950 	if (!(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
3951 		return;
3952 	}
3953 
3954 	inp = sotoinpcb(so);
3955 
3956 	if (inp == NULL) {
3957 		return;
3958 	}
3959 
3960 	if (!NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
3961 		return;
3962 	}
3963 
3964 	if (so->so_options & SO_NOWAKEFROMSLEEP) {
3965 		set_flags |= NETNS_NOWAKEFROMSLEEP;
3966 	} else {
3967 		clear_flags |= NETNS_NOWAKEFROMSLEEP;
3968 	}
3969 
3970 	if (inp->inp_flags & INP_RECV_ANYIF) {
3971 		set_flags |= NETNS_RECVANYIF;
3972 	} else {
3973 		clear_flags |= NETNS_RECVANYIF;
3974 	}
3975 
3976 	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
3977 		set_flags |= NETNS_EXTBGIDLE;
3978 	} else {
3979 		clear_flags |= NETNS_EXTBGIDLE;
3980 	}
3981 
3982 	netns_change_flags(&inp->inp_netns_token, set_flags, clear_flags);
3983 }
3984 #endif /* SKYWALK */
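
/*
 * Hypothetical userspace triggers for the mappings above; each of these
 * setsockopt()s eventually funnels back through
 * inp_update_netns_flags().  SO_NOWAKEFROMSLEEP, SO_RECV_ANYIF and
 * SO_EXTENDED_BK_IDLE are private options, so this assumes the private
 * <sys/socket.h> definitions and is illustrative only.
 */
#if 0	/* illustrative sketch only; not part of the kernel build */
#include <sys/socket.h>

static void
example_netns_knobs(int sock)
{
	int one = 1;

	/* sets NETNS_NOWAKEFROMSLEEP */
	setsockopt(sock, SOL_SOCKET, SO_NOWAKEFROMSLEEP, &one, sizeof(one));
	/* allows delivery on any interface (NETNS_RECVANYIF) */
	setsockopt(sock, SOL_SOCKET, SO_RECV_ANYIF, &one, sizeof(one));
	/* requests extended background-idle handling (NETNS_EXTBGIDLE) */
	setsockopt(sock, SOL_SOCKET, SO_EXTENDED_BK_IDLE, &one, sizeof(one));
}
#endif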
3985 
3986 inline void
3987 inp_set_activity_bitmap(struct inpcb *inp)
3988 {
3989 	in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
3990 }
3991 
3992 inline void
3993 inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
3994 {
3995 	bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
3996 }
3997 
3998 void
3999 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
4000 {
4001 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4002 
4003 	if (inp == NULL) {
4004 		return;
4005 	}
4006 
4007 	if (p != NULL) {
4008 		strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
4009 	}
4010 	if (so->so_flags & SOF_DELEGATED) {
4011 		if (ep != NULL) {
4012 			strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
4013 		} else {
4014 			inp->inp_e_proc_name[0] = 0;
4015 		}
4016 	} else {
4017 		inp->inp_e_proc_name[0] = 0;
4018 	}
4019 }
4020 
4021 void
4022 inp_copy_last_owner(struct socket *so, struct socket *head)
4023 {
4024 	struct inpcb *inp = (struct inpcb *)so->so_pcb;
4025 	struct inpcb *head_inp = (struct inpcb *)head->so_pcb;
4026 
4027 	if (inp == NULL || head_inp == NULL) {
4028 		return;
4029 	}
4030 
4031 	strlcpy(&inp->inp_last_proc_name[0], &head_inp->inp_last_proc_name[0], sizeof(inp->inp_last_proc_name));
4032 	strlcpy(&inp->inp_e_proc_name[0], &head_inp->inp_e_proc_name[0], sizeof(inp->inp_e_proc_name));
4033 }
4034