1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <sys/file_internal.h>
82 #include <net/dlil.h>
83
84 #include <libkern/OSAtomic.h>
85 #include <kern/locks.h>
86
87 #include <machine/limits.h>
88
89 #include <kern/zalloc.h>
90
91 #include <net/if.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/flowhash.h>
95 #include <net/flowadv.h>
96 #include <net/nat464_utils.h>
97 #include <net/ntstat.h>
98 #include <net/nwk_wq.h>
99 #include <net/restricted_in_port.h>
100
101 #include <netinet/in.h>
102 #include <netinet/in_pcb.h>
103 #include <netinet/inp_log.h>
104 #include <netinet/in_var.h>
105 #include <netinet/ip_var.h>
106
107 #include <netinet/ip6.h>
108 #include <netinet6/ip6_var.h>
109
110 #include <sys/kdebug.h>
111 #include <sys/random.h>
112
113 #include <dev/random/randomdev.h>
114 #include <mach/boolean.h>
115
116 #include <atm/atm_internal.h>
117 #include <pexpert/pexpert.h>
118
119 #if NECP
120 #include <net/necp.h>
121 #endif
122
123 #include <sys/stat.h>
124 #include <sys/ubc.h>
125 #include <sys/vnode.h>
126
127 #include <os/log.h>
128
129 #if SKYWALK
130 #include <skywalk/namespace/flowidns.h>
131 #endif /* SKYWALK */
132
133 #include <IOKit/IOBSD.h>
134
135 #include <net/sockaddr_utils.h>
136
137 extern const char *proc_name_address(struct proc *);
138
139 static LCK_GRP_DECLARE(inpcb_lock_grp, "inpcb");
140 static LCK_ATTR_DECLARE(inpcb_lock_attr, 0, 0);
141 static LCK_MTX_DECLARE_ATTR(inpcb_lock, &inpcb_lock_grp, &inpcb_lock_attr);
142 static LCK_MTX_DECLARE_ATTR(inpcb_timeout_lock, &inpcb_lock_grp, &inpcb_lock_attr);
143
144 static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
145
146 static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
147 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
148 static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
149 static boolean_t inpcb_fast_timer_on = FALSE;
150
151 #define INPCB_GCREQ_THRESHOLD 50000
152
153 static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
154 static void inpcb_sched_timeout(void);
155 static void inpcb_sched_lazy_timeout(void);
156 static void _inpcb_sched_timeout(unsigned int);
157 static void inpcb_timeout(void *, void *);
158 const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
159 extern int tvtohz(struct timeval *);
160
161 #if CONFIG_PROC_UUID_POLICY
162 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
163 #if NECP
164 static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
165 #endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */
167
168 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
169 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
170
171 int allow_udp_port_exhaustion = 0;
172
173 /*
174 * These configure the range of local port addresses assigned to
175 * "unspecified" outgoing connections/packets/whatever.
176 */
177 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
178 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
179 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
180 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
181 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
182 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
183
184 #define RANGECHK(var, min, max) \
185 if ((var) < (min)) { (var) = (min); } \
186 else if ((var) > (max)) { (var) = (max); }
187
188 static int
189 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
190 {
191 #pragma unused(arg1, arg2)
192 int error;
193 int new_value = *(int *)oidp->oid_arg1;
194 #if (DEBUG | DEVELOPMENT)
195 int old_value = *(int *)oidp->oid_arg1;
196 /*
197 * For unit testing allow a non-superuser process with the
198 * proper entitlement to modify the variables
199 */
200 if (req->newptr) {
201 if (proc_suser(current_proc()) != 0 &&
202 (error = priv_check_cred(kauth_cred_get(),
203 PRIV_NETINET_RESERVEDPORT, 0))) {
204 return EPERM;
205 }
206 }
207 #endif /* (DEBUG | DEVELOPMENT) */
208
209 error = sysctl_handle_int(oidp, &new_value, 0, req);
210 if (!error) {
211 if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
212 RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
213 } else {
214 RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
215 }
216 *(int *)oidp->oid_arg1 = new_value;
217 }
218
219 #if (DEBUG | DEVELOPMENT)
220 os_log(OS_LOG_DEFAULT,
221 "%s:%u sysctl net.restricted_port.verbose: %d -> %d)",
222 proc_best_name(current_proc()), proc_selfpid(),
223 old_value, *(int *)oidp->oid_arg1);
224 #endif /* (DEBUG | DEVELOPMENT) */
225
226 return error;
227 }
228
229 #undef RANGECHK
230
231 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
232 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");
233
234 #if (DEBUG | DEVELOPMENT)
235 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
236 #else
237 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
238 #endif /* (DEBUG | DEVELOPMENT) */
239
240 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
241 CTLFAGS_IP_PORTRANGE,
242 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
243 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
244 CTLFAGS_IP_PORTRANGE,
245 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
246 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
247 CTLFAGS_IP_PORTRANGE,
248 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
249 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
250 CTLFAGS_IP_PORTRANGE,
251 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
252 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
253 CTLFAGS_IP_PORTRANGE,
254 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
255 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
256 CTLFAGS_IP_PORTRANGE,
257 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
258 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
259 CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
260
261 static uint32_t apn_fallbk_debug = 0;
262 #define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)
263
264 #if !XNU_TARGET_OS_OSX
265 static boolean_t apn_fallbk_enabled = TRUE;
266
267 SYSCTL_DECL(_net_inet);
268 SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
269 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
270 &apn_fallbk_enabled, 0, "APN fallback enable");
271 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
272 &apn_fallbk_debug, 0, "APN fallback debug enable");
273 #else /* XNU_TARGET_OS_OSX */
274 static boolean_t apn_fallbk_enabled = FALSE;
275 #endif /* XNU_TARGET_OS_OSX */
276
277 extern int udp_use_randomport;
278 extern int tcp_use_randomport;
279
/*
 * Structs used for flowhash computation.  The key is hashed as a flat
 * byte blob, so the layout (and the zeroing of unused union bytes by
 * the code that fills it in) matters for hash stability.
 */
struct inp_flowhash_key_addr {
	/* One address, viewed as v4, v6, or raw 8/16/32-bit words */
	union {
		struct in_addr v4;
		struct in6_addr v6;
		u_int8_t addr8[16];
		u_int16_t addr16[8];
		u_int32_t addr32[4];
	} infha;
};

struct inp_flowhash_key {
	struct inp_flowhash_key_addr infh_laddr;	/* local address */
	struct inp_flowhash_key_addr infh_faddr;	/* foreign address */
	u_int32_t infh_lport;				/* local port */
	u_int32_t infh_fport;				/* foreign port */
	u_int32_t infh_af;				/* address family */
	u_int32_t infh_proto;				/* IP protocol */
	u_int32_t infh_rand1;				/* random salt 1 */
	u_int32_t infh_rand2;				/* random salt 2 */
};
301
302 #if !SKYWALK
303 static u_int32_t inp_hash_seed = 0;
304 #endif /* !SKYWALK */
305
306 static int infc_cmp(const struct inpcb *, const struct inpcb *);
307
308 /* Flags used by inp_fc_getinp */
309 #define INPFC_SOLOCKED 0x1
310 #define INPFC_REMOVE 0x2
311 static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
312
313 static void inp_fc_feedback(struct inpcb *);
314 extern void tcp_remove_from_time_wait(struct inpcb *inp);
315
316 static LCK_MTX_DECLARE_ATTR(inp_fc_lck, &inpcb_lock_grp, &inpcb_lock_attr);
317
318 RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
319 RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
320 RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
321
322 /*
323 * Use this inp as a key to find an inp in the flowhash tree.
324 * Accesses to it are protected by inp_fc_lck.
325 */
326 struct inpcb key_inp;
327
328 /*
329 * in_pcb.c: manage the Protocol Control Blocks.
330 */
331
332 void
in_pcbinit(void)333 in_pcbinit(void)
334 {
335 static int inpcb_initialized = 0;
336 uint32_t logging_config;
337
338 VERIFY(!inpcb_initialized);
339 inpcb_initialized = 1;
340
341 logging_config = atm_get_diagnostic_config();
342 if (logging_config & 0x80000000) {
343 inp_log_privacy = 1;
344 }
345
346 inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
347 NULL, THREAD_CALL_PRIORITY_KERNEL);
348 /* Give it an arg so that we know that this is the fast timer */
349 inpcb_fast_thread_call = thread_call_allocate_with_priority(
350 inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
351 if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
352 panic("unable to alloc the inpcb thread call");
353 }
354
355 /*
356 * Initialize data structures required to deliver
357 * flow advisories.
358 */
359 lck_mtx_lock(&inp_fc_lck);
360 RB_INIT(&inp_fc_tree);
361 bzero(&key_inp, sizeof(key_inp));
362 lck_mtx_unlock(&inp_fc_lck);
363 }
364
365 #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
366 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
/*
 * Common timer work loop, run from either the lazy thread call (arg0 ==
 * NULL) or the fast thread call (arg0 != NULL; see in_pcbinit()).
 *
 * Walks every registered inpcbinfo and, when requested, runs its GC
 * and/or "slow tick" callbacks, then decides whether (and how urgently)
 * to re-arm itself based on the requests the callbacks left behind.
 */
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	/* Consume the pending gc/tick flags under the timeout lock */
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		/*
		 * Drop the timeout lock before calling the per-pcbinfo
		 * callbacks; they may call inpcb_gc_sched() /
		 * inpcb_timer_sched(), which take it.
		 */
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				/*
				 * Clear the request counters BEFORE the
				 * callback; what we sum afterwards is only
				 * the work the callback re-requested.
				 */
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				/* Same clear-then-sum dance for the timer */
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	/*
	 * NOTE(review): inpcb_timeout_run is u_int16_t, so the ">= 0" half
	 * of the VERIFY below is vacuous; an underflow would show up as a
	 * large value failing the "< 2" half instead.
	 */
	inpcb_timeout_run--;
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}
455
/* Schedule an immediate (fast, zero-leeway) inpcb timer run. */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
461
/* Schedule a lazy inpcb timer run with inpcb_timeout_lazy sec leeway. */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
467
/*
 * Arm the inpcb timer ~1 second out.  offset == 0 requests the fast
 * timer; offset > 0 requests the lazy timer with that many seconds of
 * leeway.  Caller must hold inpcb_timeout_lock (may hold it as a spin
 * lock; we convert it to a full mutex before calling thread_call).
 *
 * At most two timer instances may be in flight (inpcb_timeout_run is
 * 0, 1, or 2): one lazy and, if a fast request arrives while only a
 * lazy timer is pending, one additional fast instance.
 */
static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		/* thread_call APIs may block; leave spin mode first */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
			    inpcb_thread_call, NULL, deadline, leeway,
			    THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}
504
/*
 * Record a garbage-collection request of the given urgency
 * (INPCB_TIMER_NODELAY / INPCB_TIMER_FAST / lazy by default) against
 * this pcbinfo, and make sure a timer of matching urgency is armed.
 */
void
inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
{
	u_int32_t gccnt;

	lck_mtx_lock_spin(&inpcb_timeout_lock);
	inpcb_garbage_collecting = TRUE;
	/* Outstanding non-lazy GC requests for this pcbinfo */
	gccnt = ipi->ipi_gc_req.intimer_nodelay +
	    ipi->ipi_gc_req.intimer_fast;

	/*
	 * Under a large backlog, escalate lazy requests to the fast
	 * timer so the backlog drains sooner.
	 */
	if (gccnt > INPCB_GCREQ_THRESHOLD) {
		type = INPCB_TIMER_FAST;
	}

	switch (type) {
	case INPCB_TIMER_NODELAY:
		os_atomic_inc(&ipi->ipi_gc_req.intimer_nodelay, relaxed);
		inpcb_sched_timeout();
		break;
	case INPCB_TIMER_FAST:
		os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
		inpcb_sched_timeout();
		break;
	default:
		os_atomic_inc(&ipi->ipi_gc_req.intimer_lazy, relaxed);
		inpcb_sched_lazy_timeout();
		break;
	}
	lck_mtx_unlock(&inpcb_timeout_lock);
}
535
536 void
inpcb_timer_sched(struct inpcbinfo * ipi,u_int32_t type)537 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
538 {
539 lck_mtx_lock_spin(&inpcb_timeout_lock);
540 inpcb_ticking = TRUE;
541 switch (type) {
542 case INPCB_TIMER_NODELAY:
543 os_atomic_inc(&ipi->ipi_timer_req.intimer_nodelay, relaxed);
544 inpcb_sched_timeout();
545 break;
546 case INPCB_TIMER_FAST:
547 os_atomic_inc(&ipi->ipi_timer_req.intimer_fast, relaxed);
548 inpcb_sched_timeout();
549 break;
550 default:
551 os_atomic_inc(&ipi->ipi_timer_req.intimer_lazy, relaxed);
552 inpcb_sched_lazy_timeout();
553 break;
554 }
555 lck_mtx_unlock(&inpcb_timeout_lock);
556 }
557
558 void
in_pcbinfo_attach(struct inpcbinfo * ipi)559 in_pcbinfo_attach(struct inpcbinfo *ipi)
560 {
561 struct inpcbinfo *ipi0;
562
563 lck_mtx_lock(&inpcb_lock);
564 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
565 if (ipi0 == ipi) {
566 panic("%s: ipi %p already in the list",
567 __func__, ipi);
568 /* NOTREACHED */
569 }
570 }
571 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
572 lck_mtx_unlock(&inpcb_lock);
573 }
574
575 int
in_pcbinfo_detach(struct inpcbinfo * ipi)576 in_pcbinfo_detach(struct inpcbinfo *ipi)
577 {
578 struct inpcbinfo *ipi0;
579 int error = 0;
580
581 lck_mtx_lock(&inpcb_lock);
582 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
583 if (ipi0 == ipi) {
584 break;
585 }
586 }
587 if (ipi0 != NULL) {
588 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
589 } else {
590 error = ENXIO;
591 }
592 lck_mtx_unlock(&inpcb_lock);
593
594 return error;
595 }
596
597 __attribute__((noinline))
598 char *
inp_snprintf_tuple(struct inpcb * inp,char * __sized_by (buflen)buf,size_t buflen)599 inp_snprintf_tuple(struct inpcb *inp, char *__sized_by(buflen) buf, size_t buflen)
600 {
601 char laddrstr[MAX_IPv6_STR_LEN];
602 char faddrstr[MAX_IPv6_STR_LEN];
603 uint16_t lport = 0;
604 uint16_t fport = 0;
605 uint16_t proto = IPPROTO_IP;
606
607 if (inp->inp_socket != NULL) {
608 proto = SOCK_PROTO(inp->inp_socket);
609
610 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
611 lport = inp->inp_lport;
612 fport = inp->inp_fport;
613 }
614 }
615 if (inp->inp_vflag & INP_IPV4) {
616 inet_ntop(AF_INET, (void *)&inp->inp_laddr.s_addr, laddrstr, sizeof(laddrstr));
617 inet_ntop(AF_INET, (void *)&inp->inp_faddr.s_addr, faddrstr, sizeof(faddrstr));
618 } else if (inp->inp_vflag & INP_IPV6) {
619 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, laddrstr, sizeof(laddrstr));
620 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, faddrstr, sizeof(faddrstr));
621 }
622 snprintf(buf, buflen, "[%u %s:%u %s:%u]",
623 proto, laddrstr, ntohs(lport), faddrstr, ntohs(fport));
624
625 return buf;
626 }
627
/*
 * Decide (once per inpcb) whether this socket may carry data over
 * management interfaces, recording the result in inp_flags2:
 * INP2_MANAGEMENT_CHECKED marks the check as done and
 * INP2_MANAGEMENT_ALLOWED grants access.
 */
__attribute__((noinline))
void
in_pcb_check_management_entitled(struct inpcb *inp)
{
	/* Already evaluated for this inpcb; result is sticky */
	if (inp->inp_flags2 & INP2_MANAGEMENT_CHECKED) {
		return;
	}

	if (management_data_unrestricted) {
		/* Global override: everyone is allowed */
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
	} else if (if_management_interface_check_needed == true) {
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
		/*
		 * Note that soopt_cred_check check both intcoproc entitlements
		 * We check MANAGEMENT_DATA_ENTITLEMENT as there is no corresponding PRIV value
		 */
		if (soopt_cred_check(inp->inp_socket, PRIV_NET_RESTRICTED_INTCOPROC, false, false) == 0
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		} else {
			/* Denied: optionally log the offending process/tuple */
			if (__improbable(if_management_verbose > 1)) {
				char buf[128];

				os_log(OS_LOG_DEFAULT, "in_pcb_check_management_entitled %s:%d not management entitled %s",
				    proc_best_name(current_proc()),
				    proc_selfpid(),
				    inp_snprintf_tuple(inp, buf, sizeof(buf)));
			}
		}
	}
}
664
665 __attribute__((noinline))
666 void
in_pcb_check_ultra_constrained_entitled(struct inpcb * inp)667 in_pcb_check_ultra_constrained_entitled(struct inpcb *inp)
668 {
669 if (inp->inp_flags2 & INP2_ULTRA_CONSTRAINED_CHECKED) {
670 return;
671 }
672
673 if (if_ultra_constrained_check_needed) {
674 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;
675 if (IOCurrentTaskHasEntitlement(ULTRA_CONSTRAINED_ENTITLEMENT)) {
676 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
677 }
678 }
679 }
680
681 /*
682 * Allocate a PCB and associate it with the socket.
683 *
684 * Returns: 0 Success
685 * ENOBUFS
686 * ENOMEM
687 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		/* Fresh, zeroed allocation from the protocol's PCB zone */
		void *__unsafe_indexable addr = __zalloc_flags(pcbinfo->ipi_zone,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
		__builtin_assume(addr != NULL);
		/*
		 * N.B: the allocation above may actually be inp_tp
		 * which is a structure that includes inpcb, but for
		 * the purposes of this function we just touch
		 * struct inpcb.
		 */
		inp = __unsafe_forge_single(struct inpcb *, addr);
	} else {
		/*
		 * Reuse the PCB cached by the socket layer; preserve the
		 * saved protocol PCB pointer across the bzero.
		 */
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof(*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
/*
 * Round a pointer within a _store array up to the natural alignment of
 * the target type and cast it, so the 64-bit stat counters below are
 * always 8-byte aligned regardless of the inpcb's own alignment.
 */
#define INP_ALIGN_AND_CAST(_type, _ptr) ({ \
	typeof((_type)(void *__header_bidi_indexable)NULL) __roundup_type;\
	const volatile char *__roundup_align_ptr = (const volatile char *)(_ptr); \
	__roundup_align_ptr += P2ROUNDUP((uintptr_t)__roundup_align_ptr, \
	    _Alignof(typeof(*__roundup_type))) - (uintptr_t)__roundup_align_ptr; \
	__DEQUALIFY(_type, __roundup_align_ptr); \
})
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_stat_store);
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_cstat_store);
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_wstat_store);
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_Wstat_store);
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_btstat is always 64-bit aligned */
	inp->inp_btstat = INP_ALIGN_AND_CAST(struct inp_stat *, inp->inp_btstat_store);
	if (((uintptr_t)inp->inp_btstat - (uintptr_t)inp->inp_btstat_store) +
	    sizeof(*inp->inp_btstat) > sizeof(inp->inp_btstat_store)) {
		panic("%s: insufficient space to align inp_btstat", __func__);
		/* NOTREACHED */
	}
#undef INP_ALIGN_AND_CAST
	so->so_pcb = (caddr_t)inp;

	/* Per-PCB mutex, only for protocols that request PR_PCBLOCK */
	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    &pcbinfo->ipi_lock_attr);
	}

	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

	(void) inp_update_policy(inp);

	/* Publish the new PCB on the pcbinfo's global list */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(&pcbinfo->ipi_lock);
	return 0;
}
790
791 /*
792 * in_pcblookup_local_and_cleanup does everything
793 * in_pcblookup_local does but it checks for a socket
794 * that's going away. Since we know that the lock is
795 * held read+write when this function is called, we
796 * can safely dispose of this socket like the slow
797 * timer would usually do and return NULL. This is
798 * great for bind.
799 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		socket_lock(so, 0);

		if (so->so_usecount == 0) {
			/* No users left: detach (if needed) and dispose now */
			if (inp->inp_state != INPCB_STATE_DEAD) {
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp);     /* will unlock & destroy */
			inp = NULL;
		} else {
			/* Still in use elsewhere; leave it for the timer */
			socket_unlock(so, 0);
		}
	}

	return inp;
}
828
829 static void
in_pcb_conflict_post_msg(u_int16_t port)830 in_pcb_conflict_post_msg(u_int16_t port)
831 {
832 /*
833 * Radar 5523020 send a kernel event notification if a
834 * non-participating socket tries to bind the port a socket
835 * who has set SOF_NOTIFYCONFLICT owns.
836 */
837 struct kev_msg ev_msg;
838 struct kev_in_portinuse in_portinuse;
839
840 bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
841 bzero(&ev_msg, sizeof(struct kev_msg));
842 in_portinuse.port = ntohs(port); /* port in host order */
843 in_portinuse.req_pid = proc_selfpid();
844 ev_msg.vendor_code = KEV_VENDOR_APPLE;
845 ev_msg.kev_class = KEV_NETWORK_CLASS;
846 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
847 ev_msg.event_code = KEV_INET_PORTINUSE;
848 ev_msg.dv[0].data_ptr = &in_portinuse;
849 ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
850 ev_msg.dv[1].data_length = 0;
851 dlil_post_complete_msg(NULL, &ev_msg);
852 }
853
854 /*
855 * Bind an INPCB to an address and/or port. This routine should not alter
856 * the caller-supplied local address "nam" or remote address "remote".
857 *
858 * Returns: 0 Success
859 * EADDRNOTAVAIL Address not available.
860 * EINVAL Invalid argument
861 * EAFNOSUPPORT Address family not supported [notdef]
862 * EACCES Permission denied
863 * EADDRINUSE Address in use
864 * EAGAIN Resource unavailable, try again
865 * priv_check_cred:EPERM Operation not permitted
866 */
867 int
in_pcbbind(struct inpcb * inp,struct sockaddr * nam,struct sockaddr * remote,struct proc * p)868 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct sockaddr *remote, struct proc *p)
869 {
870 struct socket *so = inp->inp_socket;
871 unsigned short *lastport;
872 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
873 u_short lport = 0, rand_port = 0;
874 int wild = 0;
875 int reuseport = (so->so_options & SO_REUSEPORT);
876 int error = 0;
877 int randomport;
878 int conflict = 0;
879 boolean_t anonport = FALSE;
880 kauth_cred_t cred;
881 struct in_addr laddr;
882 struct ifnet *outif = NULL;
883
884 if (inp->inp_flags2 & INP2_BIND_IN_PROGRESS) {
885 return EINVAL;
886 }
887 inp->inp_flags2 |= INP2_BIND_IN_PROGRESS;
888
889 if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
890 error = EADDRNOTAVAIL;
891 goto done;
892 }
893 if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
894 wild = 1;
895 }
896
897 bzero(&laddr, sizeof(laddr));
898
899 socket_unlock(so, 0); /* keep reference on socket */
900 lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
901 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
902 /* another thread completed the bind */
903 lck_rw_done(&pcbinfo->ipi_lock);
904 socket_lock(so, 0);
905 error = EINVAL;
906 goto done;
907 }
908
909 if (nam != NULL) {
910 if (nam->sa_len != sizeof(struct sockaddr_in)) {
911 lck_rw_done(&pcbinfo->ipi_lock);
912 socket_lock(so, 0);
913 error = EINVAL;
914 goto done;
915 }
916 #if 0
917 /*
918 * We should check the family, but old programs
919 * incorrectly fail to initialize it.
920 */
921 if (nam->sa_family != AF_INET) {
922 lck_rw_done(&pcbinfo->ipi_lock);
923 socket_lock(so, 0);
924 error = EAFNOSUPPORT;
925 goto done;
926 }
927 #endif /* 0 */
928 lport = SIN(nam)->sin_port;
929
930 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
931 /*
932 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
933 * allow complete duplication of binding if
934 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
935 * and a multicast address is bound on both
936 * new and duplicated sockets.
937 */
938 if (so->so_options & SO_REUSEADDR) {
939 reuseport = SO_REUSEADDR | SO_REUSEPORT;
940 }
941 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
942 struct sockaddr_in sin;
943 struct ifaddr *ifa;
944
945 /* Sanitized for interface address searches */
946 SOCKADDR_ZERO(&sin, sizeof(sin));
947 sin.sin_family = AF_INET;
948 sin.sin_len = sizeof(struct sockaddr_in);
949 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
950
951 ifa = ifa_ifwithaddr(SA(&sin));
952 if (ifa == NULL) {
953 lck_rw_done(&pcbinfo->ipi_lock);
954 socket_lock(so, 0);
955 error = EADDRNOTAVAIL;
956 goto done;
957 } else {
958 /*
959 * Opportunistically determine the outbound
960 * interface that may be used; this may not
961 * hold true if we end up using a route
962 * going over a different interface, e.g.
963 * when sending to a local address. This
964 * will get updated again after sending.
965 */
966 IFA_LOCK(ifa);
967 outif = ifa->ifa_ifp;
968 IFA_UNLOCK(ifa);
969 ifa_remref(ifa);
970 }
971 }
972
973 #if SKYWALK
974 if (inp->inp_flags2 & INP2_EXTERNAL_PORT) {
975 // Extract the external flow info
976 struct ns_flow_info nfi = {};
977 error = necp_client_get_netns_flow_info(inp->necp_client_uuid,
978 &nfi);
979 if (error != 0) {
980 lck_rw_done(&pcbinfo->ipi_lock);
981 socket_lock(so, 0);
982 goto done;
983 }
984
985 // Extract the reserved port
986 u_int16_t reserved_lport = 0;
987 if (nfi.nfi_laddr.sa.sa_family == AF_INET) {
988 reserved_lport = nfi.nfi_laddr.sin.sin_port;
989 } else if (nfi.nfi_laddr.sa.sa_family == AF_INET6) {
990 reserved_lport = nfi.nfi_laddr.sin6.sin6_port;
991 } else {
992 lck_rw_done(&pcbinfo->ipi_lock);
993 socket_lock(so, 0);
994 error = EINVAL;
995 goto done;
996 }
997
998 // Validate or use the reserved port
999 if (lport == 0) {
1000 lport = reserved_lport;
1001 } else if (lport != reserved_lport) {
1002 lck_rw_done(&pcbinfo->ipi_lock);
1003 socket_lock(so, 0);
1004 error = EINVAL;
1005 goto done;
1006 }
1007 }
1008
1009 /* Do not allow reserving a UDP port if remaining UDP port count is below 4096 */
1010 if (SOCK_PROTO(so) == IPPROTO_UDP && !allow_udp_port_exhaustion) {
1011 uint32_t current_reservations = 0;
1012 if (inp->inp_vflag & INP_IPV6) {
1013 current_reservations = netns_lookup_reservations_count_in6(inp->in6p_laddr, IPPROTO_UDP);
1014 } else {
1015 current_reservations = netns_lookup_reservations_count_in(inp->inp_laddr, IPPROTO_UDP);
1016 }
1017 if (USHRT_MAX - UDP_RANDOM_PORT_RESERVE < current_reservations) {
1018 log(LOG_ERR, "UDP port not available, less than 4096 UDP ports left");
1019 lck_rw_done(&pcbinfo->ipi_lock);
1020 socket_lock(so, 0);
1021 error = EADDRNOTAVAIL;
1022 goto done;
1023 }
1024 }
1025
1026 #endif /* SKYWALK */
1027
1028 if (lport != 0) {
1029 struct inpcb *t;
1030 uid_t u;
1031
1032 #if XNU_TARGET_OS_OSX
1033 if (ntohs(lport) < IPPORT_RESERVED &&
1034 SIN(nam)->sin_addr.s_addr != 0 &&
1035 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1036 cred = kauth_cred_proc_ref(p);
1037 error = priv_check_cred(cred,
1038 PRIV_NETINET_RESERVEDPORT, 0);
1039 kauth_cred_unref(&cred);
1040 if (error != 0) {
1041 lck_rw_done(&pcbinfo->ipi_lock);
1042 socket_lock(so, 0);
1043 error = EACCES;
1044 goto done;
1045 }
1046 }
1047 #endif /* XNU_TARGET_OS_OSX */
1048 /*
1049 * Check wether the process is allowed to bind to a restricted port
1050 */
1051 if (!current_task_can_use_restricted_in_port(lport,
1052 (uint8_t)SOCK_PROTO(so), PORT_FLAGS_BSD)) {
1053 lck_rw_done(&pcbinfo->ipi_lock);
1054 socket_lock(so, 0);
1055 error = EADDRINUSE;
1056 goto done;
1057 }
1058
1059 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
1060 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
1061 (t = in_pcblookup_local_and_cleanup(
1062 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
1063 INPLOOKUP_WILDCARD)) != NULL &&
1064 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1065 t->inp_laddr.s_addr != INADDR_ANY ||
1066 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
1067 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
1068 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
1069 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1070 t->inp_laddr.s_addr != INADDR_ANY) &&
1071 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1072 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1073 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1074 if ((t->inp_socket->so_flags &
1075 SOF_NOTIFYCONFLICT) &&
1076 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1077 conflict = 1;
1078 }
1079
1080 lck_rw_done(&pcbinfo->ipi_lock);
1081
1082 if (conflict) {
1083 in_pcb_conflict_post_msg(lport);
1084 }
1085
1086 socket_lock(so, 0);
1087 error = EADDRINUSE;
1088 goto done;
1089 }
1090 t = in_pcblookup_local_and_cleanup(pcbinfo,
1091 SIN(nam)->sin_addr, lport, wild);
1092 if (t != NULL &&
1093 (reuseport & t->inp_socket->so_options) == 0 &&
1094 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1095 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1096 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1097 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1098 t->inp_laddr.s_addr != INADDR_ANY ||
1099 SOCK_DOM(so) != PF_INET6 ||
1100 SOCK_DOM(t->inp_socket) != PF_INET6) {
1101 if ((t->inp_socket->so_flags &
1102 SOF_NOTIFYCONFLICT) &&
1103 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1104 conflict = 1;
1105 }
1106
1107 lck_rw_done(&pcbinfo->ipi_lock);
1108
1109 if (conflict) {
1110 in_pcb_conflict_post_msg(lport);
1111 }
1112 socket_lock(so, 0);
1113 error = EADDRINUSE;
1114 goto done;
1115 }
1116 }
1117 #if SKYWALK
1118 if ((SOCK_PROTO(so) == IPPROTO_TCP ||
1119 SOCK_PROTO(so) == IPPROTO_UDP) &&
1120 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1121 int res_err = 0;
1122 if (inp->inp_vflag & INP_IPV6) {
1123 res_err = netns_reserve_in6(
1124 &inp->inp_netns_token,
1125 SIN6(nam)->sin6_addr,
1126 (uint8_t)SOCK_PROTO(so), lport, NETNS_BSD,
1127 NULL);
1128 } else {
1129 res_err = netns_reserve_in(
1130 &inp->inp_netns_token,
1131 SIN(nam)->sin_addr, (uint8_t)SOCK_PROTO(so),
1132 lport, NETNS_BSD, NULL);
1133 }
1134 if (res_err != 0) {
1135 lck_rw_done(&pcbinfo->ipi_lock);
1136 socket_lock(so, 0);
1137 error = EADDRINUSE;
1138 goto done;
1139 }
1140 }
1141 #endif /* SKYWALK */
1142 }
1143 laddr = SIN(nam)->sin_addr;
1144 }
1145 if (lport == 0) {
1146 u_short first, last;
1147 int count;
1148 bool found;
1149
1150 /*
1151 * Override wild = 1 for implicit bind (mainly used by connect)
1152 * For implicit bind (lport == 0), we always use an unused port,
1153 * so REUSEADDR|REUSEPORT don't apply
1154 */
1155 wild = 1;
1156
1157 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
1158 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
1159 udp_use_randomport);
1160
1161 /*
1162 * Even though this looks similar to the code in
1163 * in6_pcbsetport, the v6 vs v4 checks are different.
1164 */
1165 anonport = TRUE;
1166 if (inp->inp_flags & INP_HIGHPORT) {
1167 first = (u_short)ipport_hifirstauto; /* sysctl */
1168 last = (u_short)ipport_hilastauto;
1169 lastport = &pcbinfo->ipi_lasthi;
1170 } else if (inp->inp_flags & INP_LOWPORT) {
1171 cred = kauth_cred_proc_ref(p);
1172 error = priv_check_cred(cred,
1173 PRIV_NETINET_RESERVEDPORT, 0);
1174 kauth_cred_unref(&cred);
1175 if (error != 0) {
1176 lck_rw_done(&pcbinfo->ipi_lock);
1177 socket_lock(so, 0);
1178 goto done;
1179 }
1180 first = (u_short)ipport_lowfirstauto; /* 1023 */
1181 last = (u_short)ipport_lowlastauto; /* 600 */
1182 lastport = &pcbinfo->ipi_lastlow;
1183 } else {
1184 first = (u_short)ipport_firstauto; /* sysctl */
1185 last = (u_short)ipport_lastauto;
1186 lastport = &pcbinfo->ipi_lastport;
1187 }
1188 /* No point in randomizing if only one port is available */
1189
1190 if (first == last) {
1191 randomport = 0;
1192 }
1193 /*
1194 * Simple check to ensure all ports are not used up causing
1195 * a deadlock here.
1196 *
1197 * We split the two cases (up and down) so that the direction
1198 * is not being tested on each round of the loop.
1199 */
1200 if (first > last) {
1201 struct in_addr lookup_addr;
1202
1203 /*
1204 * counting down
1205 */
1206 if (randomport) {
1207 read_frandom(&rand_port, sizeof(rand_port));
1208 *lastport =
1209 first - (rand_port % (first - last));
1210 }
1211 count = first - last;
1212
1213 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1214 inp->inp_laddr;
1215
1216 found = false;
1217 do {
1218 if (count-- < 0) { /* completely used? */
1219 lck_rw_done(&pcbinfo->ipi_lock);
1220 socket_lock(so, 0);
1221 error = EADDRNOTAVAIL;
1222 goto done;
1223 }
1224 --*lastport;
1225 if (*lastport > first || *lastport < last) {
1226 *lastport = first;
1227 }
1228 lport = htons(*lastport);
1229
1230 /*
1231 * Skip if this is a restricted port as we do not want to
1232 * restricted ports as ephemeral
1233 */
1234 if (IS_RESTRICTED_IN_PORT(lport)) {
1235 continue;
1236 }
1237
1238 found = in_pcblookup_local_and_cleanup(pcbinfo,
1239 lookup_addr, lport, wild) == NULL;
1240 #if SKYWALK
1241 if (found &&
1242 (SOCK_PROTO(so) == IPPROTO_TCP ||
1243 SOCK_PROTO(so) == IPPROTO_UDP) &&
1244 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1245 int res_err;
1246 if (inp->inp_vflag & INP_IPV6) {
1247 res_err = netns_reserve_in6(
1248 &inp->inp_netns_token,
1249 inp->in6p_laddr,
1250 (uint8_t)SOCK_PROTO(so), lport,
1251 NETNS_BSD, NULL);
1252 } else {
1253 res_err = netns_reserve_in(
1254 &inp->inp_netns_token,
1255 lookup_addr, (uint8_t)SOCK_PROTO(so),
1256 lport, NETNS_BSD, NULL);
1257 }
1258 found = res_err == 0;
1259 }
1260 #endif /* SKYWALK */
1261 } while (!found);
1262 } else {
1263 struct in_addr lookup_addr;
1264
1265 /*
1266 * counting up
1267 */
1268 if (randomport) {
1269 read_frandom(&rand_port, sizeof(rand_port));
1270 *lastport =
1271 first + (rand_port % (first - last));
1272 }
1273 count = last - first;
1274
1275 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1276 inp->inp_laddr;
1277
1278 found = false;
1279 do {
1280 if (count-- < 0) { /* completely used? */
1281 lck_rw_done(&pcbinfo->ipi_lock);
1282 socket_lock(so, 0);
1283 error = EADDRNOTAVAIL;
1284 goto done;
1285 }
1286 ++*lastport;
1287 if (*lastport < first || *lastport > last) {
1288 *lastport = first;
1289 }
1290 lport = htons(*lastport);
1291
1292 /*
1293 * Skip if this is a restricted port as we do not want to
1294 * restricted ports as ephemeral
1295 */
1296 if (IS_RESTRICTED_IN_PORT(lport)) {
1297 continue;
1298 }
1299
1300 found = in_pcblookup_local_and_cleanup(pcbinfo,
1301 lookup_addr, lport, wild) == NULL;
1302 #if SKYWALK
1303 if (found &&
1304 (SOCK_PROTO(so) == IPPROTO_TCP ||
1305 SOCK_PROTO(so) == IPPROTO_UDP) &&
1306 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1307 int res_err;
1308 if (inp->inp_vflag & INP_IPV6) {
1309 res_err = netns_reserve_in6(
1310 &inp->inp_netns_token,
1311 inp->in6p_laddr,
1312 (uint8_t)SOCK_PROTO(so), lport,
1313 NETNS_BSD, NULL);
1314 } else {
1315 res_err = netns_reserve_in(
1316 &inp->inp_netns_token,
1317 lookup_addr, (uint8_t)SOCK_PROTO(so),
1318 lport, NETNS_BSD, NULL);
1319 }
1320 found = res_err == 0;
1321 }
1322 #endif /* SKYWALK */
1323 } while (!found);
1324 }
1325 }
1326 socket_lock(so, 0);
1327
1328 /*
1329 * We unlocked socket's protocol lock for a long time.
1330 * The socket might have been dropped/defuncted.
1331 * Checking if world has changed since.
1332 */
1333 if (inp->inp_state == INPCB_STATE_DEAD) {
1334 #if SKYWALK
1335 netns_release(&inp->inp_netns_token);
1336 #endif /* SKYWALK */
1337 lck_rw_done(&pcbinfo->ipi_lock);
1338 error = ECONNABORTED;
1339 goto done;
1340 }
1341
1342 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1343 #if SKYWALK
1344 netns_release(&inp->inp_netns_token);
1345 #endif /* SKYWALK */
1346 lck_rw_done(&pcbinfo->ipi_lock);
1347 error = EINVAL;
1348 goto done;
1349 }
1350
1351 if (laddr.s_addr != INADDR_ANY) {
1352 inp->inp_laddr = laddr;
1353 inp->inp_last_outifp = outif;
1354 #if SKYWALK
1355 if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
1356 netns_set_ifnet(&inp->inp_netns_token, outif);
1357 }
1358 #endif /* SKYWALK */
1359 }
1360 inp->inp_lport = lport;
1361 if (anonport) {
1362 inp->inp_flags |= INP_ANONPORT;
1363 }
1364
1365 if (in_pcbinshash(inp, remote, 1) != 0) {
1366 inp->inp_laddr.s_addr = INADDR_ANY;
1367 inp->inp_last_outifp = NULL;
1368
1369 #if SKYWALK
1370 netns_release(&inp->inp_netns_token);
1371 #endif /* SKYWALK */
1372 inp->inp_lport = 0;
1373 if (anonport) {
1374 inp->inp_flags &= ~INP_ANONPORT;
1375 }
1376 lck_rw_done(&pcbinfo->ipi_lock);
1377 error = EAGAIN;
1378 goto done;
1379 }
1380 lck_rw_done(&pcbinfo->ipi_lock);
1381 sflt_notify(so, sock_evt_bound, NULL);
1382
1383 in_pcb_check_management_entitled(inp);
1384 in_pcb_check_ultra_constrained_entitled(inp);
1385 done:
1386 inp->inp_flags2 &= ~INP2_BIND_IN_PROGRESS;
1387 return error;
1388 }
1389
1390 #define APN_FALLBACK_IP_FILTER(a) \
1391 (IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
1392 IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
1393 IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
1394 IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
1395 IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))
1396
1397 #define APN_FALLBACK_NOTIF_INTERVAL 2 /* Magic Number */
1398 static uint64_t last_apn_fallback = 0;
1399
/*
 * Decide whether an APN fallback kernel event should be posted for this
 * outbound IPv4 connection attempt.  Returns TRUE only when ALL of the
 * following hold:
 *  - the apn_fallbk_enabled knob is set;
 *  - the caller is a user process (not kernproc) whose socket has not
 *    opted out via SO_NOAPNFALLBK;
 *  - we are outside the notification throttle interval;
 *  - the destination is not link-local/loopback/zeronet/multicast/private
 *    (see APN_FALLBACK_IP_FILTER);
 *  - the only unscoped default route is IPv6 over a cellular interface
 *    (i.e. no unscoped IPv4 default route exists);
 *  - the binary is a third-party app (bundle ID contains a dot and is
 *    not "com.apple."-prefixed);
 *  - the binary's atime/birthtime predate the App Store IPv6
 *    requirement cutoff.
 */
static boolean_t
apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	if (apn_fallbk_enabled == FALSE) {
		return FALSE;
	}

	if (proc == kernproc) {
		return FALSE;
	}

	/* Socket explicitly opted out of APN fallback. */
	if (so && (so->so_options & SO_NOAPNFALLBK)) {
		return FALSE;
	}

	/* Rate-limit: at most one notification per APN_FALLBACK_NOTIF_INTERVAL. */
	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
		return FALSE;
	}

	/* Non-routable / special destinations never trigger fallback. */
	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
		return FALSE;
	}

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1(SA(&lookup_default_addr), 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		rtfree(rt);
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route, ensure that
	 * we do not have IPv4 default route before triggering
	 * the event
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1(SA(&lookup_default_addr), 0, 0);

	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party app.
		 * Note that platform daemons use their process name as a
		 * bundle ID so we filter out bundle IDs without dots.
		 */
		const char *__null_terminated bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strlcmp("com.apple.", bundle_id, sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than that.
		 * We check both atime and birthtime since birthtime is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
		vfs_context_t __single context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		/* Stat the process's executable vnode to learn its timestamps. */
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
			    sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}
1518
/*
 * Post a KEV_NETEVENT_APNFALLBACK kernel event carrying the effective
 * pid/UUID of the process that owns the socket, and record the current
 * uptime so apn_fallback_required() can throttle subsequent events.
 */
static void
apn_fallback_trigger(proc_t proc, struct socket *so)
{
	pid_t pid = 0;
	struct kev_msg ev_msg;
	struct kev_netevent_apnfallbk_data apnfallbk_data;

	last_apn_fallback = net_uptime();
	/*
	 * NOTE(review): `pid` and `application_uuid` are collected here but
	 * never placed in the event payload below — possibly vestigial.
	 */
	pid = proc_pid(proc);
	uuid_t application_uuid;
	uuid_clear(application_uuid);
	proc_getexecutableuuid(proc, application_uuid,
	    sizeof(application_uuid));

	bzero(&ev_msg, sizeof(struct kev_msg));
	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
	ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;

	bzero(&apnfallbk_data, sizeof(apnfallbk_data));

	/*
	 * Report the delegated (effective) process when the socket was
	 * opened on behalf of another process; otherwise the last process
	 * that used the socket.
	 */
	if (so->so_flags & SOF_DELEGATED) {
		apnfallbk_data.epid = so->e_pid;
		uuid_copy(apnfallbk_data.euuid, so->e_uuid);
	} else {
		apnfallbk_data.epid = so->last_pid;
		uuid_copy(apnfallbk_data.euuid, so->last_uuid);
	}

	ev_msg.dv[0].data_ptr = &apnfallbk_data;
	ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
	kev_post_msg(&ev_msg);
	apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
}
1554
1555 /*
1556 * Transform old in_pcbconnect() into an inner subroutine for new
1557 * in_pcbconnect(); do some validity-checking on the remote address
1558 * (in "nam") and then determine local host address (i.e., which
1559 * interface) to use to access that remote host.
1560 *
1561 * This routine may alter the caller-supplied remote address "nam".
1562 *
1563 * The caller may override the bound-to-interface setting of the socket
1564 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1565 *
1566 * This routine might return an ifp with a reference held if the caller
1567 * provides a non-NULL outif, even in the error case. The caller is
1568 * responsible for releasing its reference.
1569 *
1570 * Returns: 0 Success
1571 * EINVAL Invalid argument
1572 * EAFNOSUPPORT Address family not supported
1573 * EADDRNOTAVAIL Address not available
1574 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	/* Start with no outbound interface; filled in on success below. */
	if (outif != NULL) {
		*outif = NULL;
	}
	/* Validate the remote address: length, family, and (unless raw) port. */
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	in_pcb_check_management_entitled(inp);
	in_pcb_check_ultra_constrained_entitled(inp);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 * (Note: this rewrites the caller-supplied "nam" in place.)
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(&in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(&in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	/*
	 * Toss the cached route if it is unusable, is for a different
	 * destination or family, or if SO_DONTROUTE is set.
	 */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		SOCKADDR_ZERO(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	SOCKADDR_ZERO(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/* No route found: possibly notify userland of APN fallback. */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam)) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			ifa_addref(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed off.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		/* Last resort: take the route's own interface address. */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			ifa_addref(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			if (ia != NULL) {
				ifa_remref(&ia->ia_ifa);
			}
			/* Find an IPv4 address owned by the multicast ifp. */
			lck_rw_lock_shared(&in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				ifa_addref(&ia->ia_ifa);
			}
			lck_rw_done(&in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);   /* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		ifa_remref(&ia->ia_ifa);
		ia = NULL;
	}

	/* Surface a policy denial (cellular/expensive) to socket listeners. */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}
1859
1860 /*
1861 * Outer subroutine:
1862 * Connect from a socket to a specified address.
1863 * Both address and port must be specified in argument sin.
1864 * If don't have a local address for this socket yet,
1865 * then pick one.
1866 *
1867 * The caller may override the bound-to-interface setting of the socket
1868 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1869 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = SIN(nam);
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
		return error;
	}

	/*
	 * Check for an existing PCB with the same 4-tuple.  The socket
	 * lock is dropped across the lookup; the abort check below copes
	 * with what may happen while it is unlocked.
	 */
	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return ECONNREFUSED;
	}

	if (pcb != NULL) {
		/* The 4-tuple is already in use by another PCB. */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return EADDRINUSE;
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* Implicit bind: pick an ephemeral local port. */
			error = in_pcbbind(inp, NULL, nam, p);
			if (error) {
				return error;
			}
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token,
			    inp->inp_last_outifp);
		}
#endif /* SKYWALK */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routines can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0) {
			return EINVAL;
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* Commit the foreign address/port and rehash under the pcbinfo lock. */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	return 0;
}
1969
/*
 * Disconnect a PCB from its foreign address: clear inp_faddr/inp_fport,
 * rehash the PCB under the pcbinfo lock, and detach it entirely if the
 * socket has already lost its last file-descriptor reference.
 */
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* Let nstat cache UDP flow statistics before the tuple is cleared. */
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_cache(inp);
	}

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

#if CONTENT_FILTER
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
		in_pcbdetach(inp);
	}
}
2006
/*
 * Detach a PCB from its socket: release per-PCB resources (IPsec
 * policy, keepalive buffer, cached route, multicast options), mark the
 * PCB dead via WNT_STOPUSING, and schedule the garbage collector to
 * reap it. The socket itself survives; SOF_PCBCLEARING guards against
 * running the teardown twice.
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/* Count UDP sockets that never moved any data, for API telemetry */
	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
		nstat_pcb_detach(inp);
	}

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		kfree_data_counted_by(inp->inp_keepalive_data, inp->inp_keepalive_datalen);
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if SKYWALK
	/* Free up the port in the namespace registrar if not in TIME_WAIT */
	if (!(inp->inp_flags2 & INP2_TIMEWAIT)) {
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
	}
#endif /* SKYWALK */

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		if (imo != NULL) {
			/* drop our reference; IMO_REMREF may free it */
			IMO_REMREF(imo);
		}
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_stat != NULL &&
			    (inp->inp_stat->txbytes != 0 ||
			    inp->inp_stat->rxbytes != 0)) {
				/* attribute the event to the effective pid when delegated */
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->e_pid,
						so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->last_pid,
						so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		/* ask the GC thread to reap this PCB soon */
		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
2111
2112
/*
 * Final disposal of a dead PCB: unlink it from the pcbinfo lists and
 * free both the PCB and its socket. Only valid once the socket's
 * usecount has dropped to zero and the PCB's wantcnt is WNT_STOPUSING;
 * anything else is a refcounting bug and panics.
 *
 * Caller must hold the pcbinfo ipi_lock exclusively (asserted below).
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(&ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		/*
		 * When the PCB is cached inside the socket allocation
		 * (SOF1_CACHED_IN_SOCK_LAYER), sodealloc() below frees it
		 * along with the socket; otherwise free it separately here.
		 */
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
2190
/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
2197 int
in_getsockaddr(struct socket * so,struct sockaddr ** nam)2198 in_getsockaddr(struct socket *so, struct sockaddr **nam)
2199 {
2200 struct inpcb *inp;
2201 struct sockaddr_in *sin;
2202
2203 /*
2204 * Do the malloc first in case it blocks.
2205 */
2206 sin = SIN(alloc_sockaddr(sizeof(*sin),
2207 Z_WAITOK | Z_NOFAIL));
2208
2209 sin->sin_family = AF_INET;
2210
2211 if ((inp = sotoinpcb(so)) == NULL) {
2212 free_sockaddr(sin);
2213 return EINVAL;
2214 }
2215 sin->sin_port = inp->inp_lport;
2216 sin->sin_addr = inp->inp_laddr;
2217
2218 *nam = SA(sin);
2219 return 0;
2220 }
2221
2222 int
in_getsockaddr_s(struct socket * so,struct sockaddr_in * ss)2223 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
2224 {
2225 struct sockaddr_in *sin = ss;
2226 struct inpcb *inp;
2227
2228 VERIFY(ss != NULL);
2229 SOCKADDR_ZERO(ss, sizeof(*ss));
2230
2231 sin->sin_family = AF_INET;
2232 sin->sin_len = sizeof(*sin);
2233
2234 if ((inp = sotoinpcb(so)) == NULL) {
2235 return EINVAL;
2236 }
2237
2238 sin->sin_port = inp->inp_lport;
2239 sin->sin_addr = inp->inp_laddr;
2240 return 0;
2241 }
2242
2243 int
in_getpeeraddr(struct socket * so,struct sockaddr ** nam)2244 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
2245 {
2246 struct inpcb *inp;
2247 struct sockaddr_in *sin;
2248
2249 /*
2250 * Do the malloc first in case it blocks.
2251 */
2252 sin = SIN(alloc_sockaddr(sizeof(*sin),
2253 Z_WAITOK | Z_NOFAIL));
2254
2255 sin->sin_family = AF_INET;
2256
2257 if ((inp = sotoinpcb(so)) == NULL) {
2258 free_sockaddr(sin);
2259 return EINVAL;
2260 }
2261 sin->sin_port = inp->inp_fport;
2262 sin->sin_addr = inp->inp_faddr;
2263
2264 *nam = SA(sin);
2265 return 0;
2266 }
2267
/*
 * Invoke 'notify' with 'errno' on every IPv4 PCB in 'pcbinfo' whose
 * foreign address equals 'faddr'. Used to fan out ICMP-style errors
 * (e.g. host unreachable) to all affected connections.
 *
 * Takes the pcbinfo lock shared; each matching PCB is pinned with
 * WNT_ACQUIRE and its socket locked around the callback so the PCB
 * cannot be reaped while the notifier runs.
 */
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL) {
			continue;
		}
		/* skip PCBs already marked for reaping */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
}
2294
2295 /*
2296 * Check for alternatives when higher level complains
2297 * about service problems. For now, invalidate cached
2298 * routing information. If the route was created dynamically
2299 * (by a redirect), time to try a default gateway again.
2300 */
/*
 * Called when the transport layer reports persistent service problems
 * on this PCB's cached route. Deletes the route from the routing table
 * if it was learned dynamically (ICMP redirect), and drops the PCB's
 * cached reference so the next output attempt allocates a fresh route
 * — but only if the local address is still configured.
 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			/* redirect-learned route: remove it from the table */
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route. A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL) {
			/* drop the reference taken by ifa_foraddr() */
			ifa_remref(&ia->ia_ifa);
		}
	}
	if (rt == NULL || release) {
		ROUTE_RELEASE(&inp->inp_route);
	}
}
2341
2342 /*
2343 * After a routing change, flush old routing
2344 * and allocate a (hopefully) better one.
2345 */
2346 void
in_rtchange(struct inpcb * inp,int errno)2347 in_rtchange(struct inpcb *inp, int errno)
2348 {
2349 #pragma unused(errno)
2350 boolean_t release = FALSE;
2351 struct rtentry *rt;
2352
2353 if ((rt = inp->inp_route.ro_rt) != NULL) {
2354 struct in_ifaddr *ia = NULL;
2355
2356 /* if address is gone, keep the old route */
2357 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2358 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2359 /*
2360 * Address is around; ditch the route. A new route
2361 * can be allocated the next time output is attempted.
2362 */
2363 release = TRUE;
2364 }
2365 if (ia != NULL) {
2366 ifa_remref(&ia->ia_ifa);
2367 }
2368 }
2369 if (rt == NULL || release) {
2370 ROUTE_RELEASE(&inp->inp_route);
2371 }
2372 }
2373
2374 /*
2375 * Lookup a PCB based on the local address and port.
2376 */
/*
 * Look up a PCB by local address and port.
 *
 * With wild_okay == 0, only an exact unconnected match (wildcard
 * foreign address, identical laddr/lport) is returned, via the main
 * address hash. With wild_okay != 0, a best-fit search is done over
 * the port hash: each candidate is scored by how many wildcard
 * components it needs (foreign addr, local addr), and the PCB with
 * the fewest wildcards wins (0 is a perfect match).
 *
 * Caller is expected to hold the pcbinfo lock; no reference is taken
 * on the returned PCB.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	int matchwild = 3, wildcard;
	u_short lport = (u_short)lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
			if (!(inp->inp_vflag & INP_IPV4)) {
				continue;
			}
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return inp;
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return NULL;
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport) {
				break;
			}
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
				if (!(inp->inp_vflag & INP_IPV4)) {
					continue;
				}
				/* a bound foreign address costs one wildcard */
				if (inp->inp_faddr.s_addr != INADDR_ANY) {
					wildcard++;
				}
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY) {
						wildcard++;
					} else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr) {
						/* different specific laddr: no match */
						continue;
					}
				} else {
					if (laddr.s_addr != INADDR_ANY) {
						wildcard++;
					}
				}
				/* keep the candidate needing fewest wildcards */
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					if (matchwild == 0) {
						/* exact match; stop searching */
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return match;
	}
}
2469
2470 /*
2471 * Check if PCB exists in hash list.
2472 */
/*
 * Check whether a PCB matching the given 4-tuple exists in the hash
 * lists, without taking a reference on it. On a match, returns 1 and
 * reports the owning socket's uid/gid through *uid / *gid (defaulted
 * to UID_MAX/GID_MAX otherwise). Used for delegation/credential
 * checks rather than packet delivery.
 *
 * Lookup order: exact 4-tuple first; then, if 'wildcard' is allowed,
 * unconnected listeners bound to the local address, preferring a
 * specific laddr over INADDR_ANY, and a native IPv4 wildcard over an
 * IPv6 mapped-address socket.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	/* defaults reported when no matching socket is found */
	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* honor interface-scoped receive restrictions */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
					inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					inp->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}

	/* No exact match: scan the wildcard (unconnected) bucket */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				/* listener bound to the specific laddr wins outright */
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
						inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
						inp->inp_socket->so_cred);
				}
				lck_rw_done(&pcbinfo->ipi_lock);
				return found;
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/* remember INADDR_ANY listeners as fallbacks */
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			/* only an IPv6 mapped-address listener matched */
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
					local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
			local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
			local_wild->inp_socket->so_cred);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	return found;
}
2599
2600 /*
2601 * Lookup PCB in hash list.
2602 */
/*
 * Look up a PCB by 4-tuple in the hash lists for packet delivery.
 *
 * On success the returned PCB carries a WNT_ACQUIRE reference taken
 * via in_pcb_checkstate(), which the caller must release with
 * WNT_RELEASE; a PCB already marked WNT_STOPUSING is treated as not
 * found. Lookup order matches in_pcblookup_hash_exists(): exact
 * match first, then (if 'wildcard') unconnected listeners, preferring
 * a specific local address over INADDR_ANY and a native IPv4 socket
 * over an IPv6 mapped-address one.
 */
struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* honor interface-scoped receive restrictions */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				lck_rw_done(&pcbinfo->ipi_lock);
				return inp;
			} else {
				/* it's there but dead, say it isn't found */
				lck_rw_done(&pcbinfo->ipi_lock);
				return NULL;
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(&pcbinfo->ipi_lock);
		return NULL;
	}

	/* No exact match: scan the wildcard (unconnected) bucket */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				/* listener bound to the specific laddr wins outright */
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					lck_rw_done(&pcbinfo->ipi_lock);
					return inp;
				} else {
					/* it's dead; say it isn't found */
					lck_rw_done(&pcbinfo->ipi_lock);
					return NULL;
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/* remember INADDR_ANY listeners as fallbacks */
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			/* only an IPv6 mapped-address listener matched */
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				lck_rw_done(&pcbinfo->ipi_lock);
				return local_wild_mapped;
			} else {
				/* it's dead; say it isn't found */
				lck_rw_done(&pcbinfo->ipi_lock);
				return NULL;
			}
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return NULL;
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		lck_rw_done(&pcbinfo->ipi_lock);
		return local_wild;
	}
	/*
	 * It's either not found or is already dead.
	 */
	lck_rw_done(&pcbinfo->ipi_lock);
	return NULL;
}
2728
2729 /*
2730 * @brief Insert PCB onto various hash lists.
2731 *
2732 * @param inp Pointer to internet protocol control block
2733 * @param remote Pointer to remote address sockaddr for policy evaluation
2734 * @param locked Implies if ipi_lock (protecting pcb list)
2735 * is already locked or not.
2736 *
2737 * @return int error on failure and 0 on success
2738 */
int
in_pcbinshash(struct inpcb *inp, struct sockaddr *remote, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}

	/*
	 * This routine or its caller may have given up
	 * socket's protocol lock briefly.
	 * During that time the socket may have been dropped.
	 * Safe-guarding against that.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		if (!locked) {
			lck_rw_done(&pcbinfo->ipi_lock);
		}
		return ECONNABORTED;
	}


	/* for IPv6 sockets, hash on the low 32 bits of the v6 address */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport) {
			break;
		}
	}

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		phd = kalloc_type(struct inpcbport, Z_WAITOK | Z_NOFAIL);
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	/* must not already be on a hash chain */
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

#if SKYWALK
	/* Reserve the port in the Skywalk namespace registrar so BSD and
	 * Skywalk stacks cannot hand out the same <proto, laddr, lport>. */
	int err;
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		if (inp->inp_vflag & INP_IPV6) {
			err = netns_reserve_in6(&inp->inp_netns_token,
			    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		} else {
			err = netns_reserve_in(&inp->inp_netns_token,
			    inp->inp_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		}
		if (err) {
			/* reservation failed: bail out before touching the hash lists */
			if (!locked) {
				lck_rw_done(&pcbinfo->ipi_lock);
			}
			return err;
		}
		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */

	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked) {
		lck_rw_done(&pcbinfo->ipi_lock);
	}

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, remote, 0);
#endif /* NECP */

	return 0;
}
2850
2851 /*
2852 * Move PCB to the proper hash bucket when { faddr, fport } have been
2853 * changed. NOTE: This does not handle the case of the lport changing (the
2854 * hashed port list would have to be updated as well), so the lport must
2855 * not change after in_pcbinshash() has been called.
2856 */
/*
 * Move a PCB to the proper hash bucket after { faddr, fport } (and
 * possibly laddr) changed; see the block comment above. Also keeps the
 * Skywalk port-namespace reservation in sync with the new local
 * address. Caller holds the pcbinfo lock exclusively; the lport must
 * not have changed since in_pcbinshash().
 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if SKYWALK
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		int err;
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			/* existing reservation: just move it to the new laddr */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_change_addr_in6(
					&inp->inp_netns_token, inp->in6p_laddr);
			} else {
				err = netns_change_addr_in(
					&inp->inp_netns_token, inp->inp_laddr);
			}
		} else {
			/* no token yet: take a fresh reservation */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_reserve_in6(&inp->inp_netns_token,
				    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			} else {
				err = netns_reserve_in(&inp->inp_netns_token,
				    inp->inp_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			}
		}
		/* We are assuming that whatever code paths result in a rehash
		 * did their due diligence and ensured that the given
		 * <proto, laddr, lport> tuple was free ahead of time. Just
		 * reserving the lport on INADDR_ANY should be enough, since
		 * that will block Skywalk from trying to reserve that same
		 * port. Given this assumption, the above netns calls should
		 * never fail*/
		VERIFY(err == 0);

		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */
	/* for IPv6 sockets, hash on the low 32 bits of the v6 address */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	/* unlink from the old bucket, if any, before re-inserting */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}
2924
/*
 * Remove PCB from various lists.
 * Must be called with the pcbinfo lock held in exclusive mode.
 */
/*
 * Unlink a PCB from the address hash, port hash, flow-control tree,
 * and either the time-wait queue or the global PCB list, then drop the
 * pcbinfo PCB count. Caller holds the pcbinfo lock exclusively (see
 * the block comment above).
 */
void
in_pcbremlists(struct inpcb *inp)
{
	/* bump the generation count so stale sysctl snapshots are detectable */
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * it's local port gets assigned. So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		/* free the per-port head once its last PCB is gone */
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			kfree_type(struct inpcbport, phd);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
#if SKYWALK
		/* Free up the port in the namespace registrar */
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
#endif /* SKYWALK */
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	/* drop the flow-control tree entry, if any */
	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}
2983
/*
 * Mechanism used to defer the memory release of PCBs.
 * The pcb list will contain the pcb until the reaper can clean it up if
 * the following conditions are met:
 * 1) state "DEAD",
 * 2) wantcnt is STOPUSING
 * 3) usecount is 0
 * This function is called to either mark the pcb for recycling
 * (WNT_STOPUSING), acquire a use reference (WNT_ACQUIRE), or release
 * one (WNT_RELEASE).
 */
/*
 * Manipulate the PCB's wantcnt reference/state word via lock-free CAS.
 *
 * Modes:
 *   WNT_STOPUSING - mark the PCB dead and, if no references are held,
 *                   set wantcnt to 0xffff so the GC can reap it.
 *   WNT_ACQUIRE   - take a reference unless already STOPUSING.
 *   WNT_RELEASE   - drop a reference; if the PCB is dead, fall through
 *                   to the STOPUSING path.
 *
 * 'locked' tells whether the caller already holds the socket lock.
 * Returns the resulting mode (WNT_STOPUSING when the PCB should no
 * longer be used).
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	/* low 16 bits: refcount; 0xffff is the STOPUSING sentinel */
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling. CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}

		/* ask the GC thread to come around soon */
		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) { /* should stop using */
			return WNT_STOPUSING;
		}
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* no outstanding refs: try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return WNT_STOPUSING;

	case WNT_ACQUIRE:
		/*
		 * Try to increase reference to pcb. If WNT_STOPUSING
		 * should bail out. If socket state DEAD, try to set count
		 * to STOPUSING, return failed otherwise increase cnt.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return WNT_STOPUSING;
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return WNT_ACQUIRE;

	case WNT_RELEASE:
		/*
		 * Release reference. If result is null and pcb state
		 * is DEAD, set wanted bit to STOPUSING
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				/* releasing without a matching acquire */
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0) {
					socket_unlock(pcb->inp_socket, 1);
				}
				return WNT_STOPUSING;
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		/* dead PCB: complete the STOPUSING transition above */
		if (pcb->inp_state == INPCB_STATE_DEAD) {
			goto stopusing;
		}
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}
		return WNT_RELEASE;

	default:
		panic("%s: so=%p not a valid state =%x", __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return mode;
}
3100
3101 /*
3102 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
3103 * The inpcb_compat data structure is passed to user space and must
3104 * not change. We intentionally avoid copying pointers.
3105 */
void
inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
{
	/*
	 * Zero the whole output first so padding and any fields we do not
	 * copy never leak kernel memory to user space.
	 */
	bzero(inp_compat, sizeof(*inp_compat));
	inp_compat->inp_fport = inp->inp_fport;
	inp_compat->inp_lport = inp->inp_lport;
	/* NAT fields are always reported as zero */
	inp_compat->nat_owner = 0;
	inp_compat->nat_cookie = 0;
	inp_compat->inp_gencnt = inp->inp_gencnt;
	inp_compat->inp_flags = inp->inp_flags;
	inp_compat->inp_flow = inp->inp_flow;
	inp_compat->inp_vflag = inp->inp_vflag;
	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
	inp_compat->inp_ip_p = inp->inp_ip_p;
	inp_compat->inp_dependfaddr.inp6_foreign =
	    inp->inp_dependfaddr.inp6_foreign;
	inp_compat->inp_dependladdr.inp6_local =
	    inp->inp_dependladdr.inp6_local;
	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	/* hop limit and interface index are intentionally not exported */
	inp_compat->inp_depend6.inp6_hlim = 0;
	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	inp_compat->inp_depend6.inp6_ifindex = 0;
	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}
3130
3131 #if XNU_TARGET_OS_OSX
/*
 * Copy the user-visible subset of an inpcb into the 64-bit export
 * structure; same field selection as inpcb_to_compat() above.
 * Caller is expected to have zeroed/prepared *xinp.
 */
void
inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
{
	xinp->inp_fport = inp->inp_fport;
	xinp->inp_lport = inp->inp_lport;
	xinp->inp_gencnt = inp->inp_gencnt;
	xinp->inp_flags = inp->inp_flags;
	xinp->inp_flow = inp->inp_flow;
	xinp->inp_vflag = inp->inp_vflag;
	xinp->inp_ip_ttl = inp->inp_ip_ttl;
	xinp->inp_ip_p = inp->inp_ip_p;
	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	/* hop limit and interface index are intentionally not exported */
	xinp->inp_depend6.inp6_hlim = 0;
	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	xinp->inp_depend6.inp6_ifindex = 0;
	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}
3151 #endif /* XNU_TARGET_OS_OSX */
3152
3153 /*
3154 * The following routines implement this scheme:
3155 *
3156 * Callers of ip_output() that intend to cache the route in the inpcb pass
3157 * a local copy of the struct route to ip_output(). Using a local copy of
3158 * the cached route significantly simplifies things as IP no longer has to
3159 * worry about having exclusive access to the passed in struct route, since
3160 * it's defined in the caller's stack; in essence, this allows for a lock-
3161 * less operation when updating the struct route at the IP level and below,
3162 * whenever necessary. The scheme works as follows:
3163 *
3164 * Prior to dropping the socket's lock and calling ip_output(), the caller
3165 * copies the struct route from the inpcb into its stack, and adds a reference
3166 * to the cached route entry, if there was any. The socket's lock is then
3167 * dropped and ip_output() is called with a pointer to the copy of struct
3168 * route defined on the stack (not to the one in the inpcb.)
3169 *
3170 * Upon returning from ip_output(), the caller then acquires the socket's
3171 * lock and synchronizes the cache; if there is no route cached in the inpcb,
3172 * it copies the local copy of struct route (which may or may not contain any
3173 * route) back into the cache; otherwise, if the inpcb has a route cached in
3174 * it, the one in the local copy will be freed, if there's any. Trashing the
3175 * cached route in the inpcb can be avoided because ip_output() is single-
3176 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
3177 * by the socket/transport layer.)
3178 */
3179 void
inp_route_copyout(struct inpcb * inp,struct route * dst)3180 inp_route_copyout(struct inpcb *inp, struct route *dst)
3181 {
3182 struct route *src = &inp->inp_route;
3183
3184 socket_lock_assert_owned(inp->inp_socket);
3185
3186 /*
3187 * If the route in the PCB is stale or not for IPv4, blow it away;
3188 * this is possible in the case of IPv4-mapped address case.
3189 */
3190 if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
3191 ROUTE_RELEASE(src);
3192 }
3193
3194 route_copyout(dst, src, sizeof(*dst));
3195 }
3196
3197 void
inp_route_copyin(struct inpcb * inp,struct route * src)3198 inp_route_copyin(struct inpcb *inp, struct route *src)
3199 {
3200 struct route *dst = &inp->inp_route;
3201
3202 socket_lock_assert_owned(inp->inp_socket);
3203
3204 /* Minor sanity check */
3205 if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
3206 panic("%s: wrong or corrupted route: %p", __func__, src);
3207 }
3208
3209 route_copyin(src, dst, sizeof(*src));
3210 }
3211
3212 /*
3213 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
3214 */
3215 static void
inp_bindif_common(struct inpcb * inp,struct ifnet * ifp)3216 inp_bindif_common(struct inpcb *inp, struct ifnet *ifp)
3217 {
3218 /*
3219 * A zero interface scope value indicates an "unbind".
3220 * Otherwise, take in whatever value the app desires;
3221 * the app may already know the scope (or force itself
3222 * to such a scope) ahead of time before the interface
3223 * gets attached. It doesn't matter either way; any
3224 * route lookup from this point on will require an
3225 * exact match for the embedded interface scope.
3226 */
3227 inp->inp_boundifp = ifp;
3228 if (inp->inp_boundifp == NULL) {
3229 inp->inp_flags &= ~INP_BOUND_IF;
3230 } else {
3231 inp->inp_flags |= INP_BOUND_IF;
3232 }
3233
3234 /* Blow away any cached route in the PCB */
3235 ROUTE_RELEASE(&inp->inp_route);
3236 }
3237
3238
/*
 * Bind the PCB to the interface identified by ifscope (IFSCOPE_NONE
 * unbinds).  Returns ENXIO if the scope does not name an attached
 * interface; on success optionally returns the interface via *pifp.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	/*
	 * Resolve the scope under the ifnet head lock; the bounds check
	 * on ifscope must come before the ifindex2ifnet[] access.
	 */
	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return ENXIO;
	}
	ifnet_head_done();

	/* ifp is only NULL for the "unbind" case */
	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	inp_bindif_common(inp, ifp);

	if (pifp != NULL) {
		*pifp = ifp;
	}

	return 0;
}
3262
3263 int
inp_bindtodevice(struct inpcb * inp,const char * ifname)3264 inp_bindtodevice(struct inpcb *inp, const char *ifname)
3265 {
3266 ifnet_ref_t ifp = NULL;
3267
3268 if (*ifname != 0) {
3269 int error = ifnet_find_by_name(ifname, &ifp);
3270 if (error != 0) {
3271 return error;
3272 }
3273 }
3274
3275 inp_bindif_common(inp, ifp);
3276
3277 if (ifp != NULL) {
3278 ifnet_release(ifp);
3279 }
3280 return 0;
3281 }
3282
3283 /*
3284 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3285 * as well as for setting PROC_UUID_NO_CELLULAR policy.
3286 */
void
inp_set_nocellular(struct inpcb *inp)
{
	/* Bar this PCB from using cellular interfaces */
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3295
3296 /*
3297 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3298 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
3299 */
3300 void
inp_clear_nocellular(struct inpcb * inp)3301 inp_clear_nocellular(struct inpcb *inp)
3302 {
3303 struct socket *so = inp->inp_socket;
3304
3305 /*
3306 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
3307 * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag
3308 * if and only if the socket is unrestricted.
3309 */
3310 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
3311 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
3312
3313 /* Blow away any cached route in the PCB */
3314 ROUTE_RELEASE(&inp->inp_route);
3315 }
3316 }
3317
/* Bar this PCB from using interfaces marked expensive */
void
inp_set_noexpensive(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3326
/* Bar this PCB from using interfaces marked constrained */
void
inp_set_noconstrained(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3335
/* Allow this PCB to use restricted AWDL interfaces */
void
inp_set_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3344
3345 boolean_t
inp_get_awdl_unrestricted(struct inpcb * inp)3346 inp_get_awdl_unrestricted(struct inpcb *inp)
3347 {
3348 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
3349 }
3350
/* Revoke this PCB's permission to use restricted AWDL interfaces */
void
inp_clear_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3359
/* Allow this PCB to use co-processor (intcoproc) interfaces */
void
inp_set_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3368
3369 boolean_t
inp_get_intcoproc_allowed(struct inpcb * inp)3370 inp_get_intcoproc_allowed(struct inpcb *inp)
3371 {
3372 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3373 }
3374
/* Revoke this PCB's permission to use co-processor interfaces */
void
inp_clear_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3383
3384 void
inp_set_management_allowed(struct inpcb * inp)3385 inp_set_management_allowed(struct inpcb *inp)
3386 {
3387 inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
3388 inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
3389
3390 /* Blow away any cached route in the PCB */
3391 ROUTE_RELEASE(&inp->inp_route);
3392 }
3393
3394 boolean_t
inp_get_management_allowed(struct inpcb * inp)3395 inp_get_management_allowed(struct inpcb *inp)
3396 {
3397 return (inp->inp_flags2 & INP2_MANAGEMENT_ALLOWED) ? TRUE : FALSE;
3398 }
3399
/* Revoke this PCB's permission to use management interfaces */
void
inp_clear_management_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_MANAGEMENT_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3408
3409 void
inp_set_ultra_constrained_allowed(struct inpcb * inp)3410 inp_set_ultra_constrained_allowed(struct inpcb *inp)
3411 {
3412 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
3413 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;
3414
3415 /* Blow away any cached route in the PCB */
3416 ROUTE_RELEASE(&inp->inp_route);
3417 }
3418
3419 #if NECP
3420 /*
3421 * Called when PROC_UUID_NECP_APP_POLICY is set.
3422 */
/*
 * Called when PROC_UUID_NECP_APP_POLICY is set.
 * Marks the PCB as wanting per-app NECP policy evaluation.
 */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}
3428
3429 /*
3430 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
3431 */
/*
 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
 * Clears the per-app NECP policy marker on the PCB.
 */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
3437 #endif /* NECP */
3438
3439 /*
3440 * Calculate flow hash for an inp, used by an interface to identify a
3441 * flow. When an interface provides flow control advisory, this flow
3442 * hash is used as an identifier.
3443 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
#if SKYWALK

	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));

	/* Build the flow key from the PCB's address/port/protocol tuple */
	if (inp->inp_vflag & INP_IPV4) {
		fk.ffk_af = AF_INET;
		fk.ffk_laddr_v4 = inp->inp_laddr;
		fk.ffk_raddr_v4 = inp->inp_faddr;
	} else {
		fk.ffk_af = AF_INET6;
		fk.ffk_laddr_v6 = inp->in6p_laddr;
		fk.ffk_raddr_v6 = inp->in6p_faddr;
		/* clear embedded scope ID */
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_laddr_v6)) {
			fk.ffk_laddr_v6.s6_addr16[1] = 0;
		}
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_raddr_v6)) {
			fk.ffk_raddr_v6.s6_addr16[1] = 0;
		}
	}

	fk.ffk_lport = inp->inp_lport;
	fk.ffk_rport = inp->inp_fport;
	/* fall back to the socket's protocol when inp_ip_p is unset */
	fk.ffk_proto = (inp->inp_ip_p != 0) ? inp->inp_ip_p :
	    (uint8_t)SOCK_PROTO(inp->inp_socket);
	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_INPCB, &fk, &flowid);
	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	/* a PCB gets a flowhash at most once, and at most one tree entry */
	ASSERT(inp->inp_flowhash == 0);
	ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) == 0);
	inp->inp_flowhash = flowid;
	VERIFY(RB_INSERT(inp_fc_tree, &inp_fc_tree, inp) == NULL);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowid;

#else /* !SKYWALK */

	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	/* lazily initialize the global hash seed */
	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low.  Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowhash;

#endif /* !SKYWALK */
}
3543
3544 void
inp_flowadv(uint32_t flowhash)3545 inp_flowadv(uint32_t flowhash)
3546 {
3547 struct inpcb *inp;
3548
3549 inp = inp_fc_getinp(flowhash, 0);
3550
3551 if (inp == NULL) {
3552 return;
3553 }
3554 inp_fc_feedback(inp);
3555 }
3556
3557 /*
3558 * Function to compare inp_fc_entries in inp flow control tree
3559 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	/*
	 * Byte-wise comparison of the two flow hashes; any consistent
	 * total order suffices for the red-black tree.
	 */
	return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	    sizeof(inp1->inp_flowhash));
}
3566
/*
 * Look up a PCB in the flow-control tree by flow hash.
 *
 * Without INPFC_REMOVE: on success the PCB is returned with a want
 * count taken (WNT_ACQUIRE); the caller must release it.  With
 * INPFC_REMOVE: the entry is taken out of the tree and NULL is
 * always returned.  INPFC_SOLOCKED indicates the caller already
 * holds the socket lock.
 */
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	/* key_inp serves only as a lookup key for the tree */
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (flags & INPFC_REMOVE) {
		ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) != 0);
		/* convert the spin lock before the (non-trivial) removal */
		lck_mtx_convert_spin(&inp_fc_lck);
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		bzero(&(inp->infc_link), sizeof(inp->infc_link));
#if SKYWALK
		VERIFY(inp->inp_flowhash != 0);
		flowidns_release_flowid(inp->inp_flowhash);
		inp->inp_flowhash = 0;
#endif /* SKYWALK */
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		lck_mtx_unlock(&inp_fc_lck);
		/* removal callers never get a reference back */
		return NULL;
	}

	/* take a want count so the PCB cannot go away under the caller */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
		inp = NULL;
	}
	lck_mtx_unlock(&inp_fc_lck);

	return inp;
}
3604
/*
 * Process interface feedback for a flow-controlled PCB: clear the
 * flow-controlled/suspended state and wake up writers.  Consumes the
 * want count the caller (inp_flowadv) obtained via inp_fc_getinp().
 */
static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	/* drop the want count; bail if the PCB is on its way out */
	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/*
	 * A send is in progress; record the feedback so that
	 * inp_set_fc_state() ignores the racing advisory.
	 */
	if (inp->inp_sndinprog_cnt > 0) {
		inp->inp_flags |= INP_FC_FEEDBACK;
	}

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	/* TCP additionally needs to be unthrottled to resume sending */
	if (SOCK_TYPE(so) == SOCK_STREAM) {
		inp_fc_unthrottle_tcp(inp);
	}

	socket_unlock(so, 1);
}
3640
3641 static void
inp_reset_fc_timerstat(struct inpcb * inp)3642 inp_reset_fc_timerstat(struct inpcb *inp)
3643 {
3644 uint64_t now;
3645
3646 if (inp->inp_fadv_start_time == 0) {
3647 return;
3648 }
3649
3650 now = net_uptime_us();
3651 ASSERT(now >= inp->inp_fadv_start_time);
3652
3653 inp->inp_fadv_total_time += (now - inp->inp_fadv_start_time);
3654 inp->inp_fadv_cnt++;
3655
3656 inp->inp_fadv_start_time = 0;
3657 }
3658
3659 static void
inp_set_fc_timerstat(struct inpcb * inp)3660 inp_set_fc_timerstat(struct inpcb *inp)
3661 {
3662 if (inp->inp_fadv_start_time != 0) {
3663 return;
3664 }
3665
3666 inp->inp_fadv_start_time = net_uptime_us();
3667 }
3668
/*
 * Take the PCB out of flow-controlled/suspended state, notify
 * listeners of the resume, and wake up blocked writers.
 */
void
inp_reset_fc_state(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	/* snapshot the state before the flags are cleared below */
	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);

	/* close out the flow-advisory accounting interval, if any */
	inp_reset_fc_timerstat(inp);

	if (suspended) {
		so->so_flags &= ~(SOF_SUSPENDED);
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
	}

	/* Give a write wakeup to unblock the socket */
	if (needwakeup) {
		sowwakeup(so);
	}
}
3690
/*
 * Apply a flow advisory (FADV_FLOW_CONTROLLED or FADV_SUSPENDED) to
 * the PCB.  Returns 1 if the flow-controlled state was set, 0 if the
 * advisory was ignored (racing feedback, or the PCB is not in the
 * flow-control tree / is being torn down).
 */
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK) {
		return 0;
	}

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	/*
	 * The lookup also takes a want count on the PCB, which is
	 * dropped again via WNT_RELEASE right below.
	 */
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			goto exit_reset;
		}
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			inp_set_fc_timerstat(inp);
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			inp_set_fc_timerstat(inp);

			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}

		/* throttle TCP only on the transition into flow control */
		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
			inp_fc_throttle_tcp(inp);
		}
		return 1;
	}

exit_reset:
	inp_reset_fc_timerstat(inp);

	return 0;
}
3742
3743 /*
3744 * Handler for SO_FLUSH socket option.
3745 */
/*
 * Flush this PCB's queued packets of the given traffic class (or all
 * classes with SO_TC_ALL) from both the cached route's interface and
 * the last output interface.  Returns EINVAL on a bad class.
 */
int
inp_flush(struct inpcb *inp, int optval)
{
	u_int32_t flowhash = inp->inp_flowhash;
	struct ifnet *rtifp, *oifp;

	/* Either all classes or one of the valid ones */
	if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
		return EINVAL;
	}

	/* We need a flow hash for identification */
	if (flowhash == 0) {
		return 0;
	}

	/* Grab the interfaces from the route and pcb */
	rtifp = ((inp->inp_route.ro_rt != NULL) ?
	    inp->inp_route.ro_rt->rt_ifp : NULL);
	oifp = inp->inp_last_outifp;

	/* flush on both interfaces, taking care not to flush one twice */
	if (rtifp != NULL) {
		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	}
	if (oifp != NULL && oifp != rtifp) {
		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	}

	return 0;
}
3776
3777 /*
3778 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3779 */
3780 void
inp_clear_INP_INADDR_ANY(struct socket * so)3781 inp_clear_INP_INADDR_ANY(struct socket *so)
3782 {
3783 struct inpcb *inp = NULL;
3784
3785 socket_lock(so, 1);
3786 inp = sotoinpcb(so);
3787 if (inp) {
3788 inp->inp_flags &= ~INP_INADDR_ANY;
3789 }
3790 socket_unlock(so, 1);
3791 }
3792
/*
 * Fill in *soprocinfo with the process identity (pid, name, UUID and
 * the delegated/effective variants) recorded on the PCB's socket.
 */
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	strbufcpy(soprocinfo->spi_proc_name, inp->inp_last_proc_name);
	/* pid 0 carries no UUID worth reporting */
	if (so->last_pid != 0) {
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	}
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
	strbufcpy(soprocinfo->spi_e_proc_name, inp->inp_e_proc_name);
}
3816
/*
 * Find the PCB with the given flow hash in pcbinfo's list and return
 * its owning-process info.  Returns 1 when found, 0 when not found,
 * and -1 for a zero (invalid) flow hash.
 */
int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof(struct so_procinfo));

	if (!flowhash) {
		return -1;
	}

	/* linear scan of the PCB list for a live entry with this hash */
	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);

	return found;
}
3844
3845 #if CONFIG_PROC_UUID_POLICY
/*
 * Apply (set=TRUE) or clear the per-process no-cellular policy on a
 * PCB, logging the transition when net_io_policy_log is enabled.
 */
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	/* snapshot state around the change so only real transitions log */
	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* log under the effective identity when delegated */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}
3889
3890 #if NECP
/*
 * Apply (set=TRUE) or clear the NECP want-app-policy marker on a PCB,
 * logging the transition when net_io_policy_log is enabled.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	/* snapshot state around the change so only real transitions log */
	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* log under the effective identity when delegated */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
3932 #endif /* !CONFIG_PROC_UUID_POLICY */
3933
3934 #if NECP
/*
 * Re-evaluate the NECP policy match for this socket, and rescope the
 * PCB to the policy's interface when NECP asks for it and the socket
 * is not yet bound to a port or address.
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
		inp->inp_flags2 |= INP2_SCOPED_BY_NECP;
	}
}
3948 #endif /* NECP */
3949
/*
 * Re-apply per-process UUID policies (no-cellular, NECP app policy)
 * to this PCB.  Returns 0 on success or when no policy applies;
 * otherwise the proc_uuid_policy_lookup() error.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;
	uint8_t *lookup_uuid = NULL;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return 0;
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return 0;
	}

#if defined(XNU_TARGET_OS_OSX)
	/* prefer the responsible process UUID when one is recorded */
	if (so->so_rpid > 0) {
		lookup_uuid = so->so_ruuid;
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}
#endif
	/* fall back to the effective (or last) process UUID */
	if (lookup_uuid == NULL || err == ENOENT) {
		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* "no entry" simply means no policy applies */
	return (err == ENOENT) ? 0 : err;
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return 0;
#endif /* !CONFIG_PROC_UUID_POLICY */
}
4023
/* When non-zero, log packets dropped due to interface restrictions */
unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
4029
4030
4031 /*
4032 * Called when we need to enforce policy restrictions in the input path.
4033 *
4034 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
4035 */
static boolean_t
_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	/* per-PCB opt-outs from restricted interface classes */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* beyond this point only restricted-receive interfaces deny */
	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
		return FALSE;
	}

	if (inp->inp_flags & INP_RECV_ANYIF) {
		return FALSE;
	}

	/*
	 * An entitled process can use the management interface without being bound
	 * to the interface
	 */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (INP_MANAGEMENT_ALLOWED(inp)) {
			return FALSE;
		}
		if (if_management_verbose > 1) {
			os_log(OS_LOG_DEFAULT, "_inp_restricted_recv %s:%d not allowed on management interface %s",
			    proc_best_name(current_proc()), proc_getpid(current_proc()),
			    ifp->if_xname);
		}
		return TRUE;
	}

	/* being bound to a restricted-receive interface permits receive */
	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
		return FALSE;
	}

	/*
	 * NOTE(review): this check is redundant -- the unconditional
	 * "return TRUE" below yields the same result either way.
	 */
	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}


	/* default-deny on restricted-receive interfaces */
	return TRUE;
}
4103
4104 boolean_t
inp_restricted_recv(struct inpcb * inp,struct ifnet * ifp)4105 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
4106 {
4107 boolean_t ret;
4108
4109 ret = _inp_restricted_recv(inp, ifp);
4110 if (ret == TRUE && log_restricted) {
4111 printf("pid %d (%s) is unable to receive packets on %s\n",
4112 proc_getpid(current_proc()), proc_best_name(current_proc()),
4113 ifp->if_xname);
4114 }
4115 return ret;
4116 }
4117
4118 /*
4119 * Called when we need to enforce policy restrictions in the output path.
4120 *
4121 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
4122 */
4123 static boolean_t
_inp_restricted_send(struct inpcb * inp,struct ifnet * ifp)4124 _inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
4125 {
4126 VERIFY(inp != NULL);
4127
4128 /*
4129 * Outbound restrictions.
4130 */
4131 if (!sorestrictsend) {
4132 return FALSE;
4133 }
4134
4135 if (ifp == NULL) {
4136 return FALSE;
4137 }
4138
4139 if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
4140 return TRUE;
4141 }
4142
4143 if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
4144 return TRUE;
4145 }
4146
4147 if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
4148 return TRUE;
4149 }
4150
4151 if (IFNET_IS_ULTRA_CONSTRAINED(ifp) && uuid_is_null(inp->necp_client_uuid) &&
4152 !INP_ULTRA_CONSTRAINED_ALLOWED(inp)) {
4153 // Non-NECP-aware sockets are not allowed to use ultra constrained interfaces
4154 // without an entitlement
4155 return TRUE;
4156 }
4157
4158 if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
4159 return TRUE;
4160 }
4161
4162 if (IFNET_IS_MANAGEMENT(ifp)) {
4163 if (!INP_MANAGEMENT_ALLOWED(inp)) {
4164 if (if_management_verbose > 1) {
4165 os_log(OS_LOG_DEFAULT, "_inp_restricted_send %s:%d not allowed on management interface %s",
4166 proc_best_name(current_proc()), proc_getpid(current_proc()),
4167 ifp->if_xname);
4168 }
4169 return TRUE;
4170 }
4171 }
4172
4173 if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
4174 return TRUE;
4175 }
4176
4177 return FALSE;
4178 }
4179
4180 boolean_t
inp_restricted_send(struct inpcb * inp,struct ifnet * ifp)4181 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
4182 {
4183 boolean_t ret;
4184
4185 ret = _inp_restricted_send(inp, ifp);
4186 if (ret == TRUE && log_restricted) {
4187 printf("pid %d (%s) is unable to transmit packets on %s\n",
4188 proc_getpid(current_proc()), proc_best_name(current_proc()),
4189 ifp->if_xname);
4190 }
4191 return ret;
4192 }
4193
4194 inline void
inp_count_sndbytes(struct inpcb * inp,u_int32_t th_ack)4195 inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
4196 {
4197 struct ifnet *ifp = inp->inp_last_outifp;
4198 struct socket *so = inp->inp_socket;
4199 if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
4200 (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
4201 int32_t unsent;
4202
4203 so->so_snd.sb_flags |= SB_SNDBYTE_CNT;
4204
4205 /*
4206 * There can be data outstanding before the connection
4207 * becomes established -- TFO case
4208 */
4209 if (so->so_snd.sb_cc > 0) {
4210 inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
4211 }
4212
4213 unsent = inp_get_sndbytes_allunsent(so, th_ack);
4214 if (unsent > 0) {
4215 inp_incr_sndbytes_unsent(so, unsent);
4216 }
4217 }
4218 }
4219
4220 inline void
inp_incr_sndbytes_total(struct socket * so,int32_t len)4221 inp_incr_sndbytes_total(struct socket *so, int32_t len)
4222 {
4223 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4224 struct ifnet *ifp = inp->inp_last_outifp;
4225
4226 if (ifp != NULL) {
4227 VERIFY(ifp->if_sndbyte_total >= 0);
4228 OSAddAtomic64(len, &ifp->if_sndbyte_total);
4229 }
4230 }
4231
4232 inline void
inp_decr_sndbytes_total(struct socket * so,int32_t len)4233 inp_decr_sndbytes_total(struct socket *so, int32_t len)
4234 {
4235 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4236 struct ifnet *ifp = inp->inp_last_outifp;
4237
4238 if (ifp != NULL) {
4239 if (ifp->if_sndbyte_total >= len) {
4240 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
4241 } else {
4242 ifp->if_sndbyte_total = 0;
4243 }
4244 }
4245 }
4246
4247 inline void
inp_incr_sndbytes_unsent(struct socket * so,int32_t len)4248 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
4249 {
4250 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4251 struct ifnet *ifp = inp->inp_last_outifp;
4252
4253 if (ifp != NULL) {
4254 VERIFY(ifp->if_sndbyte_unsent >= 0);
4255 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
4256 }
4257 }
4258
4259 inline void
inp_decr_sndbytes_unsent(struct socket * so,int32_t len)4260 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
4261 {
4262 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4263 return;
4264 }
4265
4266 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4267 struct ifnet *ifp = inp->inp_last_outifp;
4268
4269 if (ifp != NULL) {
4270 if (ifp->if_sndbyte_unsent >= len) {
4271 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
4272 } else {
4273 ifp->if_sndbyte_unsent = 0;
4274 }
4275 }
4276 }
4277
4278 inline void
inp_decr_sndbytes_allunsent(struct socket * so,u_int32_t th_ack)4279 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
4280 {
4281 int32_t len;
4282
4283 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4284 return;
4285 }
4286
4287 len = inp_get_sndbytes_allunsent(so, th_ack);
4288 inp_decr_sndbytes_unsent(so, len);
4289 }
4290
#if SKYWALK
inline void
inp_update_netns_flags(struct socket *so)
{
	struct inpcb *inp;

	/* Only INET/INET6 sockets carry a netns token. */
	if (!(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		return;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || !NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
		return;
	}

	/* Mirror each socket/inpcb option into its netns flag equivalent. */
	bool nowake = (so->so_options & SO_NOWAKEFROMSLEEP) != 0;
	bool anyif = (inp->inp_flags & INP_RECV_ANYIF) != 0;
	bool extbg = (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) != 0;

	uint32_t set_flags =
	    (nowake ? NETNS_NOWAKEFROMSLEEP : 0) |
	    (anyif ? NETNS_RECVANYIF : 0) |
	    (extbg ? NETNS_EXTBGIDLE : 0);
	uint32_t clear_flags =
	    (!nowake ? NETNS_NOWAKEFROMSLEEP : 0) |
	    (!anyif ? NETNS_RECVANYIF : 0) |
	    (!extbg ? NETNS_EXTBGIDLE : 0);

	netns_change_flags(&inp->inp_netns_token, set_flags, clear_flags);
}
#endif /* SKYWALK */
4334
inline void
inp_set_activity_bitmap(struct inpcb *inp)
{
	/* Mark this PCB as active at the current system uptime. */
	in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
}
4340
inline void
inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
{
	/* Snapshot the PCB's activity bitmap into the caller's buffer. */
	bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
}
4346
4347 void
inp_update_last_owner(struct socket * so,struct proc * p,struct proc * ep)4348 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
4349 {
4350 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4351
4352 if (inp == NULL) {
4353 return;
4354 }
4355
4356 if (p != NULL) {
4357 strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
4358 }
4359 if (so->so_flags & SOF_DELEGATED) {
4360 if (ep != NULL) {
4361 strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
4362 } else {
4363 inp->inp_e_proc_name[0] = 0;
4364 }
4365 } else {
4366 inp->inp_e_proc_name[0] = 0;
4367 }
4368 }
4369
4370 void
inp_copy_last_owner(struct socket * so,struct socket * head)4371 inp_copy_last_owner(struct socket *so, struct socket *head)
4372 {
4373 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4374 struct inpcb *head_inp = (struct inpcb *)head->so_pcb;
4375
4376 if (inp == NULL || head_inp == NULL) {
4377 return;
4378 }
4379
4380 strbufcpy(inp->inp_last_proc_name, head_inp->inp_last_proc_name);
4381 strbufcpy(inp->inp_e_proc_name, head_inp->inp_e_proc_name);
4382 }
4383
/*
 * proc_iterate() callback: if this process is entitled to use
 * management/intcoproc interfaces (or the global override is set), mark
 * every INET/INET6 socket it holds as allowed on management interfaces.
 * Always returns PROC_RETURNED so iteration continues.
 */
static int
in_check_management_interface_proc_callout(proc_t proc, void *arg __unused)
{
	struct fileproc *fp = NULL;
	task_t __single task = proc_task(proc);
	bool allowed = false;

	/* Any one of these entitlements qualifies the process. */
	if (IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
	    || IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT_DEVELOPMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
	    ) {
		allowed = true;
	}
	/*
	 * Skip processes that are neither entitled nor covered by the
	 * global management_data_unrestricted override.
	 */
	if (allowed == false && management_data_unrestricted == false) {
		return PROC_RETURNED;
	}

	/* Walk the process's file table looking for INET/INET6 sockets. */
	proc_fdlock(proc);
	fdt_foreach(fp, proc) {
		struct fileglob *fg = fp->fp_glob;
		struct socket *so;
		struct inpcb *inp;

		if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
			continue;
		}

		so = (struct socket *)fp_get_data(fp);
		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
			continue;
		}

		inp = (struct inpcb *)so->so_pcb;

		/*
		 * Take a use-count reference so the PCB cannot be reclaimed
		 * while we acquire the socket lock; a PCB that is going away
		 * (WNT_STOPUSING) is skipped.
		 */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}

		socket_lock(so, 1);

		/* Re-check under the socket lock while dropping our ref. */
		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			socket_unlock(so, 1);
			continue;
		}
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;

		socket_unlock(so, 1);
	}
	proc_fdunlock(proc);

	return PROC_RETURNED;
}
4440
/* Set once the one-time management-interface entitlement scan has run. */
static bool in_management_interface_checked = false;
4442
4443 static void
in_management_interface_event_callback(struct nwk_wq_entry * nwk_item)4444 in_management_interface_event_callback(struct nwk_wq_entry *nwk_item)
4445 {
4446 kfree_type(struct nwk_wq_entry, nwk_item);
4447
4448 if (in_management_interface_checked == true) {
4449 return;
4450 }
4451 in_management_interface_checked = true;
4452
4453 proc_iterate(PROC_ALLPROCLIST,
4454 in_check_management_interface_proc_callout,
4455 NULL, NULL, NULL);
4456 }
4457
4458 void
in_management_interface_check(void)4459 in_management_interface_check(void)
4460 {
4461 struct nwk_wq_entry *nwk_item;
4462
4463 if (if_management_interface_check_needed == false ||
4464 in_management_interface_checked == true) {
4465 return;
4466 }
4467
4468 nwk_item = kalloc_type(struct nwk_wq_entry,
4469 Z_WAITOK | Z_ZERO | Z_NOFAIL);
4470
4471 nwk_item->func = in_management_interface_event_callback;
4472
4473 nwk_wq_enqueue(nwk_item);
4474 }
4475