1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <sys/file_internal.h>
82 #include <net/dlil.h>
83
84 #include <libkern/OSAtomic.h>
85 #include <kern/locks.h>
86
87 #include <machine/limits.h>
88
89 #include <kern/uipc_domain.h>
90 #include <kern/zalloc.h>
91
92 #include <net/if.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/flowhash.h>
96 #include <net/flowadv.h>
97 #include <net/nat464_utils.h>
98 #include <net/ntstat.h>
99 #include <net/nwk_wq.h>
100 #include <net/restricted_in_port.h>
101
102 #include <netinet/in.h>
103 #include <netinet/in_pcb.h>
104 #include <netinet/inp_log.h>
105 #include <netinet/in_var.h>
106 #include <netinet/ip_var.h>
107
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110
111 #include <sys/kdebug.h>
112 #include <sys/random.h>
113
114 #include <dev/random/randomdev.h>
115 #include <mach/boolean.h>
116
117 #include <atm/atm_internal.h>
118 #include <pexpert/pexpert.h>
119
120 #if NECP
121 #include <net/necp.h>
122 #endif
123
124 #include <sys/stat.h>
125 #include <sys/ubc.h>
126 #include <sys/vnode.h>
127
128 #include <os/log.h>
129
130 #if SKYWALK
131 #include <skywalk/namespace/flowidns.h>
132 #endif /* SKYWALK */
133
134 #include <IOKit/IOBSD.h>
135
136 #include <net/sockaddr_utils.h>
137
138 extern int udp_use_randomport;
139 extern int tcp_use_randomport;
140
141 extern const char *proc_name_address(struct proc *);
142
143 static LCK_GRP_DECLARE(inpcb_lock_grp, "inpcb");
144 static LCK_ATTR_DECLARE(inpcb_lock_attr, 0, 0);
145 static LCK_MTX_DECLARE_ATTR(inpcb_lock, &inpcb_lock_grp, &inpcb_lock_attr);
146 static LCK_MTX_DECLARE_ATTR(inpcb_timeout_lock, &inpcb_lock_grp, &inpcb_lock_attr);
147
148 static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
149
150 static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
151 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
152 static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
153 static boolean_t inpcb_fast_timer_on = FALSE;
154
155 #define INPCB_GCREQ_THRESHOLD 50000
156
157 static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
158 static void inpcb_sched_timeout(void);
159 static void inpcb_sched_lazy_timeout(void);
160 static void _inpcb_sched_timeout(unsigned int);
161 static void inpcb_timeout(void *, void *);
162 const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
163 extern int tvtohz(struct timeval *);
164
165 #if CONFIG_PROC_UUID_POLICY
166 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
167 #if NECP
168 static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
169 #endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */
171
172 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
173 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
174
175 int allow_udp_port_exhaustion = 0;
176
177 /*
178 * These configure the range of local port addresses assigned to
179 * "unspecified" outgoing connections/packets/whatever.
180 */
181 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
182 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
183 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
184 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
185 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
186 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
187
188 #define RANGECHK(var, min, max) \
189 if ((var) < (min)) { (var) = (min); } \
190 else if ((var) > (max)) { (var) = (max); }
191
192 static int
193 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
194 {
195 #pragma unused(arg1, arg2)
196 int error;
197 int new_value = *(int *)oidp->oid_arg1;
198 #if (DEBUG | DEVELOPMENT)
199 int old_value = *(int *)oidp->oid_arg1;
200 /*
201 * For unit testing allow a non-superuser process with the
202 * proper entitlement to modify the variables
203 */
204 if (req->newptr) {
205 if (proc_suser(current_proc()) != 0 &&
206 (error = priv_check_cred(kauth_cred_get(),
207 PRIV_NETINET_RESERVEDPORT, 0))) {
208 return EPERM;
209 }
210 }
211 #endif /* (DEBUG | DEVELOPMENT) */
212
213 error = sysctl_handle_int(oidp, &new_value, 0, req);
214 if (!error) {
215 if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
216 RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
217 } else {
218 RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
219 }
220 *(int *)oidp->oid_arg1 = new_value;
221 }
222
223 #if (DEBUG | DEVELOPMENT)
224 os_log(OS_LOG_DEFAULT,
225 "%s:%u sysctl net.restricted_port.verbose: %d -> %d)",
226 proc_best_name(current_proc()), proc_selfpid(),
227 old_value, *(int *)oidp->oid_arg1);
228 #endif /* (DEBUG | DEVELOPMENT) */
229
230 return error;
231 }
232
233 #undef RANGECHK
234
235 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
236 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");
237
238 #if (DEBUG | DEVELOPMENT)
239 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
240 #else
241 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
242 #endif /* (DEBUG | DEVELOPMENT) */
243
244 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
245 CTLFAGS_IP_PORTRANGE,
246 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
247 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
248 CTLFAGS_IP_PORTRANGE,
249 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
250 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
251 CTLFAGS_IP_PORTRANGE,
252 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
253 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
254 CTLFAGS_IP_PORTRANGE,
255 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
256 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
257 CTLFAGS_IP_PORTRANGE,
258 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
259 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
260 CTLFAGS_IP_PORTRANGE,
261 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
262 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
263 CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
264
265 static uint32_t apn_fallbk_debug = 0;
266 #define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)
267
268 #if !XNU_TARGET_OS_OSX
269 static boolean_t apn_fallbk_enabled = TRUE;
270
271 SYSCTL_DECL(_net_inet);
272 SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
273 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
274 &apn_fallbk_enabled, 0, "APN fallback enable");
275 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
276 &apn_fallbk_debug, 0, "APN fallback debug enable");
277 #else /* XNU_TARGET_OS_OSX */
278 static boolean_t apn_fallbk_enabled = FALSE;
279 #endif /* XNU_TARGET_OS_OSX */
280
281 extern int udp_use_randomport;
282 extern int tcp_use_randomport;
283
284 /* Structs used for flowhash computation */
285 struct inp_flowhash_key_addr {
286 union {
287 struct in_addr v4;
288 struct in6_addr v6;
289 u_int8_t addr8[16];
290 u_int16_t addr16[8];
291 u_int32_t addr32[4];
292 } infha;
293 };
294
295 struct inp_flowhash_key {
296 struct inp_flowhash_key_addr infh_laddr;
297 struct inp_flowhash_key_addr infh_faddr;
298 u_int32_t infh_lport;
299 u_int32_t infh_fport;
300 u_int32_t infh_af;
301 u_int32_t infh_proto;
302 u_int32_t infh_rand1;
303 u_int32_t infh_rand2;
304 };
305
306 #if !SKYWALK
307 static u_int32_t inp_hash_seed = 0;
308 #endif /* !SKYWALK */
309
310 static int infc_cmp(const struct inpcb *, const struct inpcb *);
311
312 /* Flags used by inp_fc_getinp */
313 #define INPFC_SOLOCKED 0x1
314 #define INPFC_REMOVE 0x2
315 static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
316
317 static void inp_fc_feedback(struct inpcb *);
318 extern void tcp_remove_from_time_wait(struct inpcb *inp);
319
320 static LCK_MTX_DECLARE_ATTR(inp_fc_lck, &inpcb_lock_grp, &inpcb_lock_attr);
321
322 RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
323 RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
324 RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
325
326 /*
327 * Use this inp as a key to find an inp in the flowhash tree.
328 * Accesses to it are protected by inp_fc_lck.
329 */
330 struct inpcb key_inp;
331
332 /*
333 * in_pcb.c: manage the Protocol Control Blocks.
334 */
335
/*
 * One-time initialization of the inpcb subsystem: privacy-logging
 * configuration, the gc/slow timer thread calls, and the flow-advisory
 * red-black tree.  Must be called exactly once during bring-up.
 */
void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;
	uint32_t logging_config;

	/* Guard against being run twice. */
	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	/*
	 * The high bit of the diagnostic configuration enables logging
	 * of inp details that are otherwise redacted for privacy.
	 */
	logging_config = atm_get_diagnostic_config();
	if (logging_config & 0x80000000) {
		inp_log_privacy = 1;
	}

	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	/* Give it an arg so that we know that this is the fast timer */
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
	    inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
		panic("unable to alloc the inpcb thread call");
	}

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}
368
369 #define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
370 ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
/*
 * Common thread-call handler for both the lazy and the fast inpcb
 * timer.  arg0 is non-NULL only for the fast timer instance (see
 * in_pcbinit()).  Runs each registered pcbinfo's gc and/or timer
 * callback when requested, then decides whether and how to re-arm.
 */
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	/* Consume the pending gc/tick requests under the timeout lock. */
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		/* Drop the spin lock before walking the pcbinfo list. */
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				/*
				 * Clear the request counters before invoking
				 * the callback; the callback re-increments
				 * them, so the sums accumulated below reflect
				 * only work still outstanding after this pass.
				 */
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				/* Same clear-then-recount protocol as above. */
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	inpcb_timeout_run--;
	/*
	 * NOTE(review): inpcb_timeout_run is unsigned, so the >= 0 half of
	 * this check is vacuous; underflow is still caught by the < 2 bound.
	 */
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}
459
/* Schedule the fast (1 second, no leeway) inpcb timer. */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
465
/* Schedule the lazy inpcb timer with the default 10-second leeway. */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
471
/*
 * Arrange for inpcb_timeout() to run one second from now.  An offset of
 * 0 requests the fast (no-leeway) timer; a non-zero offset requests the
 * lazy timer with that many seconds of leeway.  Caller must hold
 * inpcb_timeout_lock, typically as a spin lock; it is converted to a
 * full mutex before a thread call is actually scheduled.
 */
static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	/* Both flavors of the timer fire one second from now. */
	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
			    inpcb_thread_call, NULL, deadline, leeway,
			    THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}
508
509 void
inpcb_gc_sched(struct inpcbinfo * ipi,u_int32_t type)510 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
511 {
512 u_int32_t gccnt;
513
514 lck_mtx_lock_spin(&inpcb_timeout_lock);
515 inpcb_garbage_collecting = TRUE;
516 gccnt = ipi->ipi_gc_req.intimer_nodelay +
517 ipi->ipi_gc_req.intimer_fast;
518
519 if (gccnt > INPCB_GCREQ_THRESHOLD) {
520 type = INPCB_TIMER_FAST;
521 }
522
523 switch (type) {
524 case INPCB_TIMER_NODELAY:
525 os_atomic_inc(&ipi->ipi_gc_req.intimer_nodelay, relaxed);
526 inpcb_sched_timeout();
527 break;
528 case INPCB_TIMER_FAST:
529 os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
530 inpcb_sched_timeout();
531 break;
532 default:
533 os_atomic_inc(&ipi->ipi_gc_req.intimer_lazy, relaxed);
534 inpcb_sched_lazy_timeout();
535 break;
536 }
537 lck_mtx_unlock(&inpcb_timeout_lock);
538 }
539
540 void
inpcb_timer_sched(struct inpcbinfo * ipi,u_int32_t type)541 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
542 {
543 lck_mtx_lock_spin(&inpcb_timeout_lock);
544 inpcb_ticking = TRUE;
545 switch (type) {
546 case INPCB_TIMER_NODELAY:
547 os_atomic_inc(&ipi->ipi_timer_req.intimer_nodelay, relaxed);
548 inpcb_sched_timeout();
549 break;
550 case INPCB_TIMER_FAST:
551 os_atomic_inc(&ipi->ipi_timer_req.intimer_fast, relaxed);
552 inpcb_sched_timeout();
553 break;
554 default:
555 os_atomic_inc(&ipi->ipi_timer_req.intimer_lazy, relaxed);
556 inpcb_sched_lazy_timeout();
557 break;
558 }
559 lck_mtx_unlock(&inpcb_timeout_lock);
560 }
561
562 void
in_pcbinfo_attach(struct inpcbinfo * ipi)563 in_pcbinfo_attach(struct inpcbinfo *ipi)
564 {
565 struct inpcbinfo *ipi0;
566
567 lck_mtx_lock(&inpcb_lock);
568 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
569 if (ipi0 == ipi) {
570 panic("%s: ipi %p already in the list",
571 __func__, ipi);
572 /* NOTREACHED */
573 }
574 }
575 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
576 lck_mtx_unlock(&inpcb_lock);
577 }
578
579 int
in_pcbinfo_detach(struct inpcbinfo * ipi)580 in_pcbinfo_detach(struct inpcbinfo *ipi)
581 {
582 struct inpcbinfo *ipi0;
583 int error = 0;
584
585 lck_mtx_lock(&inpcb_lock);
586 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
587 if (ipi0 == ipi) {
588 break;
589 }
590 }
591 if (ipi0 != NULL) {
592 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
593 } else {
594 error = ENXIO;
595 }
596 lck_mtx_unlock(&inpcb_lock);
597
598 return error;
599 }
600
601 __attribute__((noinline))
602 char *
inp_snprintf_tuple(struct inpcb * inp,char * __sized_by (buflen)buf,size_t buflen)603 inp_snprintf_tuple(struct inpcb *inp, char *__sized_by(buflen) buf, size_t buflen)
604 {
605 char laddrstr[MAX_IPv6_STR_LEN];
606 char faddrstr[MAX_IPv6_STR_LEN];
607 uint16_t lport = 0;
608 uint16_t fport = 0;
609 uint16_t proto = IPPROTO_IP;
610
611 if (inp->inp_socket != NULL) {
612 proto = SOCK_PROTO(inp->inp_socket);
613
614 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
615 lport = inp->inp_lport;
616 fport = inp->inp_fport;
617 }
618 }
619 if (inp->inp_vflag & INP_IPV4) {
620 inet_ntop(AF_INET, (void *)&inp->inp_laddr.s_addr, laddrstr, sizeof(laddrstr));
621 inet_ntop(AF_INET, (void *)&inp->inp_faddr.s_addr, faddrstr, sizeof(faddrstr));
622 } else if (inp->inp_vflag & INP_IPV6) {
623 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, laddrstr, sizeof(laddrstr));
624 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, faddrstr, sizeof(faddrstr));
625 }
626 snprintf(buf, buflen, "[%u %s:%u %s:%u]",
627 proto, laddrstr, ntohs(lport), faddrstr, ntohs(fport));
628
629 return buf;
630 }
631
/*
 * Lazily evaluate whether this inp's owner may use management
 * interfaces, caching the result in inp_flags2 (INP2_MANAGEMENT_CHECKED
 * / INP2_MANAGEMENT_ALLOWED) so the check runs at most once per inp.
 */
__attribute__((noinline))
void
in_pcb_check_management_entitled(struct inpcb *inp)
{
	/* Already evaluated for this inp; flags carry the verdict. */
	if (inp->inp_flags2 & INP2_MANAGEMENT_CHECKED) {
		return;
	}

	if (management_data_unrestricted) {
		/* Global override: everyone is allowed. */
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
	} else if (if_management_interface_check_needed == true) {
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
		/*
		 * Note that soopt_cred_check check both intcoproc entitlements
		 * We check MANAGEMENT_DATA_ENTITLEMENT as there is no corresponding PRIV value
		 */
		if (soopt_cred_check(inp->inp_socket, PRIV_NET_RESTRICTED_INTCOPROC, false, false) == 0
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		} else {
			/* Denied; optionally log who failed the check. */
			if (__improbable(if_management_verbose > 1)) {
				char buf[128];

				os_log(OS_LOG_DEFAULT, "in_pcb_check_management_entitled %s:%d not management entitled %s",
				    proc_best_name(current_proc()),
				    proc_selfpid(),
				    inp_snprintf_tuple(inp, buf, sizeof(buf)));
			}
		}
	}
}
668
669 __attribute__((noinline))
670 void
in_pcb_check_ultra_constrained_entitled(struct inpcb * inp)671 in_pcb_check_ultra_constrained_entitled(struct inpcb *inp)
672 {
673 if (inp->inp_flags2 & INP2_ULTRA_CONSTRAINED_CHECKED) {
674 return;
675 }
676
677 if (if_ultra_constrained_check_needed) {
678 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;
679 if (if_ultra_constrained_default_allowed || IOCurrentTaskHasEntitlement(ULTRA_CONSTRAINED_ENTITLEMENT)) {
680 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
681 }
682 }
683 }
684
685 /*
686 * Allocate a PCB and associate it with the socket.
687 *
688 * Returns: 0 Success
689 * ENOBUFS
690 * ENOMEM
691 */
/*
 * Allocate a PCB and associate it with the socket.
 *
 * Returns: 0 Success
 *	ENOBUFS
 *	ENOMEM
 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	void *__unsafe_indexable addr;
	struct inpcb *inp;

	/* Refuse allocation when the protocol's memory limit is hit. */
	if (proto_memacct_hardlimit(so->so_proto)) {
		return ENOBUFS;
	}
	/* Z_WAITOK_ZERO_NOFAIL: blocks until memory is available, zeroed. */
	addr = __zalloc_flags(pcbinfo->ipi_zone, Z_WAITOK_ZERO_NOFAIL);
	__builtin_assume(addr != NULL);

	/* Charge the allocation against the protocol's memory account. */
	proto_memacct_add(so->so_proto, kalloc_type_size(pcbinfo->ipi_zone));

	/*
	 * N.B: the allocation above may actually be inp_tp
	 * which is a structure that includes inpcb, but for
	 * the purposes of this function we just touch
	 * struct inpcb.
	 */
	inp = __unsafe_forge_single(struct inpcb *, addr);

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	so->so_pcb = (caddr_t)inp;
	// There was some history about alignment of statistics counters
	// Ensure that all is as expected
	VERIFY(IS_P2ALIGNED(&inp->inp_mstat, sizeof(u_int64_t)));

	/* Per-PCB mutex for protocols that lock at PCB granularity. */
	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    &pcbinfo->ipi_lock_attr);
	}

	/* Inherit system-wide IPv6 defaults for PF_INET6 sockets. */
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

	(void) inp_update_policy(inp);

	inp->inp_max_pacing_rate = UINT64_MAX;

	/* Publish the new inp on the pcbinfo's global list. */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(&pcbinfo->ipi_lock);
	return 0;
}
750
751 /*
752 * in_pcblookup_local_and_cleanup does everything
753 * in_pcblookup_local does but it checks for a socket
754 * that's going away. Since we know that the lock is
755 * held read+write when this function is called, we
756 * can safely dispose of this socket like the slow
757 * timer would usually do and return NULL. This is
758 * great for bind.
759 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		socket_lock(so, 0);

		if (so->so_usecount == 0) {
			/* No remaining users: tear it down right here. */
			if (inp->inp_state != INPCB_STATE_DEAD) {
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp);	/* will unlock & destroy */
			inp = NULL;
		} else {
			/* Still referenced elsewhere; leave it alone. */
			socket_unlock(so, 0);
		}
	}

	return inp;
}
788
789 static void
in_pcb_conflict_post_msg(u_int16_t port)790 in_pcb_conflict_post_msg(u_int16_t port)
791 {
792 /*
793 * Radar 5523020 send a kernel event notification if a
794 * non-participating socket tries to bind the port a socket
795 * who has set SOF_NOTIFYCONFLICT owns.
796 */
797 struct kev_msg ev_msg;
798 struct kev_in_portinuse in_portinuse;
799
800 bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
801 bzero(&ev_msg, sizeof(struct kev_msg));
802 in_portinuse.port = ntohs(port); /* port in host order */
803 in_portinuse.req_pid = proc_selfpid();
804 ev_msg.vendor_code = KEV_VENDOR_APPLE;
805 ev_msg.kev_class = KEV_NETWORK_CLASS;
806 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
807 ev_msg.event_code = KEV_INET_PORTINUSE;
808 ev_msg.dv[0].data_ptr = &in_portinuse;
809 ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
810 ev_msg.dv[1].data_length = 0;
811 dlil_post_complete_msg(NULL, &ev_msg);
812 }
813
814 /*
815 * Bind an INPCB to an address and/or port. This routine should not alter
816 * the caller-supplied local address "nam" or remote address "remote".
817 *
818 * Returns: 0 Success
819 * EADDRNOTAVAIL Address not available.
820 * EINVAL Invalid argument
821 * EAFNOSUPPORT Address family not supported [notdef]
822 * EACCES Permission denied
823 * EADDRINUSE Address in use
824 * EAGAIN Resource unavailable, try again
825 * priv_check_cred:EPERM Operation not permitted
826 */
827 int
in_pcbbind(struct inpcb * inp,struct sockaddr * nam,struct sockaddr * remote,struct proc * p)828 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct sockaddr *remote, struct proc *p)
829 {
830 struct socket *so = inp->inp_socket;
831 unsigned short *lastport;
832 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
833 u_short lport = 0, rand_port = 0;
834 int wild = 0;
835 int reuseport = (so->so_options & SO_REUSEPORT);
836 int error = 0;
837 int randomport;
838 int conflict = 0;
839 boolean_t anonport = FALSE;
840 kauth_cred_t cred;
841 struct in_addr laddr;
842 struct ifnet *outif = NULL;
843
844 ASSERT((inp->inp_flags2 & INP2_BIND_IN_PROGRESS) != 0);
845
846 if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
847 error = EADDRNOTAVAIL;
848 goto done;
849 }
850 if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
851 wild = 1;
852 }
853
854 bzero(&laddr, sizeof(laddr));
855
856 socket_unlock(so, 0); /* keep reference on socket */
857 lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
858 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
859 /* another thread completed the bind */
860 lck_rw_done(&pcbinfo->ipi_lock);
861 socket_lock(so, 0);
862 error = EINVAL;
863 goto done;
864 }
865
866 if (nam != NULL) {
867 if (nam->sa_len != sizeof(struct sockaddr_in)) {
868 lck_rw_done(&pcbinfo->ipi_lock);
869 socket_lock(so, 0);
870 error = EINVAL;
871 goto done;
872 }
873 #if 0
874 /*
875 * We should check the family, but old programs
876 * incorrectly fail to initialize it.
877 */
878 if (nam->sa_family != AF_INET) {
879 lck_rw_done(&pcbinfo->ipi_lock);
880 socket_lock(so, 0);
881 error = EAFNOSUPPORT;
882 goto done;
883 }
884 #endif /* 0 */
885 lport = SIN(nam)->sin_port;
886
887 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
888 /*
889 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
890 * allow complete duplication of binding if
891 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
892 * and a multicast address is bound on both
893 * new and duplicated sockets.
894 */
895 if (so->so_options & SO_REUSEADDR) {
896 reuseport = SO_REUSEADDR | SO_REUSEPORT;
897 }
898 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
899 struct sockaddr_in sin;
900 struct ifaddr *ifa;
901
902 /* Sanitized for interface address searches */
903 SOCKADDR_ZERO(&sin, sizeof(sin));
904 sin.sin_family = AF_INET;
905 sin.sin_len = sizeof(struct sockaddr_in);
906 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
907
908 ifa = ifa_ifwithaddr(SA(&sin));
909 if (ifa == NULL) {
910 lck_rw_done(&pcbinfo->ipi_lock);
911 socket_lock(so, 0);
912 error = EADDRNOTAVAIL;
913 goto done;
914 } else {
915 /*
916 * Opportunistically determine the outbound
917 * interface that may be used; this may not
918 * hold true if we end up using a route
919 * going over a different interface, e.g.
920 * when sending to a local address. This
921 * will get updated again after sending.
922 */
923 IFA_LOCK(ifa);
924 outif = ifa->ifa_ifp;
925 IFA_UNLOCK(ifa);
926 ifa_remref(ifa);
927 }
928 }
929
930 #if SKYWALK
931 if (inp->inp_flags2 & INP2_EXTERNAL_PORT) {
932 // Extract the external flow info
933 struct ns_flow_info nfi = {};
934 error = necp_client_get_netns_flow_info(inp->necp_client_uuid,
935 &nfi);
936 if (error != 0) {
937 lck_rw_done(&pcbinfo->ipi_lock);
938 socket_lock(so, 0);
939 goto done;
940 }
941
942 // Extract the reserved port
943 u_int16_t reserved_lport = 0;
944 if (nfi.nfi_laddr.sa.sa_family == AF_INET) {
945 reserved_lport = nfi.nfi_laddr.sin.sin_port;
946 } else if (nfi.nfi_laddr.sa.sa_family == AF_INET6) {
947 reserved_lport = nfi.nfi_laddr.sin6.sin6_port;
948 } else {
949 lck_rw_done(&pcbinfo->ipi_lock);
950 socket_lock(so, 0);
951 error = EINVAL;
952 goto done;
953 }
954
955 // Validate or use the reserved port
956 if (lport == 0) {
957 lport = reserved_lport;
958 } else if (lport != reserved_lport) {
959 lck_rw_done(&pcbinfo->ipi_lock);
960 socket_lock(so, 0);
961 error = EINVAL;
962 goto done;
963 }
964 }
965
966 /* Do not allow reserving a UDP port if remaining UDP port count is below 4096 */
967 if (SOCK_PROTO(so) == IPPROTO_UDP && !allow_udp_port_exhaustion) {
968 uint32_t current_reservations = 0;
969 if (inp->inp_vflag & INP_IPV6) {
970 current_reservations = netns_lookup_reservations_count_in6(inp->in6p_laddr, IPPROTO_UDP);
971 } else {
972 current_reservations = netns_lookup_reservations_count_in(inp->inp_laddr, IPPROTO_UDP);
973 }
974 if (USHRT_MAX - UDP_RANDOM_PORT_RESERVE < current_reservations) {
975 log(LOG_ERR, "UDP port not available, less than 4096 UDP ports left");
976 lck_rw_done(&pcbinfo->ipi_lock);
977 socket_lock(so, 0);
978 error = EADDRNOTAVAIL;
979 goto done;
980 }
981 }
982
983 #endif /* SKYWALK */
984
985 if (lport != 0) {
986 struct inpcb *t;
987 uid_t u;
988
989 #if XNU_TARGET_OS_OSX
990 if (ntohs(lport) < IPPORT_RESERVED &&
991 SIN(nam)->sin_addr.s_addr != 0 &&
992 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
993 cred = kauth_cred_proc_ref(p);
994 error = priv_check_cred(cred,
995 PRIV_NETINET_RESERVEDPORT, 0);
996 kauth_cred_unref(&cred);
997 if (error != 0) {
998 lck_rw_done(&pcbinfo->ipi_lock);
999 socket_lock(so, 0);
1000 error = EACCES;
1001 goto done;
1002 }
1003 }
1004 #endif /* XNU_TARGET_OS_OSX */
1005 /*
 * Check whether the process is allowed to bind to a restricted port
1007 */
1008 if (!current_task_can_use_restricted_in_port(lport,
1009 (uint8_t)SOCK_PROTO(so), PORT_FLAGS_BSD)) {
1010 lck_rw_done(&pcbinfo->ipi_lock);
1011 socket_lock(so, 0);
1012 error = EADDRINUSE;
1013 goto done;
1014 }
1015
1016 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
1017 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
1018 (t = in_pcblookup_local_and_cleanup(
1019 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
1020 INPLOOKUP_WILDCARD)) != NULL &&
1021 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1022 t->inp_laddr.s_addr != INADDR_ANY ||
1023 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
1024 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
1025 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
1026 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1027 t->inp_laddr.s_addr != INADDR_ANY) &&
1028 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1029 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1030 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1031 if ((t->inp_socket->so_flags &
1032 SOF_NOTIFYCONFLICT) &&
1033 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1034 conflict = 1;
1035 }
1036
1037 lck_rw_done(&pcbinfo->ipi_lock);
1038
1039 if (conflict) {
1040 in_pcb_conflict_post_msg(lport);
1041 }
1042
1043 socket_lock(so, 0);
1044 error = EADDRINUSE;
1045 goto done;
1046 }
1047 t = in_pcblookup_local_and_cleanup(pcbinfo,
1048 SIN(nam)->sin_addr, lport, wild);
1049 if (t != NULL &&
1050 (reuseport & t->inp_socket->so_options) == 0 &&
1051 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1052 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1053 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1054 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1055 t->inp_laddr.s_addr != INADDR_ANY ||
1056 SOCK_DOM(so) != PF_INET6 ||
1057 SOCK_DOM(t->inp_socket) != PF_INET6) {
1058 if ((t->inp_socket->so_flags &
1059 SOF_NOTIFYCONFLICT) &&
1060 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1061 conflict = 1;
1062 }
1063
1064 lck_rw_done(&pcbinfo->ipi_lock);
1065
1066 if (conflict) {
1067 in_pcb_conflict_post_msg(lport);
1068 }
1069 socket_lock(so, 0);
1070 error = EADDRINUSE;
1071 goto done;
1072 }
1073 }
1074 #if SKYWALK
1075 if ((SOCK_PROTO(so) == IPPROTO_TCP ||
1076 SOCK_PROTO(so) == IPPROTO_UDP) &&
1077 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1078 int res_err = 0;
1079 if (inp->inp_vflag & INP_IPV6) {
1080 res_err = netns_reserve_in6(
1081 &inp->inp_netns_token,
1082 SIN6(nam)->sin6_addr,
1083 (uint8_t)SOCK_PROTO(so), lport, NETNS_BSD,
1084 NULL);
1085 } else {
1086 res_err = netns_reserve_in(
1087 &inp->inp_netns_token,
1088 SIN(nam)->sin_addr, (uint8_t)SOCK_PROTO(so),
1089 lport, NETNS_BSD, NULL);
1090 }
1091 if (res_err != 0) {
1092 lck_rw_done(&pcbinfo->ipi_lock);
1093 socket_lock(so, 0);
1094 error = EADDRINUSE;
1095 goto done;
1096 }
1097 }
1098 #endif /* SKYWALK */
1099 }
1100 laddr = SIN(nam)->sin_addr;
1101 }
1102 if (lport == 0) {
1103 u_short first, last;
1104 int count;
1105 bool found;
1106
1107 /*
1108 * Override wild = 1 for implicit bind (mainly used by connect)
1109 * For implicit bind (lport == 0), we always use an unused port,
1110 * so REUSEADDR|REUSEPORT don't apply
1111 */
1112 wild = 1;
1113
1114 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
1115 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
1116 udp_use_randomport);
1117
1118 /*
1119 * Even though this looks similar to the code in
1120 * in6_pcbsetport, the v6 vs v4 checks are different.
1121 */
1122 anonport = TRUE;
1123 if (inp->inp_flags & INP_HIGHPORT) {
1124 first = (u_short)ipport_hifirstauto; /* sysctl */
1125 last = (u_short)ipport_hilastauto;
1126 lastport = &pcbinfo->ipi_lasthi;
1127 } else if (inp->inp_flags & INP_LOWPORT) {
1128 cred = kauth_cred_proc_ref(p);
1129 error = priv_check_cred(cred,
1130 PRIV_NETINET_RESERVEDPORT, 0);
1131 kauth_cred_unref(&cred);
1132 if (error != 0) {
1133 lck_rw_done(&pcbinfo->ipi_lock);
1134 socket_lock(so, 0);
1135 goto done;
1136 }
1137 first = (u_short)ipport_lowfirstauto; /* 1023 */
1138 last = (u_short)ipport_lowlastauto; /* 600 */
1139 lastport = &pcbinfo->ipi_lastlow;
1140 } else {
1141 first = (u_short)ipport_firstauto; /* sysctl */
1142 last = (u_short)ipport_lastauto;
1143 lastport = &pcbinfo->ipi_lastport;
1144 }
1145 /* No point in randomizing if only one port is available */
1146
1147 if (first == last) {
1148 randomport = 0;
1149 }
1150 /*
1151 * Simple check to ensure all ports are not used up causing
1152 * a deadlock here.
1153 *
1154 * We split the two cases (up and down) so that the direction
1155 * is not being tested on each round of the loop.
1156 */
1157 if (first > last) {
1158 struct in_addr lookup_addr;
1159
1160 /*
1161 * counting down
1162 */
1163 if (randomport) {
1164 read_frandom(&rand_port, sizeof(rand_port));
1165 *lastport =
1166 first - (rand_port % (first - last));
1167 }
1168 count = first - last;
1169
1170 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1171 inp->inp_laddr;
1172
1173 found = false;
1174 do {
1175 if (count-- < 0) { /* completely used? */
1176 lck_rw_done(&pcbinfo->ipi_lock);
1177 socket_lock(so, 0);
1178 error = EADDRNOTAVAIL;
1179 goto done;
1180 }
1181 --*lastport;
1182 if (*lastport > first || *lastport < last) {
1183 *lastport = first;
1184 }
1185 lport = htons(*lastport);
1186
1187 /*
1188 * Skip if this is a restricted port as we do not want to
1189 * use restricted ports as ephemeral
1190 */
1191 if (IS_RESTRICTED_IN_PORT(lport)) {
1192 continue;
1193 }
1194
1195 found = in_pcblookup_local_and_cleanup(pcbinfo,
1196 lookup_addr, lport, wild) == NULL;
1197 #if SKYWALK
1198 if (found &&
1199 (SOCK_PROTO(so) == IPPROTO_TCP ||
1200 SOCK_PROTO(so) == IPPROTO_UDP) &&
1201 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1202 int res_err;
1203 if (inp->inp_vflag & INP_IPV6) {
1204 res_err = netns_reserve_in6(
1205 &inp->inp_netns_token,
1206 inp->in6p_laddr,
1207 (uint8_t)SOCK_PROTO(so), lport,
1208 NETNS_BSD, NULL);
1209 } else {
1210 res_err = netns_reserve_in(
1211 &inp->inp_netns_token,
1212 lookup_addr, (uint8_t)SOCK_PROTO(so),
1213 lport, NETNS_BSD, NULL);
1214 }
1215 found = res_err == 0;
1216 }
1217 #endif /* SKYWALK */
1218 } while (!found);
1219 } else {
1220 struct in_addr lookup_addr;
1221
1222 /*
1223 * counting up
1224 */
1225 if (randomport) {
1226 read_frandom(&rand_port, sizeof(rand_port));
1227 *lastport =
1228 first + (rand_port % (first - last));
1229 }
1230 count = last - first;
1231
1232 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1233 inp->inp_laddr;
1234
1235 found = false;
1236 do {
1237 if (count-- < 0) { /* completely used? */
1238 lck_rw_done(&pcbinfo->ipi_lock);
1239 socket_lock(so, 0);
1240 error = EADDRNOTAVAIL;
1241 goto done;
1242 }
1243 ++*lastport;
1244 if (*lastport < first || *lastport > last) {
1245 *lastport = first;
1246 }
1247 lport = htons(*lastport);
1248
1249 /*
1250 * Skip if this is a restricted port as we do not want to
1251 * use restricted ports as ephemeral
1252 */
1253 if (IS_RESTRICTED_IN_PORT(lport)) {
1254 continue;
1255 }
1256
1257 found = in_pcblookup_local_and_cleanup(pcbinfo,
1258 lookup_addr, lport, wild) == NULL;
1259 #if SKYWALK
1260 if (found &&
1261 (SOCK_PROTO(so) == IPPROTO_TCP ||
1262 SOCK_PROTO(so) == IPPROTO_UDP) &&
1263 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1264 int res_err;
1265 if (inp->inp_vflag & INP_IPV6) {
1266 res_err = netns_reserve_in6(
1267 &inp->inp_netns_token,
1268 inp->in6p_laddr,
1269 (uint8_t)SOCK_PROTO(so), lport,
1270 NETNS_BSD, NULL);
1271 } else {
1272 res_err = netns_reserve_in(
1273 &inp->inp_netns_token,
1274 lookup_addr, (uint8_t)SOCK_PROTO(so),
1275 lport, NETNS_BSD, NULL);
1276 }
1277 found = res_err == 0;
1278 }
1279 #endif /* SKYWALK */
1280 } while (!found);
1281 }
1282 }
1283 socket_lock(so, 0);
1284
1285 /*
1286 * We unlocked socket's protocol lock for a long time.
1287 * The socket might have been dropped/defuncted.
1288 * Checking if world has changed since.
1289 */
1290 if (inp->inp_state == INPCB_STATE_DEAD) {
1291 #if SKYWALK
1292 netns_release(&inp->inp_netns_token);
1293 #endif /* SKYWALK */
1294 lck_rw_done(&pcbinfo->ipi_lock);
1295 error = ECONNABORTED;
1296 goto done;
1297 }
1298
1299 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1300 #if SKYWALK
1301 netns_release(&inp->inp_netns_token);
1302 #endif /* SKYWALK */
1303 lck_rw_done(&pcbinfo->ipi_lock);
1304 error = EINVAL;
1305 goto done;
1306 }
1307
1308 if (laddr.s_addr != INADDR_ANY) {
1309 inp->inp_laddr = laddr;
1310 inp->inp_last_outifp = outif;
1311 #if SKYWALK
1312 if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
1313 netns_set_ifnet(&inp->inp_netns_token, outif);
1314 }
1315 #endif /* SKYWALK */
1316 }
1317 inp->inp_lport = lport;
1318 if (anonport) {
1319 inp->inp_flags |= INP_ANONPORT;
1320 }
1321
1322 if (in_pcbinshash(inp, remote, 1) != 0) {
1323 inp->inp_laddr.s_addr = INADDR_ANY;
1324 inp->inp_last_outifp = NULL;
1325
1326 #if SKYWALK
1327 netns_release(&inp->inp_netns_token);
1328 #endif /* SKYWALK */
1329 inp->inp_lport = 0;
1330 if (anonport) {
1331 inp->inp_flags &= ~INP_ANONPORT;
1332 }
1333 lck_rw_done(&pcbinfo->ipi_lock);
1334 error = EAGAIN;
1335 goto done;
1336 }
1337 lck_rw_done(&pcbinfo->ipi_lock);
1338 sflt_notify(so, sock_evt_bound, NULL);
1339
1340 in_pcb_check_management_entitled(inp);
1341 in_pcb_check_ultra_constrained_entitled(inp);
1342 done:
1343 return error;
1344 }
1345
/*
 * True if the IPv4 destination is link-local, loopback, zeronet,
 * multicast or RFC 1918 private -- destinations for which an APN
 * fallback notification is never warranted.
 */
#define APN_FALLBACK_IP_FILTER(a)       \
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

/* Minimum spacing, in seconds of uptime, between two fallback notifications */
#define APN_FALLBACK_NOTIF_INTERVAL     2 /* seconds */
/* net_uptime() timestamp of the last notification; used for throttling */
static uint64_t last_apn_fallback = 0;
1355
1356 static boolean_t
apn_fallback_required(proc_t proc,struct socket * so,struct sockaddr_in * p_dstv4)1357 apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
1358 {
1359 uint64_t timenow;
1360 struct sockaddr_storage lookup_default_addr;
1361 struct rtentry *rt = NULL;
1362
1363 VERIFY(proc != NULL);
1364
1365 if (apn_fallbk_enabled == FALSE) {
1366 return FALSE;
1367 }
1368
1369 if (proc == kernproc) {
1370 return FALSE;
1371 }
1372
1373 if (so && (so->so_options & SO_NOAPNFALLBK)) {
1374 return FALSE;
1375 }
1376
1377 timenow = net_uptime();
1378 if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
1379 apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
1380 return FALSE;
1381 }
1382
1383 if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
1384 return FALSE;
1385 }
1386
1387 /* Check if we have unscoped IPv6 default route through cellular */
1388 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1389 lookup_default_addr.ss_family = AF_INET6;
1390 lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);
1391
1392 rt = rtalloc1(SA(&lookup_default_addr), 0, 0);
1393 if (NULL == rt) {
1394 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1395 "unscoped default IPv6 route.\n"));
1396 return FALSE;
1397 }
1398
1399 if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
1400 rtfree(rt);
1401 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1402 "unscoped default IPv6 route through cellular interface.\n"));
1403 return FALSE;
1404 }
1405
1406 /*
1407 * We have a default IPv6 route, ensure that
1408 * we do not have IPv4 default route before triggering
1409 * the event
1410 */
1411 rtfree(rt);
1412 rt = NULL;
1413
1414 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1415 lookup_default_addr.ss_family = AF_INET;
1416 lookup_default_addr.ss_len = sizeof(struct sockaddr_in);
1417
1418 rt = rtalloc1(SA(&lookup_default_addr), 0, 0);
1419
1420 if (rt) {
1421 rtfree(rt);
1422 rt = NULL;
1423 apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
1424 "IPv4 default route!\n"));
1425 return FALSE;
1426 }
1427
1428 {
1429 /*
1430 * We disable APN fallback if the binary is not a third-party app.
1431 * Note that platform daemons use their process name as a
1432 * bundle ID so we filter out bundle IDs without dots.
1433 */
1434 const char *__null_terminated bundle_id = cs_identity_get(proc);
1435 if (bundle_id == NULL ||
1436 bundle_id[0] == '\0' ||
1437 strchr(bundle_id, '.') == NULL ||
1438 strlcmp("com.apple.", bundle_id, sizeof("com.apple.") - 1) == 0) {
1439 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
1440 "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
1441 return FALSE;
1442 }
1443 }
1444
1445 {
1446 /*
1447 * The Apple App Store IPv6 requirement started on
1448 * June 1st, 2016 at 12:00:00 AM PDT.
1449 * We disable APN fallback if the binary is more recent than that.
1450 * We check both atime and birthtime since birthtime is not always supported.
1451 */
1452 static const long ipv6_start_date = 1464764400L;
1453 vfs_context_t __single context;
1454 struct stat64 sb;
1455 int vn_stat_error;
1456
1457 bzero(&sb, sizeof(struct stat64));
1458 context = vfs_context_create(NULL);
1459 vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
1460 (void)vfs_context_rele(context);
1461
1462 if (vn_stat_error != 0 ||
1463 sb.st_atimespec.tv_sec >= ipv6_start_date ||
1464 sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
1465 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
1466 "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
1467 vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
1468 sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
1469 return FALSE;
1470 }
1471 }
1472 return TRUE;
1473 }
1474
1475 static void
apn_fallback_trigger(proc_t proc,struct socket * so)1476 apn_fallback_trigger(proc_t proc, struct socket *so)
1477 {
1478 pid_t pid = 0;
1479 struct kev_msg ev_msg;
1480 struct kev_netevent_apnfallbk_data apnfallbk_data;
1481
1482 last_apn_fallback = net_uptime();
1483 pid = proc_pid(proc);
1484 uuid_t application_uuid;
1485 uuid_clear(application_uuid);
1486 proc_getexecutableuuid(proc, application_uuid,
1487 sizeof(application_uuid));
1488
1489 bzero(&ev_msg, sizeof(struct kev_msg));
1490 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1491 ev_msg.kev_class = KEV_NETWORK_CLASS;
1492 ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
1493 ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;
1494
1495 bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1496
1497 if (so->so_flags & SOF_DELEGATED) {
1498 apnfallbk_data.epid = so->e_pid;
1499 uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1500 } else {
1501 apnfallbk_data.epid = so->last_pid;
1502 uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1503 }
1504
1505 ev_msg.dv[0].data_ptr = &apnfallbk_data;
1506 ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1507 kev_post_msg(&ev_msg);
1508 apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1509 }
1510
1511 /*
1512 * Transform old in_pcbconnect() into an inner subroutine for new
1513 * in_pcbconnect(); do some validity-checking on the remote address
1514 * (in "nam") and then determine local host address (i.e., which
1515 * interface) to use to access that remote host.
1516 *
1517 * This routine may alter the caller-supplied remote address "nam".
1518 *
1519 * The caller may override the bound-to-interface setting of the socket
1520 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1521 *
1522 * This routine might return an ifp with a reference held if the caller
1523 * provides a non-NULL outif, even in the error case. The caller is
1524 * responsible for releasing its reference.
1525 *
1526 * Returns: 0 Success
1527 * EINVAL Invalid argument
1528 * EAFNOSUPPORT Address family not supported
1529 * EADDRNOTAVAIL Address not available
1530 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	/* Start with no outbound interface reported; set only on success */
	if (outif != NULL) {
		*outif = NULL;
	}
	/* Validate the destination: exact size, AF_INET, non-zero port
	 * (the port check is skipped for raw sockets) */
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	in_pcb_check_management_entitled(inp);
	in_pcb_check_ultra_constrained_entitled(inp);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 * Note: this rewrites the caller-supplied "nam".
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(&in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			/* no reference was taken; just forget the pointer */
			ia = NULL;
		}
		lck_rw_done(&in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Invalidate the cached route when it is stale, for another
	 * family/destination, or when SO_DONTROUTE forbids its use */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		SOCKADDR_ZERO(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	SOCKADDR_ZERO(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/* No IPv4 route at all: possibly an IPv6-only APN; see
		 * apn_fallback_required() for the eligibility gates */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam)) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			/* reference dropped near the end of this function */
			ifa_addref(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed off.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		/* Last resort: fall back to the route's own interface addr */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			ifa_addref(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			/* Swap the reference to the selected-interface addr */
			if (ia != NULL) {
				ifa_remref(&ia->ia_ifa);
			}
			lck_rw_lock_shared(&in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				ifa_addref(&ia->ia_ifa);
			}
			lck_rw_done(&in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				/* Prefer the route's interface over the
				 * address's owning interface when we have one */
				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp); /* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		/* Drop the reference taken on whichever path set "ia" */
		ifa_remref(&ia->ia_ifa);
		ia = NULL;
	}

	/* Tell interested parties the send was denied by interface policy */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}
1815
1816 /*
1817 * Outer subroutine:
1818 * Connect from a socket to a specified address.
1819 * Both address and port must be specified in argument sin.
1820 * If don't have a local address for this socket yet,
1821 * then pick one.
1822 *
1823 * The caller may override the bound-to-interface setting of the socket
1824 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1825 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = SIN(nam);
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	/* Bump the state-change counter so content filters notice this */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
		return error;
	}

	/*
	 * Reject a duplicate 4-tuple.  The socket lock is dropped around
	 * the hash lookup to respect lock ordering with ipi_lock.
	 */
	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return ECONNREFUSED;
	}

	if (pcb != NULL) {
		/* 4-tuple already in use; drop the lookup reference */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return EADDRINUSE;
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* Implicit bind: pick an ephemeral local port */
			error = in_pcbbind(inp, NULL, nam, p);
			if (error) {
				return error;
			}
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token,
			    inp->inp_last_outifp);
		}
#endif /* SKYWALK */
		/* Remember the local address came from an implicit choice */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routine can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0) {
			return EINVAL;
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* Commit the foreign endpoint and re-insert into the PCB hash */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_udp_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	return 0;
}
1925
/*
 * Disconnect the PCB from its foreign endpoint: clear the foreign
 * address/port, rehash, and detach the PCB if the socket has already
 * lost its last file reference.
 */
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* Cache UDP nstat state before the foreign address is cleared */
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_udp_pcb_cache(inp);
	}

	/* Back to the unconnected state */
	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

#if CONTENT_FILTER
	/* Bump the state-change counter so content filters notice this */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	/* Re-insert under the now-wildcard foreign address */
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
		in_pcbdetach(inp);
	}
}
1962
/*
 * Detach the PCB from its socket and release per-PCB resources
 * (IPsec policy, keepalive buffer, options, cached route, multicast
 * options, Skywalk port reservations), then mark it dead and schedule
 * it for garbage collection.
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	/* Tear down any per-PCB IPsec policy */
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	if (SOCK_PROTO(so) == IPPROTO_UDP) {
		/* Account for UDP sockets that never moved any data */
		if (inp->inp_mstat.ms_total.ts_rxpackets == 0 && inp->inp_mstat.ms_total.ts_txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
		nstat_pcb_detach(inp);
	}

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		kfree_data_counted_by(inp->inp_keepalive_data, inp->inp_keepalive_datalen);
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if SKYWALK
	/* Free up the port in the namespace registrar if not in TIME_WAIT */
	if (!(inp->inp_flags2 & INP2_TIMEWAIT)) {
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
	}
#endif /* SKYWALK */

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		/* Release IP options, cached route and multicast options */
		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		if (imo != NULL) {
			IMO_REMREF(imo);
		}
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_mstat.ms_total.ts_txbytes != 0 ||
			    inp->inp_mstat.ms_total.ts_rxbytes != 0) {
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->e_pid,
						so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->last_pid,
						so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		/* Let the PCB garbage-collector reap this entry soon */
		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
2066
2067
/*
 * Final destruction of a PCB: unlink it from the pcbinfo lists and free
 * both the inpcb and its socket.  Expects the pcbinfo lock to be held
 * exclusively, the PCB to be in WNT_STOPUSING state, and the socket's
 * use count to have already dropped to zero.
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	/* A nonzero use count here indicates a refcounting bug somewhere. */
	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		/* Caller must have moved the PCB to WNT_STOPUSING first. */
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(&ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist",
				    __func__, so);
				/* NOTREACHED */
			}
			/* Drop the per-PCB mutex before destroying it. */
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		zfree(ipi->ipi_zone, inp);
		proto_memacct_sub(so->so_proto, kalloc_type_size(ipi->ipi_zone));

		sodealloc(so);
	}
}
2144
2145 /*
2146 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
2147 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
2149 * without the need for a wrapper function.
2150 */
2151 int
in_getsockaddr(struct socket * so,struct sockaddr ** nam)2152 in_getsockaddr(struct socket *so, struct sockaddr **nam)
2153 {
2154 struct inpcb *inp;
2155 struct sockaddr_in *sin;
2156
2157 /*
2158 * Do the malloc first in case it blocks.
2159 */
2160 sin = SIN(alloc_sockaddr(sizeof(*sin),
2161 Z_WAITOK | Z_NOFAIL));
2162
2163 sin->sin_family = AF_INET;
2164
2165 if ((inp = sotoinpcb(so)) == NULL) {
2166 free_sockaddr(sin);
2167 return EINVAL;
2168 }
2169 sin->sin_port = inp->inp_lport;
2170 sin->sin_addr = inp->inp_laddr;
2171
2172 *nam = SA(sin);
2173 return 0;
2174 }
2175
2176 int
in_getsockaddr_s(struct socket * so,struct sockaddr_in * ss)2177 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
2178 {
2179 struct sockaddr_in *sin = ss;
2180 struct inpcb *inp;
2181
2182 VERIFY(ss != NULL);
2183 SOCKADDR_ZERO(ss, sizeof(*ss));
2184
2185 sin->sin_family = AF_INET;
2186 sin->sin_len = sizeof(*sin);
2187
2188 if ((inp = sotoinpcb(so)) == NULL) {
2189 return EINVAL;
2190 }
2191
2192 sin->sin_port = inp->inp_lport;
2193 sin->sin_addr = inp->inp_laddr;
2194 return 0;
2195 }
2196
2197 int
in_getpeeraddr(struct socket * so,struct sockaddr ** nam)2198 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
2199 {
2200 struct inpcb *inp;
2201 struct sockaddr_in *sin;
2202
2203 /*
2204 * Do the malloc first in case it blocks.
2205 */
2206 sin = SIN(alloc_sockaddr(sizeof(*sin),
2207 Z_WAITOK | Z_NOFAIL));
2208
2209 sin->sin_family = AF_INET;
2210
2211 if ((inp = sotoinpcb(so)) == NULL) {
2212 free_sockaddr(sin);
2213 return EINVAL;
2214 }
2215 sin->sin_port = inp->inp_fport;
2216 sin->sin_addr = inp->inp_faddr;
2217
2218 *nam = SA(sin);
2219 return 0;
2220 }
2221
/*
 * Apply "notify" with "errno" to every IPv4 PCB whose foreign address
 * matches "faddr".  Used to propagate ICMP errors and similar events to
 * all matching connections.  Takes the pcbinfo lock shared for the scan
 * and the per-socket lock around each callback.
 */
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		/* Only IPv4 PCBs are of interest here. */
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL) {
			continue;
		}
		/* Skip PCBs that are already being torn down. */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		/* Drop the want reference taken above, socket still locked. */
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
}
2248
2249 /*
2250 * Check for alternatives when higher level complains
2251 * about service problems. For now, invalidate cached
2252 * routing information. If the route was created dynamically
2253 * (by a redirect), time to try a default gateway again.
2254 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			/*
			 * Redirect-created route: delete it so that the
			 * default gateway gets another chance.
			 */
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route. A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		/* ifa_foraddr() returned a held reference; drop it. */
		if (ia != NULL) {
			ifa_remref(&ia->ia_ifa);
		}
	}
	/* No cached route at all, or the address is still valid: flush. */
	if (rt == NULL || release) {
		ROUTE_RELEASE(&inp->inp_route);
	}
}
2295
2296 /*
2297 * After a routing change, flush old routing
2298 * and allocate a (hopefully) better one.
2299 */
2300 void
in_rtchange(struct inpcb * inp,int errno)2301 in_rtchange(struct inpcb *inp, int errno)
2302 {
2303 #pragma unused(errno)
2304 boolean_t release = FALSE;
2305 struct rtentry *rt;
2306
2307 if ((rt = inp->inp_route.ro_rt) != NULL) {
2308 struct in_ifaddr *ia = NULL;
2309
2310 /* if address is gone, keep the old route */
2311 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2312 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2313 /*
2314 * Address is around; ditch the route. A new route
2315 * can be allocated the next time output is attempted.
2316 */
2317 release = TRUE;
2318 }
2319 if (ia != NULL) {
2320 ifa_remref(&ia->ia_ifa);
2321 }
2322 }
2323 if (rt == NULL || release) {
2324 ROUTE_RELEASE(&inp->inp_route);
2325 }
2326 }
2327
2328 /*
2329 * Lookup a PCB based on the local address and port.
2330 */
/*
 * If "wild_okay" is false, only an exact (laddr, lport) match with a
 * wildcard foreign address is returned; otherwise a best-fit search is
 * performed over all PCBs bound to "lport", preferring the candidate
 * with the fewest wildcard address components.  Caller is expected to
 * hold the pcbinfo lock; no reference is taken on the returned PCB.
 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	int matchwild = 3, wildcard;
	u_short lport = (u_short)lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
			/* Skip non-IPv4 entries sharing this hash chain. */
			if (!(inp->inp_vflag & INP_IPV4)) {
				continue;
			}
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return inp;
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return NULL;
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport) {
				break;
			}
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit.
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				/*
				 * "wildcard" counts how many address
				 * components fail to match exactly; the
				 * candidate with the lowest count wins.
				 */
				wildcard = 0;
				if (!(inp->inp_vflag & INP_IPV4)) {
					continue;
				}
				if (inp->inp_faddr.s_addr != INADDR_ANY) {
					wildcard++;
				}
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY) {
						wildcard++;
					} else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr) {
						continue;
					}
				} else {
					if (laddr.s_addr != INADDR_ANY) {
						wildcard++;
					}
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					/* Exact match; cannot do better. */
					if (matchwild == 0) {
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return match;
	}
}
2423
2424 /*
2425 * Check if PCB exists in hash list.
2426 */
/*
 * Report whether a PCB matching the 4-tuple (or, with "wildcard", a
 * listening PCB on lport) exists, returning the owning socket's
 * credentials through "uid"/"gid" (UID_MAX/GID_MAX when no socket is
 * attached).  Takes and drops the pcbinfo lock internally; no reference
 * is left on any PCB.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	*uid = UID_MAX;
	*gid = GID_MAX;

	/*
	 * We may have found the pcb in the last lookup - check this first.
	 */

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* Honor interface-scoped receive restrictions. */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
					inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					inp->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}

	/* Fall back to wildcard (unconnected/listening) entries on lport. */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
						inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
						inp->inp_socket->so_cred);
				}
				lck_rw_done(&pcbinfo->ipi_lock);
				return found;
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/*
				 * Remember fully-wildcarded candidates; an
				 * IPv6 socket accepting v4-mapped traffic
				 * is the weakest match.
				 */
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
					local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
			local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
			local_wild->inp_socket->so_cred);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	return found;
}
2553
2554 /*
2555 * Lookup PCB in hash list.
2556 */
/*
 * Lookup PCB in hash list, assuming the pcbinfo lock is already held.
 * On success the returned PCB carries a WNT_ACQUIRE reference which the
 * caller must drop with in_pcb_checkstate(..., WNT_RELEASE, ...).
 * PCBs found in WNT_STOPUSING state are reported as not found.
 */
static struct inpcb *
in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* Honor interface-scoped receive restrictions. */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				return inp;
			} else {
				/* it's there but dead, say it isn't found */
				return NULL;
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		return NULL;
	}

	/* Fall back to wildcard (unconnected/listening) entries on lport. */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					return inp;
				} else {
					/* it's dead; say it isn't found */
					return NULL;
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/*
				 * Fully wildcarded candidate; an IPv6
				 * socket accepting v4-mapped traffic is
				 * the weakest match.
				 */
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				return local_wild_mapped;
			} else {
				/* it's dead; say it isn't found */
				return NULL;
			}
		}
		return NULL;
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		return local_wild;
	}
	/*
	 * It's either not found or is already dead.
	 */
	return NULL;
}
2666
2667 struct inpcb *
in_pcblookup_hash(struct inpcbinfo * pcbinfo,struct in_addr faddr,u_int fport_arg,struct in_addr laddr,u_int lport_arg,int wildcard,struct ifnet * ifp)2668 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2669 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2670 struct ifnet *ifp)
2671 {
2672 struct inpcb *inp;
2673
2674 lck_rw_lock_shared(&pcbinfo->ipi_lock);
2675
2676 inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport_arg, laddr,
2677 lport_arg, wildcard, ifp);
2678
2679 lck_rw_done(&pcbinfo->ipi_lock);
2680
2681 return inp;
2682 }
2683
2684
2685 struct inpcb *
in_pcblookup_hash_try(struct inpcbinfo * pcbinfo,struct in_addr faddr,u_int fport_arg,struct in_addr laddr,u_int lport_arg,int wildcard,struct ifnet * ifp)2686 in_pcblookup_hash_try(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2687 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2688 struct ifnet *ifp)
2689 {
2690 struct inpcb *inp;
2691
2692 if (!lck_rw_try_lock_shared(&pcbinfo->ipi_lock)) {
2693 return NULL;
2694 }
2695
2696 inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport_arg, laddr,
2697 lport_arg, wildcard, ifp);
2698
2699 lck_rw_done(&pcbinfo->ipi_lock);
2700
2701 return inp;
2702 }
2703
2704 /*
2705 * @brief Insert PCB onto various hash lists.
2706 *
2707 * @param inp Pointer to internet protocol control block
2708 * @param remote Pointer to remote address sockaddr for policy evaluation
2709 * @param locked Implies if ipi_lock (protecting pcb list)
2710 * is already locked or not.
2711 *
2712 * @return int error on failure and 0 on success
2713 */
int
in_pcbinshash(struct inpcb *inp, struct sockaddr *remote, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets: drop the socket lock before
			 * blocking on ipi_lock, then re-take it.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}

	/*
	 * This routine or its caller may have given up
	 * socket's protocol lock briefly.
	 * During that time the socket may have been dropped.
	 * Safe-guarding against that.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		if (!locked) {
			lck_rw_done(&pcbinfo->ipi_lock);
		}
		return ECONNABORTED;
	}


	/* For v4-mapped v6 sockets, hash on the low 32 bits of the addr. */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport) {
			break;
		}
	}

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		phd = kalloc_type(struct inpcbport, Z_WAITOK | Z_NOFAIL);
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

#if SKYWALK
	/* Reserve the <proto, laddr, lport> tuple in the port namespace. */
	int err;
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		if (inp->inp_vflag & INP_IPV6) {
			err = netns_reserve_in6(&inp->inp_netns_token,
			    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		} else {
			err = netns_reserve_in(&inp->inp_netns_token,
			    inp->inp_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		}
		if (err) {
			/* Reservation failed; leave the PCB unhashed. */
			if (!locked) {
				lck_rw_done(&pcbinfo->ipi_lock);
			}
			return err;
		}
		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */

	/* Link the PCB onto its port head and hash chain. */
	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked) {
		lck_rw_done(&pcbinfo->ipi_lock);
	}

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, remote, 0);
#endif /* NECP */

	return 0;
}
2825
2826 /*
2827 * Move PCB to the proper hash bucket when { faddr, fport } have been
2828 * changed. NOTE: This does not handle the case of the lport changing (the
2829 * hashed port list would have to be updated as well), so the lport must
2830 * not change after in_pcbinshash() has been called.
2831 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if SKYWALK
	/* Keep the port-namespace reservation in sync with the new laddr. */
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		int err;
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_change_addr_in6(
					&inp->inp_netns_token, inp->in6p_laddr);
			} else {
				err = netns_change_addr_in(
					&inp->inp_netns_token, inp->inp_laddr);
			}
		} else {
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_reserve_in6(&inp->inp_netns_token,
				    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			} else {
				err = netns_reserve_in(&inp->inp_netns_token,
				    inp->inp_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			}
		}
		/*
		 * We are assuming that whatever code paths result in a
		 * rehash did their due diligence and ensured that the
		 * given <proto, laddr, lport> tuple was free ahead of
		 * time.  Just reserving the lport on INADDR_ANY should be
		 * enough, since that will block Skywalk from trying to
		 * reserve that same port.  Given this assumption, the
		 * above netns calls should never fail.
		 */
		VERIFY(err == 0);

		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */
	/* For v4-mapped v6 sockets, hash on the low 32 bits of the addr. */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	/* Unlink from the old hash chain before relinking on the new one. */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}
2899
2900 /*
2901 * Remove PCB from various lists.
 * Must be called with the pcbinfo lock held in exclusive mode.
2903 */
void
in_pcbremlists(struct inpcb *inp)
{
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * its local port gets assigned. So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		/* Free the per-port head if this was its last PCB. */
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			kfree_type(struct inpcbport, phd);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
#if SKYWALK
		/* Free up the port in the namespace registrar */
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
#endif /* SKYWALK */
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	/* Detach from the flow-control tree, if linked. */
	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}
2958
2959 /*
2960 * Mechanism used to defer the memory release of PCBs
2961 * The pcb list will contain the pcb until the reaper can clean it up if
2962 * the following conditions are met:
2963 * 1) state "DEAD",
2964 * 2) wantcnt is STOPUSING
2965 * 3) usecount is 0
 * This function will be called to either mark the pcb as ready for
 * reclaiming (WNT_STOPUSING) or to acquire/release a "want" reference
 * on it (WNT_ACQUIRE/WNT_RELEASE).
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	/* inp_wantcnt doubles as refcount (low values) and 0xffff sentinel. */
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling. CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}

		/* Ask the garbage collector to reap this PCB soon. */
		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) { /* should stop using */
			return WNT_STOPUSING;
		}
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return WNT_STOPUSING;

	case WNT_ACQUIRE:
		/*
		 * Try to increase reference to pcb. If WNT_STOPUSING
		 * should bail out. If socket state DEAD, try to set count
		 * to STOPUSING, return failed otherwise increase cnt.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return WNT_STOPUSING;
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return WNT_ACQUIRE;

	case WNT_RELEASE:
		/*
		 * Release reference. If result is null and pcb state
		 * is DEAD, set wanted bit to STOPUSING
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				/* Releasing without a matching acquire. */
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0) {
					socket_unlock(pcb->inp_socket, 1);
				}
				return WNT_STOPUSING;
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		/* Last release on a DEAD pcb: try to mark it STOPUSING. */
		if (pcb->inp_state == INPCB_STATE_DEAD) {
			goto stopusing;
		}
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}
		return WNT_RELEASE;

	default:
		panic("%s: so=%p not a valid state =%x", __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return mode;
}
3075
3076 /*
3077 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
3078 * The inpcb_compat data structure is passed to user space and must
3079 * not change. We intentionally avoid copying pointers.
3080 */
3081 void
inpcb_to_compat(struct inpcb * inp,struct inpcb_compat * inp_compat)3082 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
3083 {
3084 bzero(inp_compat, sizeof(*inp_compat));
3085 inp_compat->inp_fport = inp->inp_fport;
3086 inp_compat->inp_lport = inp->inp_lport;
3087 inp_compat->nat_owner = 0;
3088 inp_compat->nat_cookie = 0;
3089 inp_compat->inp_gencnt = inp->inp_gencnt;
3090 inp_compat->inp_flags = inp->inp_flags;
3091 inp_compat->inp_flow = inp->inp_flow;
3092 inp_compat->inp_vflag = inp->inp_vflag;
3093 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
3094 inp_compat->inp_ip_p = inp->inp_ip_p;
3095 inp_compat->inp_dependfaddr.inp6_foreign =
3096 inp->inp_dependfaddr.inp6_foreign;
3097 inp_compat->inp_dependladdr.inp6_local =
3098 inp->inp_dependladdr.inp6_local;
3099 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
3100 inp_compat->inp_depend6.inp6_hlim = 0;
3101 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
3102 inp_compat->inp_depend6.inp6_ifindex = 0;
3103 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
3104 }
3105
3106 #if XNU_TARGET_OS_OSX
3107 void
inpcb_to_xinpcb64(struct inpcb * inp,struct xinpcb64 * xinp)3108 inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
3109 {
3110 xinp->inp_fport = inp->inp_fport;
3111 xinp->inp_lport = inp->inp_lport;
3112 xinp->inp_gencnt = inp->inp_gencnt;
3113 xinp->inp_flags = inp->inp_flags;
3114 xinp->inp_flow = inp->inp_flow;
3115 xinp->inp_vflag = inp->inp_vflag;
3116 xinp->inp_ip_ttl = inp->inp_ip_ttl;
3117 xinp->inp_ip_p = inp->inp_ip_p;
3118 xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
3119 xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
3120 xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
3121 xinp->inp_depend6.inp6_hlim = 0;
3122 xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
3123 xinp->inp_depend6.inp6_ifindex = 0;
3124 xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
3125 }
3126 #endif /* XNU_TARGET_OS_OSX */
3127
3128 /*
3129 * The following routines implement this scheme:
3130 *
3131 * Callers of ip_output() that intend to cache the route in the inpcb pass
3132 * a local copy of the struct route to ip_output(). Using a local copy of
3133 * the cached route significantly simplifies things as IP no longer has to
3134 * worry about having exclusive access to the passed in struct route, since
3135 * it's defined in the caller's stack; in essence, this allows for a lock-
3136 * less operation when updating the struct route at the IP level and below,
3137 * whenever necessary. The scheme works as follows:
3138 *
3139 * Prior to dropping the socket's lock and calling ip_output(), the caller
3140 * copies the struct route from the inpcb into its stack, and adds a reference
3141 * to the cached route entry, if there was any. The socket's lock is then
3142 * dropped and ip_output() is called with a pointer to the copy of struct
3143 * route defined on the stack (not to the one in the inpcb.)
3144 *
3145 * Upon returning from ip_output(), the caller then acquires the socket's
3146 * lock and synchronizes the cache; if there is no route cached in the inpcb,
3147 * it copies the local copy of struct route (which may or may not contain any
3148 * route) back into the cache; otherwise, if the inpcb has a route cached in
3149 * it, the one in the local copy will be freed, if there's any. Trashing the
3150 * cached route in the inpcb can be avoided because ip_output() is single-
3151 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
3152 * by the socket/transport layer.)
3153 */
/*
 * Copy the PCB's cached route into the caller's stack copy (see the
 * scheme description above).  Must be called with the socket lock held;
 * route_copyout() adds a reference to any cached rtentry on behalf of
 * the caller.
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the case of IPv4-mapped address case.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
		ROUTE_RELEASE(src);
	}

	route_copyout(dst, src, sizeof(*dst));
}
3171
/*
 * Synchronize the caller's stack copy of the route back into the PCB
 * after ip_output() returns (see the scheme description above).  Must be
 * called with the socket lock held; route_copyin() either adopts the
 * caller's route or frees it if the PCB already holds one.
 */
void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
		panic("%s: wrong or corrupted route: %p", __func__, src);
	}

	route_copyin(src, dst, sizeof(*src));
}
3186
3187 /*
3188 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
3189 */
3190 static void
inp_bindif_common(struct inpcb * inp,struct ifnet * ifp)3191 inp_bindif_common(struct inpcb *inp, struct ifnet *ifp)
3192 {
3193 /*
3194 * A zero interface scope value indicates an "unbind".
3195 * Otherwise, take in whatever value the app desires;
3196 * the app may already know the scope (or force itself
3197 * to such a scope) ahead of time before the interface
3198 * gets attached. It doesn't matter either way; any
3199 * route lookup from this point on will require an
3200 * exact match for the embedded interface scope.
3201 */
3202 inp->inp_boundifp = ifp;
3203 if (inp->inp_boundifp == NULL) {
3204 inp->inp_flags &= ~INP_BOUND_IF;
3205 } else {
3206 inp->inp_flags |= INP_BOUND_IF;
3207 }
3208
3209 /* Blow away any cached route in the PCB */
3210 ROUTE_RELEASE(&inp->inp_route);
3211 }
3212
3213
/*
 * Bind the PCB to the interface identified by ifscope (IFSCOPE_NONE
 * unbinds).  Returns ENXIO if ifscope is out of range or names no
 * attached interface.  On success, optionally returns the resolved
 * ifnet (possibly NULL for unbind) via *pifp.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	/* Resolve the scope to an ifnet under the ifnet head lock */
	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return ENXIO;
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	inp_bindif_common(inp, ifp);

	if (pifp != NULL) {
		*pifp = ifp;
	}

	return 0;
}
3237
3238 int
inp_bindtodevice(struct inpcb * inp,const char * ifname)3239 inp_bindtodevice(struct inpcb *inp, const char *ifname)
3240 {
3241 ifnet_ref_t ifp = NULL;
3242
3243 if (*ifname != 0) {
3244 int error = ifnet_find_by_name(ifname, &ifp);
3245 if (error != 0) {
3246 return error;
3247 }
3248 }
3249
3250 inp_bindif_common(inp, ifp);
3251
3252 if (ifp != NULL) {
3253 ifnet_release(ifp);
3254 }
3255 return 0;
3256 }
3257
3258 /*
3259 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3260 * as well as for setting PROC_UUID_NO_CELLULAR policy.
3261 */
3262 void
inp_set_nocellular(struct inpcb * inp)3263 inp_set_nocellular(struct inpcb *inp)
3264 {
3265 inp->inp_flags |= INP_NO_IFT_CELLULAR;
3266
3267 /* Blow away any cached route in the PCB */
3268 ROUTE_RELEASE(&inp->inp_route);
3269 }
3270
3271 /*
3272 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3273 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
3274 */
3275 void
inp_clear_nocellular(struct inpcb * inp)3276 inp_clear_nocellular(struct inpcb *inp)
3277 {
3278 struct socket *so = inp->inp_socket;
3279
3280 /*
3281 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
3282 * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag
3283 * if and only if the socket is unrestricted.
3284 */
3285 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
3286 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
3287
3288 /* Blow away any cached route in the PCB */
3289 ROUTE_RELEASE(&inp->inp_route);
3290 }
3291 }
3292
3293 void
inp_set_noexpensive(struct inpcb * inp)3294 inp_set_noexpensive(struct inpcb *inp)
3295 {
3296 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
3297
3298 /* Blow away any cached route in the PCB */
3299 ROUTE_RELEASE(&inp->inp_route);
3300 }
3301
3302 void
inp_set_noconstrained(struct inpcb * inp)3303 inp_set_noconstrained(struct inpcb *inp)
3304 {
3305 inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;
3306
3307 /* Blow away any cached route in the PCB */
3308 ROUTE_RELEASE(&inp->inp_route);
3309 }
3310
3311 void
inp_set_awdl_unrestricted(struct inpcb * inp)3312 inp_set_awdl_unrestricted(struct inpcb *inp)
3313 {
3314 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
3315
3316 /* Blow away any cached route in the PCB */
3317 ROUTE_RELEASE(&inp->inp_route);
3318 }
3319
3320 boolean_t
inp_get_awdl_unrestricted(struct inpcb * inp)3321 inp_get_awdl_unrestricted(struct inpcb *inp)
3322 {
3323 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
3324 }
3325
3326 void
inp_clear_awdl_unrestricted(struct inpcb * inp)3327 inp_clear_awdl_unrestricted(struct inpcb *inp)
3328 {
3329 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
3330
3331 /* Blow away any cached route in the PCB */
3332 ROUTE_RELEASE(&inp->inp_route);
3333 }
3334
3335 void
inp_set_intcoproc_allowed(struct inpcb * inp)3336 inp_set_intcoproc_allowed(struct inpcb *inp)
3337 {
3338 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
3339
3340 /* Blow away any cached route in the PCB */
3341 ROUTE_RELEASE(&inp->inp_route);
3342 }
3343
3344 boolean_t
inp_get_intcoproc_allowed(struct inpcb * inp)3345 inp_get_intcoproc_allowed(struct inpcb *inp)
3346 {
3347 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3348 }
3349
3350 void
inp_clear_intcoproc_allowed(struct inpcb * inp)3351 inp_clear_intcoproc_allowed(struct inpcb *inp)
3352 {
3353 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
3354
3355 /* Blow away any cached route in the PCB */
3356 ROUTE_RELEASE(&inp->inp_route);
3357 }
3358
3359 void
inp_set_management_allowed(struct inpcb * inp)3360 inp_set_management_allowed(struct inpcb *inp)
3361 {
3362 inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
3363 inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
3364
3365 /* Blow away any cached route in the PCB */
3366 ROUTE_RELEASE(&inp->inp_route);
3367 }
3368
3369 boolean_t
inp_get_management_allowed(struct inpcb * inp)3370 inp_get_management_allowed(struct inpcb *inp)
3371 {
3372 return (inp->inp_flags2 & INP2_MANAGEMENT_ALLOWED) ? TRUE : FALSE;
3373 }
3374
3375 void
inp_clear_management_allowed(struct inpcb * inp)3376 inp_clear_management_allowed(struct inpcb *inp)
3377 {
3378 inp->inp_flags2 &= ~INP2_MANAGEMENT_ALLOWED;
3379
3380 /* Blow away any cached route in the PCB */
3381 ROUTE_RELEASE(&inp->inp_route);
3382 }
3383
3384 void
inp_set_ultra_constrained_allowed(struct inpcb * inp)3385 inp_set_ultra_constrained_allowed(struct inpcb *inp)
3386 {
3387 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_ALLOWED;
3388 inp->inp_flags2 |= INP2_ULTRA_CONSTRAINED_CHECKED;
3389
3390 /* Blow away any cached route in the PCB */
3391 ROUTE_RELEASE(&inp->inp_route);
3392 }
3393
3394 #if NECP
3395 /*
3396 * Called when PROC_UUID_NECP_APP_POLICY is set.
3397 */
3398 void
inp_set_want_app_policy(struct inpcb * inp)3399 inp_set_want_app_policy(struct inpcb *inp)
3400 {
3401 inp->inp_flags2 |= INP2_WANT_APP_POLICY;
3402 }
3403
3404 /*
3405 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
3406 */
3407 void
inp_clear_want_app_policy(struct inpcb * inp)3408 inp_clear_want_app_policy(struct inpcb *inp)
3409 {
3410 inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
3411 }
3412 #endif /* NECP */
3413
3414 /*
3415 * Calculate flow hash for an inp, used by an interface to identify a
3416 * flow. When an interface provides flow control advisory, this flow
3417 * hash is used as an identifier.
3418 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
#if SKYWALK
	/*
	 * Skywalk build: obtain a flow ID from the flow-ID namespace
	 * allocator using the 5-tuple as the key, then register the inp
	 * in the flow-control tree under that ID.
	 */
	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));

	if (inp->inp_vflag & INP_IPV4) {
		fk.ffk_af = AF_INET;
		fk.ffk_laddr_v4 = inp->inp_laddr;
		fk.ffk_raddr_v4 = inp->inp_faddr;
	} else {
		fk.ffk_af = AF_INET6;
		fk.ffk_laddr_v6 = inp->in6p_laddr;
		fk.ffk_raddr_v6 = inp->in6p_faddr;
		/* clear embedded scope ID */
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_laddr_v6)) {
			fk.ffk_laddr_v6.s6_addr16[1] = 0;
		}
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_raddr_v6)) {
			fk.ffk_raddr_v6.s6_addr16[1] = 0;
		}
	}

	fk.ffk_lport = inp->inp_lport;
	fk.ffk_rport = inp->inp_fport;
	/* Fall back to the socket protocol when inp_ip_p is unset */
	fk.ffk_proto = (inp->inp_ip_p != 0) ? inp->inp_ip_p :
	    (uint8_t)SOCK_PROTO(inp->inp_socket);
	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_INPCB, &fk, &flowid);
	/* Insert the inp into inp_fc_tree; must not already be present */
	lck_mtx_lock_spin(&inp_fc_lck);
	ASSERT(inp->inp_flowhash == 0);
	ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) == 0);
	inp->inp_flowhash = flowid;
	VERIFY(RB_INSERT(inp_fc_tree, &inp_fc_tree, inp) == NULL);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowid;

#else /* !SKYWALK */
	/*
	 * Non-Skywalk build: derive a hash over addresses/ports plus
	 * random salt, retrying with a fresh seed until the hash is
	 * non-zero and unique within inp_fc_tree.
	 */
	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low.  Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowhash;

#endif /* !SKYWALK */
}
3518
3519 void
inp_flowadv(uint32_t flowhash)3520 inp_flowadv(uint32_t flowhash)
3521 {
3522 struct inpcb *inp;
3523
3524 inp = inp_fc_getinp(flowhash, 0);
3525
3526 if (inp == NULL) {
3527 return;
3528 }
3529 inp_fc_feedback(inp);
3530 }
3531
3532 /*
3533 * Function to compare inp_fc_entries in inp flow control tree
3534 */
3535 static inline int
infc_cmp(const struct inpcb * inp1,const struct inpcb * inp2)3536 infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
3537 {
3538 return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
3539 sizeof(inp1->inp_flowhash));
3540 }
3541
/*
 * Look up an inp in the flow-control tree by flow hash.
 *
 * Default behavior returns the inp with a use count taken via
 * in_pcb_checkstate() (NULL if not found or being torn down).
 * With INPFC_REMOVE, the entry is removed from the tree instead and
 * NULL is returned.  INPFC_SOLOCKED tells in_pcb_checkstate() the
 * caller already holds the socket lock.
 */
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	/* key_inp is a dummy node used solely as the RB_FIND search key */
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (flags & INPFC_REMOVE) {
		ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) != 0);
		/* removal may block; upgrade from spin to full mutex hold */
		lck_mtx_convert_spin(&inp_fc_lck);
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		bzero(&(inp->infc_link), sizeof(inp->infc_link));
#if SKYWALK
		/* give the flow ID back to the namespace allocator */
		VERIFY(inp->inp_flowhash != 0);
		flowidns_release_flowid(inp->inp_flowhash);
		inp->inp_flowhash = 0;
#endif /* !SKYWALK */
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	/* take a use count on the inp; fails if the PCB is going away */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
		inp = NULL;
	}
	lck_mtx_unlock(&inp_fc_lck);

	return inp;
}
3579
/*
 * Process a flow advisory feedback event for an inp on which the caller
 * holds a use count (taken by inp_fc_getinp()).  Releases that use
 * count, and if the connection is still flow-controlled, resets its
 * flow-control state and unthrottles TCP where applicable.
 */
static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	/* drop our use count; bail if the PCB is being torn down */
	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/*
	 * A send is in progress; flag it so inp_set_fc_state() ignores
	 * any advisory that raced with this feedback.
	 */
	if (inp->inp_sndinprog_cnt > 0) {
		inp->inp_flags |= INP_FC_FEEDBACK;
	}

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		inp_fc_unthrottle_tcp(inp);
	}

	socket_unlock(so, 1);
}
3615
3616 static void
inp_reset_fc_timerstat(struct inpcb * inp)3617 inp_reset_fc_timerstat(struct inpcb *inp)
3618 {
3619 uint64_t now;
3620
3621 if (inp->inp_fadv_start_time == 0) {
3622 return;
3623 }
3624
3625 now = net_uptime_us();
3626 ASSERT(now >= inp->inp_fadv_start_time);
3627
3628 inp->inp_fadv_total_time += (now - inp->inp_fadv_start_time);
3629 inp->inp_fadv_cnt++;
3630
3631 inp->inp_fadv_start_time = 0;
3632 }
3633
3634 static void
inp_set_fc_timerstat(struct inpcb * inp)3635 inp_set_fc_timerstat(struct inpcb *inp)
3636 {
3637 if (inp->inp_fadv_start_time != 0) {
3638 return;
3639 }
3640
3641 inp->inp_fadv_start_time = net_uptime_us();
3642 }
3643
3644 void
inp_reset_fc_state(struct inpcb * inp)3645 inp_reset_fc_state(struct inpcb *inp)
3646 {
3647 struct socket *so = inp->inp_socket;
3648 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
3649 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
3650
3651 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3652
3653 inp_reset_fc_timerstat(inp);
3654
3655 if (suspended) {
3656 so->so_flags &= ~(SOF_SUSPENDED);
3657 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
3658 }
3659
3660 /* Give a write wakeup to unblock the socket */
3661 if (needwakeup) {
3662 sowwakeup(so);
3663 }
3664 }
3665
/*
 * Apply a flow advisory (FADV_FLOW_CONTROLLED or FADV_SUSPENDED) to the
 * PCB.  Returns 1 if the state was applied, 0 if the advisory was
 * ignored (racing feedback, or the inp is no longer in the tree).
 */
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK) {
		return 0;
	}

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	/* re-look up the inp to confirm it is still flow-controllable */
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		/* drop the use count taken by inp_fc_getinp() */
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			goto exit_reset;
		}
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			inp_set_fc_timerstat(inp);
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			inp_set_fc_timerstat(inp);

			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}

		/* throttle TCP only on the transition into flow control */
		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
			inp_fc_throttle_tcp(inp);
		}
		return 1;
	}

exit_reset:
	inp_reset_fc_timerstat(inp);

	return 0;
}
3717
3718 /*
3719 * Handler for SO_FLUSH socket option.
3720 */
3721 int
inp_flush(struct inpcb * inp,int optval)3722 inp_flush(struct inpcb *inp, int optval)
3723 {
3724 u_int32_t flowhash = inp->inp_flowhash;
3725 struct ifnet *rtifp, *oifp;
3726
3727 /* Either all classes or one of the valid ones */
3728 if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
3729 return EINVAL;
3730 }
3731
3732 /* We need a flow hash for identification */
3733 if (flowhash == 0) {
3734 return 0;
3735 }
3736
3737 /* Grab the interfaces from the route and pcb */
3738 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3739 inp->inp_route.ro_rt->rt_ifp : NULL);
3740 oifp = inp->inp_last_outifp;
3741
3742 if (rtifp != NULL) {
3743 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL);
3744 }
3745 if (oifp != NULL && oifp != rtifp) {
3746 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL);
3747 }
3748
3749 return 0;
3750 }
3751
3752 /*
3753 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3754 */
3755 void
inp_clear_INP_INADDR_ANY(struct socket * so)3756 inp_clear_INP_INADDR_ANY(struct socket *so)
3757 {
3758 struct inpcb *inp = NULL;
3759
3760 socket_lock(so, 1);
3761 inp = sotoinpcb(so);
3762 if (inp) {
3763 inp->inp_flags &= ~INP_INADDR_ANY;
3764 }
3765 socket_unlock(so, 1);
3766 }
3767
/*
 * Fill in process-attribution information (pid, name, UUID, and
 * delegation details) for the socket owning this inpcb.
 */
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	strbufcpy(soprocinfo->spi_proc_name, inp->inp_last_proc_name);
	/* pid 0 (kernel) has no meaningful UUID to report */
	if (so->last_pid != 0) {
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	}
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
	strbufcpy(soprocinfo->spi_e_proc_name, inp->inp_e_proc_name);
}
3791
/*
 * Find the live inpcb with the given flow hash in pcbinfo and report
 * its owning process information.  Returns 1 if found, 0 if not, and
 * -1 when flowhash is zero (no valid identifier).
 */
int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof(struct so_procinfo));

	if (!flowhash) {
		return -1;
	}

	/* linear walk of the PCB list under the shared pcbinfo lock */
	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);

	return found;
}
3819
3820 #if CONFIG_PROC_UUID_POLICY
/*
 * Apply or clear the PROC_UUID_NO_CELLULAR policy on a PCB, logging a
 * transition when net_io_policy_log is enabled.
 */
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		/* may be a no-op if SO_RESTRICT_DENY_CELLULAR is set */
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	/* log only actual transitions */
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* attribute the change to the effective (delegated) process */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}
3864
3865 #if NECP
/*
 * Apply or clear the PROC_UUID_NECP_APP_POLICY flag on a PCB, logging a
 * transition when net_io_policy_log is enabled.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	/* log only actual transitions */
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* attribute the change to the effective (delegated) process */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so %llu [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    so->so_gencnt, SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
3906 #endif /* NECP */
3907 #endif /* !CONFIG_PROC_UUID_POLICY */
3908
3909 #if NECP
/*
 * Re-evaluate the NECP policy match for this PCB, and rescope the
 * socket to the NECP-suggested interface if the policy requires it and
 * the socket is not yet bound to any port or address.
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
		inp->inp_flags2 |= INP2_SCOPED_BY_NECP;
	}
}
3923 #endif /* NECP */
3924
/*
 * Re-apply per-process UUID network policies (cellular restriction and
 * NECP app policy) to this PCB.  Looks up the policy table by the
 * socket's responsible/effective/last UUID and acts only when the
 * policy generation count has changed.  Returns 0 on success or when
 * the UUID simply has no entry (ENOENT); other lookup errors are
 * propagated.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;
	uint8_t *lookup_uuid = NULL;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return 0;
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return 0;
	}

#if defined(XNU_TARGET_OS_OSX)
	/* prefer the responsible process' UUID when one is recorded */
	if (so->so_rpid > 0) {
		lookup_uuid = so->so_ruuid;
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}
#endif
	/* fall back to the effective (delegated) or last process UUID */
	if (lookup_uuid == NULL || err == ENOENT) {
		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly.  If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* a missing table entry is not an error for the caller */
	return (err == ENOENT) ? 0 : err;
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return 0;
#endif /* !CONFIG_PROC_UUID_POLICY */
}
3998
/*
 * When non-zero, log each packet denied by the interface policy
 * restrictions below (tunable via net.inet.log_restricted).
 */
unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
4004
4005
4006 /*
4007 * Called when we need to enforce policy restrictions in the input path.
4008 *
4009 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
4010 */
static boolean_t
_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	/* per-PCB opt-outs of cellular/expensive/constrained interfaces */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	/* AWDL requires an explicit per-PCB grant */
	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/*
	 * Past this point the interface itself restricts receive;
	 * only specifically-allowed PCBs may proceed.
	 */
	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
		return FALSE;
	}

	if (inp->inp_flags & INP_RECV_ANYIF) {
		return FALSE;
	}

	/*
	 * An entitled process can use the management interface without being bound
	 * to the interface
	 */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (INP_MANAGEMENT_ALLOWED(inp)) {
			return FALSE;
		}
		if (if_management_verbose > 1) {
			os_log(OS_LOG_DEFAULT, "_inp_restricted_recv %s:%d not allowed on management interface %s",
			    proc_best_name(current_proc()), proc_getpid(current_proc()),
			    ifp->if_xname);
		}
		return TRUE;
	}

	/* a PCB explicitly bound to this interface is allowed */
	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
		return FALSE;
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	/* deny by default on receive-restricted interfaces */
	return TRUE;
}
4078
4079 boolean_t
inp_restricted_recv(struct inpcb * inp,struct ifnet * ifp)4080 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
4081 {
4082 boolean_t ret;
4083
4084 ret = _inp_restricted_recv(inp, ifp);
4085 if (ret == TRUE && log_restricted) {
4086 printf("pid %d (%s) is unable to receive packets on %s\n",
4087 proc_getpid(current_proc()), proc_best_name(current_proc()),
4088 ifp->if_xname);
4089 }
4090 return ret;
4091 }
4092
4093 /*
4094 * Called when we need to enforce policy restrictions in the output path.
4095 *
4096 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
4097 */
static boolean_t
_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Outbound restrictions.
	 */
	if (!sorestrictsend) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	/* per-PCB opt-outs of cellular/expensive/constrained interfaces */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_ULTRA_CONSTRAINED(ifp) && uuid_is_null(inp->necp_client_uuid) &&
	    !INP_ULTRA_CONSTRAINED_ALLOWED(inp)) {
		// Non-NECP-aware sockets are not allowed to use ultra constrained interfaces
		// without an entitlement
		return TRUE;
	}

	/* AWDL requires an explicit per-PCB grant */
	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* management interfaces require the management entitlement */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (!INP_MANAGEMENT_ALLOWED(inp)) {
			if (if_management_verbose > 1) {
				os_log(OS_LOG_DEFAULT, "_inp_restricted_send %s:%d not allowed on management interface %s",
				    proc_best_name(current_proc()), proc_getpid(current_proc()),
				    ifp->if_xname);
			}
			return TRUE;
		}
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	return FALSE;
}
4154
4155 boolean_t
inp_restricted_send(struct inpcb * inp,struct ifnet * ifp)4156 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
4157 {
4158 boolean_t ret;
4159
4160 ret = _inp_restricted_send(inp, ifp);
4161 if (ret == TRUE && log_restricted) {
4162 printf("%s:%d pid %d (%s) is unable to transmit packets on %s\n",
4163 __func__, __LINE__,
4164 proc_getpid(current_proc()), proc_best_name(current_proc()),
4165 ifp->if_xname);
4166 }
4167 return ret;
4168 }
4169
/*
 * Begin per-interface send-byte accounting for this PCB.  Enabled only
 * for non-MPTCP-subflow sockets whose last output interface is cellular
 * or Wi-Fi; sets SB_SNDBYTE_CNT on the send buffer and seeds the
 * interface counters with any bytes already queued (e.g. TFO data sent
 * before the connection is established).
 */
inline void
inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
{
	struct ifnet *ifp = inp->inp_last_outifp;
	struct socket *so = inp->inp_socket;
	if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
	    (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
		int32_t unsent;

		so->so_snd.sb_flags |= SB_SNDBYTE_CNT;

		/*
		 * There can be data outstanding before the connection
		 * becomes established -- TFO case
		 */
		if (so->so_snd.sb_cc > 0) {
			inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
		}

		unsent = inp_get_sndbytes_allunsent(so, th_ack);
		if (unsent > 0) {
			inp_incr_sndbytes_unsent(so, unsent);
		}
	}
}
4195
4196 inline void
inp_incr_sndbytes_total(struct socket * so,int32_t len)4197 inp_incr_sndbytes_total(struct socket *so, int32_t len)
4198 {
4199 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4200 struct ifnet *ifp = inp->inp_last_outifp;
4201
4202 if (ifp != NULL) {
4203 VERIFY(ifp->if_sndbyte_total >= 0);
4204 OSAddAtomic64(len, &ifp->if_sndbyte_total);
4205 }
4206 }
4207
4208 inline void
inp_decr_sndbytes_total(struct socket * so,int32_t len)4209 inp_decr_sndbytes_total(struct socket *so, int32_t len)
4210 {
4211 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4212 struct ifnet *ifp = inp->inp_last_outifp;
4213
4214 if (ifp != NULL) {
4215 if (ifp->if_sndbyte_total >= len) {
4216 OSAddAtomic64(-len, &ifp->if_sndbyte_total);
4217 } else {
4218 ifp->if_sndbyte_total = 0;
4219 }
4220 }
4221 }
4222
4223 inline void
inp_incr_sndbytes_unsent(struct socket * so,int32_t len)4224 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
4225 {
4226 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4227 struct ifnet *ifp = inp->inp_last_outifp;
4228
4229 if (ifp != NULL) {
4230 VERIFY(ifp->if_sndbyte_unsent >= 0);
4231 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
4232 }
4233 }
4234
4235 inline void
inp_decr_sndbytes_unsent(struct socket * so,int32_t len)4236 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
4237 {
4238 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4239 return;
4240 }
4241
4242 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4243 struct ifnet *ifp = inp->inp_last_outifp;
4244
4245 if (ifp != NULL) {
4246 if (ifp->if_sndbyte_unsent >= len) {
4247 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
4248 } else {
4249 ifp->if_sndbyte_unsent = 0;
4250 }
4251 }
4252 }
4253
4254 inline void
inp_decr_sndbytes_allunsent(struct socket * so,u_int32_t th_ack)4255 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
4256 {
4257 int32_t len;
4258
4259 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4260 return;
4261 }
4262
4263 len = inp_get_sndbytes_allunsent(so, th_ack);
4264 inp_decr_sndbytes_unsent(so, len);
4265 }
4266
4267 #if SKYWALK
4268 inline void
inp_update_netns_flags(struct socket * so)4269 inp_update_netns_flags(struct socket *so)
4270 {
4271 struct inpcb *inp;
4272 uint32_t set_flags = 0;
4273 uint32_t clear_flags = 0;
4274
4275 if (!(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
4276 return;
4277 }
4278
4279 inp = sotoinpcb(so);
4280
4281 if (inp == NULL) {
4282 return;
4283 }
4284
4285 if (!NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
4286 return;
4287 }
4288
4289 if (so->so_options & SO_NOWAKEFROMSLEEP) {
4290 set_flags |= NETNS_NOWAKEFROMSLEEP;
4291 } else {
4292 clear_flags |= NETNS_NOWAKEFROMSLEEP;
4293 }
4294
4295 if (inp->inp_flags & INP_RECV_ANYIF) {
4296 set_flags |= NETNS_RECVANYIF;
4297 } else {
4298 clear_flags |= NETNS_RECVANYIF;
4299 }
4300
4301 if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
4302 set_flags |= NETNS_EXTBGIDLE;
4303 } else {
4304 clear_flags |= NETNS_EXTBGIDLE;
4305 }
4306
4307 netns_change_flags(&inp->inp_netns_token, set_flags, clear_flags);
4308 }
4309 #endif /* SKYWALK */
4310
/*
 * Copy the PCB's total activity bitmap into *ab.
 */
inline void
inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
{
	// Just grab the total bitmap until we have more precision in bitmap retrieval
	bcopy(&inp->inp_mstat.ms_total.ts_bitmap, ab, sizeof(*ab));
}
4317
4318 void
inp_update_last_owner(struct socket * so,struct proc * p,struct proc * ep)4319 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
4320 {
4321 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4322
4323 if (inp == NULL) {
4324 return;
4325 }
4326
4327 if (p != NULL) {
4328 strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
4329 }
4330 if (so->so_flags & SOF_DELEGATED) {
4331 if (ep != NULL) {
4332 strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
4333 } else {
4334 inp->inp_e_proc_name[0] = 0;
4335 }
4336 } else {
4337 inp->inp_e_proc_name[0] = 0;
4338 }
4339 nstat_pcb_update_last_owner(inp);
4340 }
4341
/*
 * Copy the cached owner and delegate process names from head's PCB
 * (e.g. a listening socket) into so's PCB.
 */
void
inp_copy_last_owner(struct socket *so, struct socket *head)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct inpcb *head_inp = (struct inpcb *)head->so_pcb;

	/* either PCB may already be detached */
	if (inp == NULL || head_inp == NULL) {
		return;
	}

	strbufcpy(inp->inp_last_proc_name, head_inp->inp_last_proc_name);
	strbufcpy(inp->inp_e_proc_name, head_inp->inp_e_proc_name);
}
4355
/*
 * proc_iterate() callback: if the process holds one of the management /
 * intcoproc data entitlements (or the restriction is globally disabled
 * via management_data_unrestricted), walk its open file descriptors and
 * mark every IPv4/IPv6 socket PCB as allowed and checked for management
 * interfaces.  Always returns PROC_RETURNED to continue iteration.
 */
static int
in_check_management_interface_proc_callout(proc_t proc, void *arg __unused)
{
	struct fileproc *fp = NULL;
	task_t __single task = proc_task(proc);
	bool allowed = false;

	if (IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
	    || IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT_DEVELOPMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
	    ) {
		allowed = true;
	}
	/* nothing to mark unless entitled or the restriction is disabled */
	if (allowed == false && management_data_unrestricted == false) {
		return PROC_RETURNED;
	}

	proc_fdlock(proc);
	fdt_foreach(fp, proc) {
		struct fileglob *fg = fp->fp_glob;
		struct socket *so;
		struct inpcb *inp;

		if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
			continue;
		}

		so = (struct socket *)fp_get_data(fp);
		/* only inet sockets have an inpcb to flag */
		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
			continue;
		}

		inp = (struct inpcb *)so->so_pcb;

		/* take a use count; skip PCBs that are being torn down */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}

		socket_lock(so, 1);

		/* drop the use count under the socket lock; re-check state */
		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			socket_unlock(so, 1);
			continue;
		}
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;

		socket_unlock(so, 1);
	}
	proc_fdunlock(proc);

	return PROC_RETURNED;
}
4412
/* Set once the one-time management-interface entitlement scan has run */
static bool in_management_interface_checked = false;
4414
4415 static void
in_management_interface_event_callback(struct nwk_wq_entry * nwk_item)4416 in_management_interface_event_callback(struct nwk_wq_entry *nwk_item)
4417 {
4418 kfree_type(struct nwk_wq_entry, nwk_item);
4419
4420 if (in_management_interface_checked == true) {
4421 return;
4422 }
4423 in_management_interface_checked = true;
4424
4425 proc_iterate(PROC_ALLPROCLIST,
4426 in_check_management_interface_proc_callout,
4427 NULL, NULL, NULL);
4428 }
4429
4430 void
in_management_interface_check(void)4431 in_management_interface_check(void)
4432 {
4433 struct nwk_wq_entry *nwk_item;
4434
4435 if (if_management_interface_check_needed == false ||
4436 in_management_interface_checked == true) {
4437 return;
4438 }
4439
4440 nwk_item = kalloc_type(struct nwk_wq_entry,
4441 Z_WAITOK | Z_ZERO | Z_NOFAIL);
4442
4443 nwk_item->func = in_management_interface_event_callback;
4444
4445 nwk_wq_enqueue(nwk_item);
4446 }
4447
/*
 * Serialize bind operations on a socket: wait until no other thread holds
 * INP2_BIND_IN_PROGRESS, then claim it for the current thread.  The caller
 * must hold the socket lock; it is dropped while sleeping in msleep().
 */
void
inp_enter_bind_in_progress(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);

#if (DEBUG || DEVELOPMENT)
	socket_lock_assert_owned(so);
#endif /* (DEBUG || DEVELOPMENT) */

	/* guard against waiter-count overflow */
	VERIFY(inp->inp_bind_in_progress_waiters != UINT16_MAX);

	while ((inp->inp_flags2 & INP2_BIND_IN_PROGRESS) != 0) {
		lck_mtx_t *mutex_held;

		inp->inp_bind_in_progress_waiters++;
		inp->inp_bind_in_progress_last_waiter_thread = current_thread();

		/* find the mutex msleep() must drop while waiting */
		if (so->so_proto->pr_getlock != NULL) {
			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
		} else {
			mutex_held = so->so_proto->pr_domain->dom_mtx;
		}
		/* sleeps until woken by inp_exit_bind_in_progress() or a signal */
		msleep(&inp->inp_bind_in_progress_waiters, mutex_held,
		    PSOCK | PCATCH, "inp_enter_bind_in_progress", NULL);

		inp->inp_bind_in_progress_last_waiter_thread = NULL;

		inp->inp_bind_in_progress_waiters--;
	}
	inp->inp_flags2 |= INP2_BIND_IN_PROGRESS;
	inp->inp_bind_in_progress_thread = current_thread();
}
4480
/*
 * Release the bind-in-progress claim taken by inp_enter_bind_in_progress()
 * and wake one waiter, if any.  The caller must hold the socket lock.
 */
void
inp_exit_bind_in_progress(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);

#if (DEBUG || DEVELOPMENT)
	socket_lock_assert_owned(so);
#endif /* (DEBUG || DEVELOPMENT) */

	inp->inp_flags2 &= ~INP2_BIND_IN_PROGRESS;
	inp->inp_bind_in_progress_thread = NULL;
	if (__improbable(inp->inp_bind_in_progress_waiters > 0)) {
		wakeup_one((caddr_t)&inp->inp_bind_in_progress_waiters);
	}
}
4496
4497 /*
4498 * XXX: this is borrowed from in6_pcbsetport(). If possible, we should
4499 * share this function by all *bsd*...
4500 */
4501 int
in_pcbsetport(struct in_addr laddr,struct sockaddr * remote,struct inpcb * inp,struct proc * p,int locked)4502 in_pcbsetport(struct in_addr laddr, struct sockaddr *remote, struct inpcb *inp, struct proc *p,
4503 int locked)
4504 {
4505 struct socket *__single so = inp->inp_socket;
4506 uint16_t lport = 0, first, last, rand_port;
4507 uint16_t *__single lastport;
4508 int count, error = 0, wild = 0;
4509 boolean_t counting_down;
4510 bool found, randomport;
4511 struct inpcbinfo *__single pcbinfo = inp->inp_pcbinfo;
4512 kauth_cred_t __single cred;
4513 #if SKYWALK
4514 bool laddr_unspecified = laddr.s_addr == INADDR_ANY;
4515 #else
4516 #pragma unused(laddr)
4517 #endif
4518 if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */
4519 if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
4520 socket_unlock(inp->inp_socket, 0);
4521 lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
4522 socket_lock(inp->inp_socket, 0);
4523 }
4524
4525 /*
4526 * Check if a local port was assigned to the inp while
4527 * this thread was waiting for the pcbinfo lock
4528 */
4529 if (inp->inp_lport != 0) {
4530 VERIFY(inp->inp_flags2 & INP2_INHASHLIST);
4531 lck_rw_done(&pcbinfo->ipi_lock);
4532
4533 /*
4534 * It is not an error if another thread allocated
4535 * a port
4536 */
4537 return 0;
4538 }
4539 }
4540
4541 /* XXX: this is redundant when called from in6_pcbbind */
4542 if ((so->so_options & (SO_REUSEADDR | SO_REUSEPORT)) == 0) {
4543 wild = INPLOOKUP_WILDCARD;
4544 }
4545
4546 randomport = (so->so_flags & SOF_BINDRANDOMPORT) > 0 ||
4547 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
4548 udp_use_randomport) > 0;
4549
4550 if (inp->inp_flags & INP_HIGHPORT) {
4551 first = (uint16_t)ipport_hifirstauto; /* sysctl */
4552 last = (uint16_t)ipport_hilastauto;
4553 lastport = &pcbinfo->ipi_lasthi;
4554 } else if (inp->inp_flags & INP_LOWPORT) {
4555 cred = kauth_cred_proc_ref(p);
4556 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
4557 kauth_cred_unref(&cred);
4558 if (error != 0) {
4559 if (!locked) {
4560 lck_rw_done(&pcbinfo->ipi_lock);
4561 }
4562 return error;
4563 }
4564 first = (uint16_t)ipport_lowfirstauto; /* 1023 */
4565 last = (uint16_t)ipport_lowlastauto; /* 600 */
4566 lastport = &pcbinfo->ipi_lastlow;
4567 } else {
4568 first = (uint16_t)ipport_firstauto; /* sysctl */
4569 last = (uint16_t)ipport_lastauto;
4570 lastport = &pcbinfo->ipi_lastport;
4571 }
4572
4573 if (first == last) {
4574 randomport = false;
4575 }
4576 /*
4577 * Simple check to ensure all ports are not used up causing
4578 * a deadlock here.
4579 */
4580 found = false;
4581 if (first > last) {
4582 /* counting down */
4583 if (randomport) {
4584 read_frandom(&rand_port, sizeof(rand_port));
4585 *lastport = first - (rand_port % (first - last));
4586 }
4587 count = first - last;
4588 counting_down = TRUE;
4589 } else {
4590 /* counting up */
4591 if (randomport) {
4592 read_frandom(&rand_port, sizeof(rand_port));
4593 *lastport = first + (rand_port % (first - last));
4594 }
4595 count = last - first;
4596 counting_down = FALSE;
4597 }
4598 do {
4599 if (count-- < 0) { /* completely used? */
4600 /*
4601 * Undo any address bind that may have
4602 * occurred above.
4603 */
4604 inp->in6p_laddr = in6addr_any;
4605 inp->in6p_last_outifp = NULL;
4606 #if SKYWALK
4607 if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
4608 netns_set_ifnet(&inp->inp_netns_token,
4609 NULL);
4610 }
4611 #endif /* SKYWALK */
4612 if (!locked) {
4613 lck_rw_done(&pcbinfo->ipi_lock);
4614 }
4615 return EAGAIN;
4616 }
4617 if (counting_down) {
4618 --*lastport;
4619 if (*lastport > first || *lastport < last) {
4620 *lastport = first;
4621 }
4622 } else {
4623 ++*lastport;
4624 if (*lastport < first || *lastport > last) {
4625 *lastport = first;
4626 }
4627 }
4628 lport = htons(*lastport);
4629
4630 /*
4631 * Skip if this is a restricted port as we do not want to
4632 * use restricted ports as ephemeral
4633 */
4634 if (IS_RESTRICTED_IN_PORT(lport)) {
4635 continue;
4636 }
4637
4638 found = (in_pcblookup_local(pcbinfo, inp->inp_laddr,
4639 lport, wild) == NULL);
4640 #if SKYWALK
4641 if (found &&
4642 (SOCK_PROTO(so) == IPPROTO_TCP ||
4643 SOCK_PROTO(so) == IPPROTO_UDP) &&
4644 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
4645 if (laddr_unspecified &&
4646 (inp->inp_vflag & INP_IPV6) != 0 &&
4647 (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
4648 struct in_addr ip_zero = { .s_addr = 0 };
4649
4650 netns_release(&inp->inp_wildcard_netns_token);
4651 if (netns_reserve_in(
4652 &inp->inp_wildcard_netns_token,
4653 ip_zero,
4654 (uint8_t)SOCK_PROTO(so), lport,
4655 NETNS_BSD, NULL) != 0) {
4656 /* port in use in IPv4 namespace */
4657 found = false;
4658 }
4659 }
4660 if (found &&
4661 netns_reserve_in(&inp->inp_netns_token,
4662 inp->inp_laddr, (uint8_t)SOCK_PROTO(so), lport,
4663 NETNS_BSD, NULL) != 0) {
4664 netns_release(&inp->inp_wildcard_netns_token);
4665 found = false;
4666 }
4667 }
4668 #endif /* SKYWALK */
4669 } while (!found);
4670
4671 inp->inp_lport = lport;
4672 inp->inp_flags |= INP_ANONPORT;
4673
4674 bool is_ipv6 = (inp->inp_vflag & INP_IPV6);
4675 if (is_ipv6) {
4676 inp->inp_vflag &= ~INP_IPV6;
4677 }
4678
4679 if (in_pcbinshash(inp, remote, 1) != 0) {
4680 inp->inp_last_outifp = NULL;
4681 inp->inp_lifscope = IFSCOPE_NONE;
4682 #if SKYWALK
4683 netns_release(&inp->inp_netns_token);
4684 #endif /* SKYWALK */
4685 inp->inp_lport = 0;
4686 inp->inp_flags &= ~INP_ANONPORT;
4687 if (is_ipv6) {
4688 inp->inp_vflag |= INP_IPV6;
4689 }
4690 if (!locked) {
4691 lck_rw_done(&pcbinfo->ipi_lock);
4692 }
4693 return EAGAIN;
4694 }
4695 if (is_ipv6) {
4696 inp->inp_vflag |= INP_IPV6;
4697 }
4698
4699 if (!locked) {
4700 lck_rw_done(&pcbinfo->ipi_lock);
4701 }
4702 return 0;
4703 }
4704