1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <sys/file_internal.h>
82 #include <net/dlil.h>
83
84 #include <libkern/OSAtomic.h>
85 #include <kern/locks.h>
86
87 #include <machine/limits.h>
88
89 #include <kern/zalloc.h>
90
91 #include <net/if.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/flowhash.h>
95 #include <net/flowadv.h>
96 #include <net/nat464_utils.h>
97 #include <net/ntstat.h>
98 #include <net/nwk_wq.h>
99 #include <net/restricted_in_port.h>
100
101 #include <netinet/in.h>
102 #include <netinet/in_pcb.h>
103 #include <netinet/inp_log.h>
104 #include <netinet/in_var.h>
105 #include <netinet/ip_var.h>
106
107 #include <netinet/ip6.h>
108 #include <netinet6/ip6_var.h>
109
110 #include <sys/kdebug.h>
111 #include <sys/random.h>
112
113 #include <dev/random/randomdev.h>
114 #include <mach/boolean.h>
115
116 #include <atm/atm_internal.h>
117 #include <pexpert/pexpert.h>
118
119 #if NECP
120 #include <net/necp.h>
121 #endif
122
123 #include <sys/stat.h>
124 #include <sys/ubc.h>
125 #include <sys/vnode.h>
126
127 #include <os/log.h>
128
129 #if SKYWALK
130 #include <skywalk/namespace/flowidns.h>
131 #endif /* SKYWALK */
132
133 #include <IOKit/IOBSD.h>
134
135 extern const char *proc_name_address(struct proc *);
136
137 static LCK_GRP_DECLARE(inpcb_lock_grp, "inpcb");
138 static LCK_ATTR_DECLARE(inpcb_lock_attr, 0, 0);
139 static LCK_MTX_DECLARE_ATTR(inpcb_lock, &inpcb_lock_grp, &inpcb_lock_attr);
140 static LCK_MTX_DECLARE_ATTR(inpcb_timeout_lock, &inpcb_lock_grp, &inpcb_lock_attr);
141
142 static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
143
144 static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
145 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
146 static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
147 static boolean_t inpcb_fast_timer_on = FALSE;
148
149 #define INPCB_GCREQ_THRESHOLD 50000
150
151 static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
152 static void inpcb_sched_timeout(void);
153 static void inpcb_sched_lazy_timeout(void);
154 static void _inpcb_sched_timeout(unsigned int);
155 static void inpcb_timeout(void *, void *);
156 const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
157 extern int tvtohz(struct timeval *);
158
159 #if CONFIG_PROC_UUID_POLICY
160 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
161 #if NECP
162 static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
163 #endif /* NECP */
164 #endif /* !CONFIG_PROC_UUID_POLICY */
165
166 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
167 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
168
169 int allow_udp_port_exhaustion = 0;
170
171 /*
172 * These configure the range of local port addresses assigned to
173 * "unspecified" outgoing connections/packets/whatever.
174 */
175 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
176 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
177 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
178 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
179 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
180 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
181
182 #define RANGECHK(var, min, max) \
183 if ((var) < (min)) { (var) = (min); } \
184 else if ((var) > (max)) { (var) = (max); }
185
/*
 * Sysctl handler shared by all net.inet.ip.portrange.* OIDs.
 * Reads/writes the int pointed to by oidp->oid_arg1 and clamps any
 * newly written value into the legal range for that variable.
 */
static int
sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	/* working copy of the variable backing this OID */
	int new_value = *(int *)oidp->oid_arg1;
#if (DEBUG | DEVELOPMENT)
	int old_value = *(int *)oidp->oid_arg1;	/* saved for the log line below */
	/*
	 * For unit testing allow a non-superuser process with the
	 * proper entitlement to modify the variables
	 */
	if (req->newptr) {
		if (proc_suser(current_proc()) != 0 &&
		    (error = priv_check_cred(kauth_cred_get(),
		    PRIV_NETINET_RESERVEDPORT, 0))) {
			return EPERM;
		}
	}
#endif /* (DEBUG | DEVELOPMENT) */

	error = sysctl_handle_int(oidp, &new_value, 0, req);
	if (!error) {
		/*
		 * The "low" auto-port range must stay below IPPORT_RESERVED;
		 * every other range variable must stay at or above it.
		 */
		if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
			RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
		} else {
			RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
		}
		*(int *)oidp->oid_arg1 = new_value;
	}

#if (DEBUG | DEVELOPMENT)
	/* Audit trail of who changed the value, and from/to what */
	os_log(OS_LOG_DEFAULT,
	    "%s:%u sysctl net.restricted_port.verbose: %d -> %d)",
	    proc_best_name(current_proc()), proc_selfpid(),
	    old_value, *(int *)oidp->oid_arg1);
#endif /* (DEBUG | DEVELOPMENT) */

	return error;
}
226
227 #undef RANGECHK
228
229 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
230 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");
231
232 #if (DEBUG | DEVELOPMENT)
233 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
234 #else
235 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
236 #endif /* (DEBUG | DEVELOPMENT) */
237
238 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
239 CTLFAGS_IP_PORTRANGE,
240 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
241 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
242 CTLFAGS_IP_PORTRANGE,
243 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
244 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
245 CTLFAGS_IP_PORTRANGE,
246 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
247 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
248 CTLFAGS_IP_PORTRANGE,
249 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
250 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
251 CTLFAGS_IP_PORTRANGE,
252 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
253 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
254 CTLFAGS_IP_PORTRANGE,
255 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
256 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
257 CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
258
259 static uint32_t apn_fallbk_debug = 0;
260 #define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)
261
262 #if !XNU_TARGET_OS_OSX
263 static boolean_t apn_fallbk_enabled = TRUE;
264
265 SYSCTL_DECL(_net_inet);
266 SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
267 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
268 &apn_fallbk_enabled, 0, "APN fallback enable");
269 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
270 &apn_fallbk_debug, 0, "APN fallback debug enable");
271 #else /* XNU_TARGET_OS_OSX */
272 static boolean_t apn_fallbk_enabled = FALSE;
273 #endif /* XNU_TARGET_OS_OSX */
274
275 extern int udp_use_randomport;
276 extern int tcp_use_randomport;
277
278 /* Structs used for flowhash computation */
279 struct inp_flowhash_key_addr {
280 union {
281 struct in_addr v4;
282 struct in6_addr v6;
283 u_int8_t addr8[16];
284 u_int16_t addr16[8];
285 u_int32_t addr32[4];
286 } infha;
287 };
288
289 struct inp_flowhash_key {
290 struct inp_flowhash_key_addr infh_laddr;
291 struct inp_flowhash_key_addr infh_faddr;
292 u_int32_t infh_lport;
293 u_int32_t infh_fport;
294 u_int32_t infh_af;
295 u_int32_t infh_proto;
296 u_int32_t infh_rand1;
297 u_int32_t infh_rand2;
298 };
299
300 #if !SKYWALK
301 static u_int32_t inp_hash_seed = 0;
302 #endif /* !SKYWALK */
303
304 static int infc_cmp(const struct inpcb *, const struct inpcb *);
305
306 /* Flags used by inp_fc_getinp */
307 #define INPFC_SOLOCKED 0x1
308 #define INPFC_REMOVE 0x2
309 static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
310
311 static void inp_fc_feedback(struct inpcb *);
312 extern void tcp_remove_from_time_wait(struct inpcb *inp);
313
314 static LCK_MTX_DECLARE_ATTR(inp_fc_lck, &inpcb_lock_grp, &inpcb_lock_attr);
315
316 RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
317 RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
318 RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
319
320 /*
321 * Use this inp as a key to find an inp in the flowhash tree.
322 * Accesses to it are protected by inp_fc_lck.
323 */
324 struct inpcb key_inp;
325
326 /*
327 * in_pcb.c: manage the Protocol Control Blocks.
328 */
329
/*
 * One-time initialization of the inpcb subsystem: configures privacy
 * logging, allocates the lazy and fast timeout thread calls, and sets
 * up the flow-advisory red-black tree.  Called once at boot.
 */
void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;
	uint32_t logging_config;

	/* Calling this twice would leak thread calls and reset state */
	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	/*
	 * The top bit of the ATM diagnostic config enables redacted
	 * (privacy-preserving) inpcb logging.
	 */
	logging_config = atm_get_diagnostic_config();
	if (logging_config & 0x80000000) {
		inp_log_privacy = 1;
	}

	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	/* Give it an arg so that we know that this is the fast timer */
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
	    inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
		panic("unable to alloc the inpcb thread call");
	}

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}
362
/* True if any timer request (lazy, fast, or nodelay) is outstanding */
#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
/*
 * Thread-call callback for both the lazy and the fast inpcb timer.
 * arg0 is non-NULL only for the fast timer (see in_pcbinit).  Walks
 * every registered inpcbinfo and runs its garbage-collection and/or
 * slow-timer callbacks, then decides how (and whether) to re-arm.
 */
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	/* Consume the pending gc/tick flags under the timeout lock */
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		/*
		 * Drop the timeout lock before walking the pcbinfo list;
		 * the callbacks below may block and take other locks.
		 */
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				/*
				 * Zero the request counters first; the
				 * totals accumulated below therefore only
				 * reflect requests re-posted by the gc
				 * callback itself.
				 */
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				/* Same reset-then-accumulate pattern as above */
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	inpcb_timeout_run--;
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}
453
/* Schedule the inpcb timer with no leeway (the "fast" flavor) */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
459
/* Schedule the inpcb timer with inpcb_timeout_lazy seconds of leeway */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
465
/*
 * Arm the inpcb timeout thread call to fire ~1 second from now.
 * An offset of 0 requests the fast timer; a non-zero offset requests
 * the lazy timer with "offset" seconds of scheduling leeway.
 * Caller must hold inpcb_timeout_lock (may be held as a spin lock;
 * it is converted to a full mutex before arming the thread call).
 */
static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		/* No timer pending: arm whichever flavor was requested */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
			    inpcb_thread_call, NULL, deadline, leeway,
			    THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}
502
503 void
inpcb_gc_sched(struct inpcbinfo * ipi,u_int32_t type)504 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
505 {
506 u_int32_t gccnt;
507
508 lck_mtx_lock_spin(&inpcb_timeout_lock);
509 inpcb_garbage_collecting = TRUE;
510 gccnt = ipi->ipi_gc_req.intimer_nodelay +
511 ipi->ipi_gc_req.intimer_fast;
512
513 if (gccnt > INPCB_GCREQ_THRESHOLD) {
514 type = INPCB_TIMER_FAST;
515 }
516
517 switch (type) {
518 case INPCB_TIMER_NODELAY:
519 os_atomic_inc(&ipi->ipi_gc_req.intimer_nodelay, relaxed);
520 inpcb_sched_timeout();
521 break;
522 case INPCB_TIMER_FAST:
523 os_atomic_inc(&ipi->ipi_gc_req.intimer_fast, relaxed);
524 inpcb_sched_timeout();
525 break;
526 default:
527 os_atomic_inc(&ipi->ipi_gc_req.intimer_lazy, relaxed);
528 inpcb_sched_lazy_timeout();
529 break;
530 }
531 lck_mtx_unlock(&inpcb_timeout_lock);
532 }
533
534 void
inpcb_timer_sched(struct inpcbinfo * ipi,u_int32_t type)535 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
536 {
537 lck_mtx_lock_spin(&inpcb_timeout_lock);
538 inpcb_ticking = TRUE;
539 switch (type) {
540 case INPCB_TIMER_NODELAY:
541 os_atomic_inc(&ipi->ipi_timer_req.intimer_nodelay, relaxed);
542 inpcb_sched_timeout();
543 break;
544 case INPCB_TIMER_FAST:
545 os_atomic_inc(&ipi->ipi_timer_req.intimer_fast, relaxed);
546 inpcb_sched_timeout();
547 break;
548 default:
549 os_atomic_inc(&ipi->ipi_timer_req.intimer_lazy, relaxed);
550 inpcb_sched_lazy_timeout();
551 break;
552 }
553 lck_mtx_unlock(&inpcb_timeout_lock);
554 }
555
556 void
in_pcbinfo_attach(struct inpcbinfo * ipi)557 in_pcbinfo_attach(struct inpcbinfo *ipi)
558 {
559 struct inpcbinfo *ipi0;
560
561 lck_mtx_lock(&inpcb_lock);
562 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
563 if (ipi0 == ipi) {
564 panic("%s: ipi %p already in the list",
565 __func__, ipi);
566 /* NOTREACHED */
567 }
568 }
569 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
570 lck_mtx_unlock(&inpcb_lock);
571 }
572
573 int
in_pcbinfo_detach(struct inpcbinfo * ipi)574 in_pcbinfo_detach(struct inpcbinfo *ipi)
575 {
576 struct inpcbinfo *ipi0;
577 int error = 0;
578
579 lck_mtx_lock(&inpcb_lock);
580 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
581 if (ipi0 == ipi) {
582 break;
583 }
584 }
585 if (ipi0 != NULL) {
586 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
587 } else {
588 error = ENXIO;
589 }
590 lck_mtx_unlock(&inpcb_lock);
591
592 return error;
593 }
594
595 __attribute__((noinline))
596 char *
inp_snprintf_tuple(struct inpcb * inp,char * buf,size_t buflen)597 inp_snprintf_tuple(struct inpcb *inp, char *buf, size_t buflen)
598 {
599 char laddrstr[MAX_IPv6_STR_LEN];
600 char faddrstr[MAX_IPv6_STR_LEN];
601 uint16_t lport = 0;
602 uint16_t fport = 0;
603 uint16_t proto = IPPROTO_IP;
604
605 if (inp->inp_socket != NULL && inp->inp_socket->so_proto != NULL) {
606 proto = inp->inp_socket->so_proto->pr_protocol;
607
608 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
609 lport = inp->inp_lport;
610 fport = inp->inp_fport;
611 }
612 }
613 if (inp->inp_vflag & INP_IPV4) {
614 inet_ntop(AF_INET, (void *)&inp->inp_laddr.s_addr, laddrstr, sizeof(laddrstr));
615 inet_ntop(AF_INET, (void *)&inp->inp_faddr.s_addr, faddrstr, sizeof(faddrstr));
616 } else if (inp->inp_vflag & INP_IPV6) {
617 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, laddrstr, sizeof(laddrstr));
618 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, faddrstr, sizeof(faddrstr));
619 }
620 snprintf(buf, buflen, "[%u %s:%u %s:%u]",
621 proto, laddrstr, ntohs(lport), faddrstr, ntohs(fport));
622
623 return buf;
624 }
625
/*
 * Decide (once per inpcb) whether this socket may carry management
 * data: sets INP2_MANAGEMENT_CHECKED, and INP2_MANAGEMENT_ALLOWED when
 * the restriction is globally disabled or the task/socket holds the
 * required entitlement.  Subsequent calls are no-ops.
 */
__attribute__((noinline))
void
in_pcb_check_management_entitled(struct inpcb *inp)
{
	/* Already evaluated for this pcb; the result is cached in flags2 */
	if (inp->inp_flags2 & INP2_MANAGEMENT_CHECKED) {
		return;
	}

	if (management_data_unrestricted) {
		/* Restriction globally disabled: everyone is allowed */
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
	} else if (if_management_interface_check_needed == true) {
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
		/*
		 * Note that soopt_cred_check check both intcoproc entitlements
		 * We check MANAGEMENT_DATA_ENTITLEMENT as there is no corresponding PRIV value
		 */
		if (soopt_cred_check(inp->inp_socket, PRIV_NET_RESTRICTED_INTCOPROC, false, false) == 0
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		} else {
			/* Denied: optionally log the offending process/tuple */
			if (__improbable(if_management_verbose > 1)) {
				char buf[128];

				os_log(OS_LOG_DEFAULT, "in_pcb_check_management_entitled %s:%d not management entitled %s",
				    proc_best_name(current_proc()),
				    proc_selfpid(),
				    inp_snprintf_tuple(inp, buf, sizeof(buf)));
			}
		}
	}
}
662
663 /*
664 * Allocate a PCB and associate it with the socket.
665 *
666 * Returns: 0 Success
667 * ENOBUFS
668 * ENOMEM
669 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		/* Fresh allocation from the protocol's pcb zone */
		inp = zalloc_flags(pcbinfo->ipi_zone,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	} else {
		/*
		 * Reuse the pcb cached in the socket layer; zero it but
		 * preserve the saved per-protocol pcb pointer across the
		 * bzero.
		 */
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof(*inp));
		inp->inp_saved_ppcb = temp;
	}

	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	/* Link the socket to its new pcb */
	so->so_pcb = (caddr_t)inp;

	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		/* Per-pcb mutex for protocols that lock at pcb granularity */
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    &pcbinfo->ipi_lock_attr);
	}

	/* IPv6 sockets default to v6-only unless mapped addresses are on */
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

	(void) inp_update_policy(inp);

	/*
	 * Publish the pcb on the protocol's global list; the gencnt is
	 * bumped again under the list lock.
	 */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(&pcbinfo->ipi_lock);
	return 0;
}
753
754 /*
755 * in_pcblookup_local_and_cleanup does everything
756 * in_pcblookup_local does but it checks for a socket
757 * that's going away. Since we know that the lock is
758 * held read+write when this function is called, we
759 * can safely dispose of this socket like the slow
760 * timer would usually do and return NULL. This is
761 * great for bind.
762 */
struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    u_int lport_arg, int wild_okay)
{
	struct inpcb *inp;

	/* Perform normal lookup */
	inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);

	/* Check if we found a match but it's waiting to be disposed */
	if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
		struct socket *so = inp->inp_socket;

		socket_lock(so, 0);

		if (so->so_usecount == 0) {
			/*
			 * No remaining users: detach first if the pcb is
			 * not already dead, then dispose of it so the
			 * caller sees the port as free.
			 */
			if (inp->inp_state != INPCB_STATE_DEAD) {
				in_pcbdetach(inp);
			}
			in_pcbdispose(inp); /* will unlock & destroy */
			inp = NULL;
		} else {
			/* Still in use elsewhere; leave it alone */
			socket_unlock(so, 0);
		}
	}

	return inp;
}
791
792 static void
in_pcb_conflict_post_msg(u_int16_t port)793 in_pcb_conflict_post_msg(u_int16_t port)
794 {
795 /*
796 * Radar 5523020 send a kernel event notification if a
797 * non-participating socket tries to bind the port a socket
798 * who has set SOF_NOTIFYCONFLICT owns.
799 */
800 struct kev_msg ev_msg;
801 struct kev_in_portinuse in_portinuse;
802
803 bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
804 bzero(&ev_msg, sizeof(struct kev_msg));
805 in_portinuse.port = ntohs(port); /* port in host order */
806 in_portinuse.req_pid = proc_selfpid();
807 ev_msg.vendor_code = KEV_VENDOR_APPLE;
808 ev_msg.kev_class = KEV_NETWORK_CLASS;
809 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
810 ev_msg.event_code = KEV_INET_PORTINUSE;
811 ev_msg.dv[0].data_ptr = &in_portinuse;
812 ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
813 ev_msg.dv[1].data_length = 0;
814 dlil_post_complete_msg(NULL, &ev_msg);
815 }
816
817 /*
818 * Bind an INPCB to an address and/or port. This routine should not alter
819 * the caller-supplied local address "nam".
820 *
821 * Returns: 0 Success
822 * EADDRNOTAVAIL Address not available.
823 * EINVAL Invalid argument
824 * EAFNOSUPPORT Address family not supported [notdef]
825 * EACCES Permission denied
826 * EADDRINUSE Address in use
827 * EAGAIN Resource unavailable, try again
828 * priv_check_cred:EPERM Operation not permitted
829 */
830 int
in_pcbbind(struct inpcb * inp,struct sockaddr * nam,struct proc * p)831 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
832 {
833 struct socket *so = inp->inp_socket;
834 unsigned short *lastport;
835 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
836 u_short lport = 0, rand_port = 0;
837 int wild = 0;
838 int reuseport = (so->so_options & SO_REUSEPORT);
839 int error = 0;
840 int randomport;
841 int conflict = 0;
842 boolean_t anonport = FALSE;
843 kauth_cred_t cred;
844 struct in_addr laddr;
845 struct ifnet *outif = NULL;
846
847 if (inp->inp_flags2 & INP2_BIND_IN_PROGRESS) {
848 return EINVAL;
849 }
850 inp->inp_flags2 |= INP2_BIND_IN_PROGRESS;
851
852 if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
853 error = EADDRNOTAVAIL;
854 goto done;
855 }
856 if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
857 wild = 1;
858 }
859
860 bzero(&laddr, sizeof(laddr));
861
862 socket_unlock(so, 0); /* keep reference on socket */
863 lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
864 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
865 /* another thread completed the bind */
866 lck_rw_done(&pcbinfo->ipi_lock);
867 socket_lock(so, 0);
868 error = EINVAL;
869 goto done;
870 }
871
872 if (nam != NULL) {
873 if (nam->sa_len != sizeof(struct sockaddr_in)) {
874 lck_rw_done(&pcbinfo->ipi_lock);
875 socket_lock(so, 0);
876 error = EINVAL;
877 goto done;
878 }
879 #if 0
880 /*
881 * We should check the family, but old programs
882 * incorrectly fail to initialize it.
883 */
884 if (nam->sa_family != AF_INET) {
885 lck_rw_done(&pcbinfo->ipi_lock);
886 socket_lock(so, 0);
887 error = EAFNOSUPPORT;
888 goto done;
889 }
890 #endif /* 0 */
891 lport = SIN(nam)->sin_port;
892
893 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
894 /*
895 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
896 * allow complete duplication of binding if
897 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
898 * and a multicast address is bound on both
899 * new and duplicated sockets.
900 */
901 if (so->so_options & SO_REUSEADDR) {
902 reuseport = SO_REUSEADDR | SO_REUSEPORT;
903 }
904 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
905 struct sockaddr_in sin;
906 struct ifaddr *ifa;
907
908 /* Sanitized for interface address searches */
909 bzero(&sin, sizeof(sin));
910 sin.sin_family = AF_INET;
911 sin.sin_len = sizeof(struct sockaddr_in);
912 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
913
914 ifa = ifa_ifwithaddr(SA(&sin));
915 if (ifa == NULL) {
916 lck_rw_done(&pcbinfo->ipi_lock);
917 socket_lock(so, 0);
918 error = EADDRNOTAVAIL;
919 goto done;
920 } else {
921 /*
922 * Opportunistically determine the outbound
923 * interface that may be used; this may not
924 * hold true if we end up using a route
925 * going over a different interface, e.g.
926 * when sending to a local address. This
927 * will get updated again after sending.
928 */
929 IFA_LOCK(ifa);
930 outif = ifa->ifa_ifp;
931 IFA_UNLOCK(ifa);
932 IFA_REMREF(ifa);
933 }
934 }
935
936 #if SKYWALK
937 if (inp->inp_flags2 & INP2_EXTERNAL_PORT) {
938 // Extract the external flow info
939 struct ns_flow_info nfi = {};
940 error = necp_client_get_netns_flow_info(inp->necp_client_uuid,
941 &nfi);
942 if (error != 0) {
943 lck_rw_done(&pcbinfo->ipi_lock);
944 socket_lock(so, 0);
945 goto done;
946 }
947
948 // Extract the reserved port
949 u_int16_t reserved_lport = 0;
950 if (nfi.nfi_laddr.sa.sa_family == AF_INET) {
951 reserved_lport = nfi.nfi_laddr.sin.sin_port;
952 } else if (nfi.nfi_laddr.sa.sa_family == AF_INET6) {
953 reserved_lport = nfi.nfi_laddr.sin6.sin6_port;
954 } else {
955 lck_rw_done(&pcbinfo->ipi_lock);
956 socket_lock(so, 0);
957 error = EINVAL;
958 goto done;
959 }
960
961 // Validate or use the reserved port
962 if (lport == 0) {
963 lport = reserved_lport;
964 } else if (lport != reserved_lport) {
965 lck_rw_done(&pcbinfo->ipi_lock);
966 socket_lock(so, 0);
967 error = EINVAL;
968 goto done;
969 }
970 }
971
972 /* Do not allow reserving a UDP port if remaining UDP port count is below 4096 */
973 if (SOCK_PROTO(so) == IPPROTO_UDP && !allow_udp_port_exhaustion) {
974 uint32_t current_reservations = 0;
975 if (inp->inp_vflag & INP_IPV6) {
976 current_reservations = netns_lookup_reservations_count_in6(inp->in6p_laddr, IPPROTO_UDP);
977 } else {
978 current_reservations = netns_lookup_reservations_count_in(inp->inp_laddr, IPPROTO_UDP);
979 }
980 if (USHRT_MAX - UDP_RANDOM_PORT_RESERVE < current_reservations) {
981 log(LOG_ERR, "UDP port not available, less than 4096 UDP ports left");
982 lck_rw_done(&pcbinfo->ipi_lock);
983 socket_lock(so, 0);
984 error = EADDRNOTAVAIL;
985 goto done;
986 }
987 }
988
989 #endif /* SKYWALK */
990
991 if (lport != 0) {
992 struct inpcb *t;
993 uid_t u;
994
995 #if XNU_TARGET_OS_OSX
996 if (ntohs(lport) < IPPORT_RESERVED &&
997 SIN(nam)->sin_addr.s_addr != 0 &&
998 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
999 cred = kauth_cred_proc_ref(p);
1000 error = priv_check_cred(cred,
1001 PRIV_NETINET_RESERVEDPORT, 0);
1002 kauth_cred_unref(&cred);
1003 if (error != 0) {
1004 lck_rw_done(&pcbinfo->ipi_lock);
1005 socket_lock(so, 0);
1006 error = EACCES;
1007 goto done;
1008 }
1009 }
1010 #endif /* XNU_TARGET_OS_OSX */
1011 /*
 * Check whether the process is allowed to bind to a restricted port
1013 */
1014 if (!current_task_can_use_restricted_in_port(lport,
1015 (uint8_t)so->so_proto->pr_protocol, PORT_FLAGS_BSD)) {
1016 lck_rw_done(&pcbinfo->ipi_lock);
1017 socket_lock(so, 0);
1018 error = EADDRINUSE;
1019 goto done;
1020 }
1021
1022 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
1023 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
1024 (t = in_pcblookup_local_and_cleanup(
1025 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
1026 INPLOOKUP_WILDCARD)) != NULL &&
1027 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1028 t->inp_laddr.s_addr != INADDR_ANY ||
1029 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
1030 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
1031 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
1032 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1033 t->inp_laddr.s_addr != INADDR_ANY) &&
1034 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1035 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1036 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1037 if ((t->inp_socket->so_flags &
1038 SOF_NOTIFYCONFLICT) &&
1039 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1040 conflict = 1;
1041 }
1042
1043 lck_rw_done(&pcbinfo->ipi_lock);
1044
1045 if (conflict) {
1046 in_pcb_conflict_post_msg(lport);
1047 }
1048
1049 socket_lock(so, 0);
1050 error = EADDRINUSE;
1051 goto done;
1052 }
1053 t = in_pcblookup_local_and_cleanup(pcbinfo,
1054 SIN(nam)->sin_addr, lport, wild);
1055 if (t != NULL &&
1056 (reuseport & t->inp_socket->so_options) == 0 &&
1057 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1058 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1059 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1060 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1061 t->inp_laddr.s_addr != INADDR_ANY ||
1062 SOCK_DOM(so) != PF_INET6 ||
1063 SOCK_DOM(t->inp_socket) != PF_INET6) {
1064 if ((t->inp_socket->so_flags &
1065 SOF_NOTIFYCONFLICT) &&
1066 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1067 conflict = 1;
1068 }
1069
1070 lck_rw_done(&pcbinfo->ipi_lock);
1071
1072 if (conflict) {
1073 in_pcb_conflict_post_msg(lport);
1074 }
1075 socket_lock(so, 0);
1076 error = EADDRINUSE;
1077 goto done;
1078 }
1079 }
1080 #if SKYWALK
1081 if ((SOCK_PROTO(so) == IPPROTO_TCP ||
1082 SOCK_PROTO(so) == IPPROTO_UDP) &&
1083 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1084 int res_err = 0;
1085 if (inp->inp_vflag & INP_IPV6) {
1086 res_err = netns_reserve_in6(
1087 &inp->inp_netns_token,
1088 SIN6(nam)->sin6_addr,
1089 (uint8_t)SOCK_PROTO(so), lport, NETNS_BSD,
1090 NULL);
1091 } else {
1092 res_err = netns_reserve_in(
1093 &inp->inp_netns_token,
1094 SIN(nam)->sin_addr, (uint8_t)SOCK_PROTO(so),
1095 lport, NETNS_BSD, NULL);
1096 }
1097 if (res_err != 0) {
1098 lck_rw_done(&pcbinfo->ipi_lock);
1099 socket_lock(so, 0);
1100 error = EADDRINUSE;
1101 goto done;
1102 }
1103 }
1104 #endif /* SKYWALK */
1105 }
1106 laddr = SIN(nam)->sin_addr;
1107 }
1108 if (lport == 0) {
1109 u_short first, last;
1110 int count;
1111 bool found;
1112
1113 /*
1114 * Override wild = 1 for implicit bind (mainly used by connect)
1115 * For implicit bind (lport == 0), we always use an unused port,
1116 * so REUSEADDR|REUSEPORT don't apply
1117 */
1118 wild = 1;
1119
1120 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
1121 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
1122 udp_use_randomport);
1123
1124 /*
1125 * Even though this looks similar to the code in
1126 * in6_pcbsetport, the v6 vs v4 checks are different.
1127 */
1128 anonport = TRUE;
1129 if (inp->inp_flags & INP_HIGHPORT) {
1130 first = (u_short)ipport_hifirstauto; /* sysctl */
1131 last = (u_short)ipport_hilastauto;
1132 lastport = &pcbinfo->ipi_lasthi;
1133 } else if (inp->inp_flags & INP_LOWPORT) {
1134 cred = kauth_cred_proc_ref(p);
1135 error = priv_check_cred(cred,
1136 PRIV_NETINET_RESERVEDPORT, 0);
1137 kauth_cred_unref(&cred);
1138 if (error != 0) {
1139 lck_rw_done(&pcbinfo->ipi_lock);
1140 socket_lock(so, 0);
1141 goto done;
1142 }
1143 first = (u_short)ipport_lowfirstauto; /* 1023 */
1144 last = (u_short)ipport_lowlastauto; /* 600 */
1145 lastport = &pcbinfo->ipi_lastlow;
1146 } else {
1147 first = (u_short)ipport_firstauto; /* sysctl */
1148 last = (u_short)ipport_lastauto;
1149 lastport = &pcbinfo->ipi_lastport;
1150 }
1151 /* No point in randomizing if only one port is available */
1152
1153 if (first == last) {
1154 randomport = 0;
1155 }
1156 /*
1157 * Simple check to ensure all ports are not used up causing
1158 * a deadlock here.
1159 *
1160 * We split the two cases (up and down) so that the direction
1161 * is not being tested on each round of the loop.
1162 */
1163 if (first > last) {
1164 struct in_addr lookup_addr;
1165
1166 /*
1167 * counting down
1168 */
1169 if (randomport) {
1170 read_frandom(&rand_port, sizeof(rand_port));
1171 *lastport =
1172 first - (rand_port % (first - last));
1173 }
1174 count = first - last;
1175
1176 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1177 inp->inp_laddr;
1178
1179 found = false;
1180 do {
1181 if (count-- < 0) { /* completely used? */
1182 lck_rw_done(&pcbinfo->ipi_lock);
1183 socket_lock(so, 0);
1184 error = EADDRNOTAVAIL;
1185 goto done;
1186 }
1187 --*lastport;
1188 if (*lastport > first || *lastport < last) {
1189 *lastport = first;
1190 }
1191 lport = htons(*lastport);
1192
1193 /*
1194 * Skip if this is a restricted port as we do not want to
 * use restricted ports as ephemeral ports
1196 */
1197 if (IS_RESTRICTED_IN_PORT(lport)) {
1198 continue;
1199 }
1200
1201 found = in_pcblookup_local_and_cleanup(pcbinfo,
1202 lookup_addr, lport, wild) == NULL;
1203 #if SKYWALK
1204 if (found &&
1205 (SOCK_PROTO(so) == IPPROTO_TCP ||
1206 SOCK_PROTO(so) == IPPROTO_UDP) &&
1207 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1208 int res_err;
1209 if (inp->inp_vflag & INP_IPV6) {
1210 res_err = netns_reserve_in6(
1211 &inp->inp_netns_token,
1212 inp->in6p_laddr,
1213 (uint8_t)SOCK_PROTO(so), lport,
1214 NETNS_BSD, NULL);
1215 } else {
1216 res_err = netns_reserve_in(
1217 &inp->inp_netns_token,
1218 lookup_addr, (uint8_t)SOCK_PROTO(so),
1219 lport, NETNS_BSD, NULL);
1220 }
1221 found = res_err == 0;
1222 }
1223 #endif /* SKYWALK */
1224 } while (!found);
1225 } else {
1226 struct in_addr lookup_addr;
1227
1228 /*
1229 * counting up
1230 */
1231 if (randomport) {
1232 read_frandom(&rand_port, sizeof(rand_port));
1233 *lastport =
1234 first + (rand_port % (first - last));
1235 }
1236 count = last - first;
1237
1238 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1239 inp->inp_laddr;
1240
1241 found = false;
1242 do {
1243 if (count-- < 0) { /* completely used? */
1244 lck_rw_done(&pcbinfo->ipi_lock);
1245 socket_lock(so, 0);
1246 error = EADDRNOTAVAIL;
1247 goto done;
1248 }
1249 ++*lastport;
1250 if (*lastport < first || *lastport > last) {
1251 *lastport = first;
1252 }
1253 lport = htons(*lastport);
1254
1255 /*
1256 * Skip if this is a restricted port as we do not want to
 * use restricted ports as ephemeral ports
1258 */
1259 if (IS_RESTRICTED_IN_PORT(lport)) {
1260 continue;
1261 }
1262
1263 found = in_pcblookup_local_and_cleanup(pcbinfo,
1264 lookup_addr, lport, wild) == NULL;
1265 #if SKYWALK
1266 if (found &&
1267 (SOCK_PROTO(so) == IPPROTO_TCP ||
1268 SOCK_PROTO(so) == IPPROTO_UDP) &&
1269 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1270 int res_err;
1271 if (inp->inp_vflag & INP_IPV6) {
1272 res_err = netns_reserve_in6(
1273 &inp->inp_netns_token,
1274 inp->in6p_laddr,
1275 (uint8_t)SOCK_PROTO(so), lport,
1276 NETNS_BSD, NULL);
1277 } else {
1278 res_err = netns_reserve_in(
1279 &inp->inp_netns_token,
1280 lookup_addr, (uint8_t)SOCK_PROTO(so),
1281 lport, NETNS_BSD, NULL);
1282 }
1283 found = res_err == 0;
1284 }
1285 #endif /* SKYWALK */
1286 } while (!found);
1287 }
1288 }
1289 socket_lock(so, 0);
1290
1291 /*
1292 * We unlocked socket's protocol lock for a long time.
1293 * The socket might have been dropped/defuncted.
1294 * Checking if world has changed since.
1295 */
1296 if (inp->inp_state == INPCB_STATE_DEAD) {
1297 #if SKYWALK
1298 netns_release(&inp->inp_netns_token);
1299 #endif /* SKYWALK */
1300 lck_rw_done(&pcbinfo->ipi_lock);
1301 error = ECONNABORTED;
1302 goto done;
1303 }
1304
1305 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1306 #if SKYWALK
1307 netns_release(&inp->inp_netns_token);
1308 #endif /* SKYWALK */
1309 lck_rw_done(&pcbinfo->ipi_lock);
1310 error = EINVAL;
1311 goto done;
1312 }
1313
1314 if (laddr.s_addr != INADDR_ANY) {
1315 inp->inp_laddr = laddr;
1316 inp->inp_last_outifp = outif;
1317 #if SKYWALK
1318 if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
1319 netns_set_ifnet(&inp->inp_netns_token, outif);
1320 }
1321 #endif /* SKYWALK */
1322 }
1323 inp->inp_lport = lport;
1324 if (anonport) {
1325 inp->inp_flags |= INP_ANONPORT;
1326 }
1327
1328 if (in_pcbinshash(inp, 1) != 0) {
1329 inp->inp_laddr.s_addr = INADDR_ANY;
1330 inp->inp_last_outifp = NULL;
1331
1332 #if SKYWALK
1333 netns_release(&inp->inp_netns_token);
1334 #endif /* SKYWALK */
1335 inp->inp_lport = 0;
1336 if (anonport) {
1337 inp->inp_flags &= ~INP_ANONPORT;
1338 }
1339 lck_rw_done(&pcbinfo->ipi_lock);
1340 error = EAGAIN;
1341 goto done;
1342 }
1343 lck_rw_done(&pcbinfo->ipi_lock);
1344 sflt_notify(so, sock_evt_bound, NULL);
1345
1346 in_pcb_check_management_entitled(inp);
1347 done:
1348 inp->inp_flags2 &= ~INP2_BIND_IN_PROGRESS;
1349 return error;
1350 }
1351
/*
 * True when the IPv4 destination is link-local, loopback, zeronet,
 * multicast or RFC 1918 private space -- destinations that would never
 * need the cellular APN fallback path.
 */
#define APN_FALLBACK_IP_FILTER(a) \
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

/* Minimum seconds between APN fallback notifications (throttle). */
#define APN_FALLBACK_NOTIF_INTERVAL  2 /* Magic Number */
/* net_uptime() timestamp of the most recent notification posted. */
static uint64_t last_apn_fallback = 0;
1361
1362 static boolean_t
apn_fallback_required(proc_t proc,struct socket * so,struct sockaddr_in * p_dstv4)1363 apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
1364 {
1365 uint64_t timenow;
1366 struct sockaddr_storage lookup_default_addr;
1367 struct rtentry *rt = NULL;
1368
1369 VERIFY(proc != NULL);
1370
1371 if (apn_fallbk_enabled == FALSE) {
1372 return FALSE;
1373 }
1374
1375 if (proc == kernproc) {
1376 return FALSE;
1377 }
1378
1379 if (so && (so->so_options & SO_NOAPNFALLBK)) {
1380 return FALSE;
1381 }
1382
1383 timenow = net_uptime();
1384 if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
1385 apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
1386 return FALSE;
1387 }
1388
1389 if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
1390 return FALSE;
1391 }
1392
1393 /* Check if we have unscoped IPv6 default route through cellular */
1394 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1395 lookup_default_addr.ss_family = AF_INET6;
1396 lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);
1397
1398 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1399 if (NULL == rt) {
1400 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1401 "unscoped default IPv6 route.\n"));
1402 return FALSE;
1403 }
1404
1405 if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
1406 rtfree(rt);
1407 apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
1408 "unscoped default IPv6 route through cellular interface.\n"));
1409 return FALSE;
1410 }
1411
1412 /*
1413 * We have a default IPv6 route, ensure that
1414 * we do not have IPv4 default route before triggering
1415 * the event
1416 */
1417 rtfree(rt);
1418 rt = NULL;
1419
1420 bzero(&lookup_default_addr, sizeof(lookup_default_addr));
1421 lookup_default_addr.ss_family = AF_INET;
1422 lookup_default_addr.ss_len = sizeof(struct sockaddr_in);
1423
1424 rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
1425
1426 if (rt) {
1427 rtfree(rt);
1428 rt = NULL;
1429 apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
1430 "IPv4 default route!\n"));
1431 return FALSE;
1432 }
1433
1434 {
1435 /*
1436 * We disable APN fallback if the binary is not a third-party app.
1437 * Note that platform daemons use their process name as a
1438 * bundle ID so we filter out bundle IDs without dots.
1439 */
1440 const char *bundle_id = cs_identity_get(proc);
1441 if (bundle_id == NULL ||
1442 bundle_id[0] == '\0' ||
1443 strchr(bundle_id, '.') == NULL ||
1444 strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
1445 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
1446 "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
1447 return FALSE;
1448 }
1449 }
1450
1451 {
1452 /*
1453 * The Apple App Store IPv6 requirement started on
1454 * June 1st, 2016 at 12:00:00 AM PDT.
1455 * We disable APN fallback if the binary is more recent than that.
1456 * We check both atime and birthtime since birthtime is not always supported.
1457 */
1458 static const long ipv6_start_date = 1464764400L;
1459 vfs_context_t context;
1460 struct stat64 sb;
1461 int vn_stat_error;
1462
1463 bzero(&sb, sizeof(struct stat64));
1464 context = vfs_context_create(NULL);
1465 vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
1466 (void)vfs_context_rele(context);
1467
1468 if (vn_stat_error != 0 ||
1469 sb.st_atimespec.tv_sec >= ipv6_start_date ||
1470 sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
1471 apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
1472 "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
1473 vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
1474 sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
1475 return FALSE;
1476 }
1477 }
1478 return TRUE;
1479 }
1480
1481 static void
apn_fallback_trigger(proc_t proc,struct socket * so)1482 apn_fallback_trigger(proc_t proc, struct socket *so)
1483 {
1484 pid_t pid = 0;
1485 struct kev_msg ev_msg;
1486 struct kev_netevent_apnfallbk_data apnfallbk_data;
1487
1488 last_apn_fallback = net_uptime();
1489 pid = proc_pid(proc);
1490 uuid_t application_uuid;
1491 uuid_clear(application_uuid);
1492 proc_getexecutableuuid(proc, application_uuid,
1493 sizeof(application_uuid));
1494
1495 bzero(&ev_msg, sizeof(struct kev_msg));
1496 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1497 ev_msg.kev_class = KEV_NETWORK_CLASS;
1498 ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
1499 ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;
1500
1501 bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1502
1503 if (so->so_flags & SOF_DELEGATED) {
1504 apnfallbk_data.epid = so->e_pid;
1505 uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1506 } else {
1507 apnfallbk_data.epid = so->last_pid;
1508 uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1509 }
1510
1511 ev_msg.dv[0].data_ptr = &apnfallbk_data;
1512 ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1513 kev_post_msg(&ev_msg);
1514 apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1515 }
1516
1517 /*
1518 * Transform old in_pcbconnect() into an inner subroutine for new
1519 * in_pcbconnect(); do some validity-checking on the remote address
1520 * (in "nam") and then determine local host address (i.e., which
1521 * interface) to use to access that remote host.
1522 *
1523 * This routine may alter the caller-supplied remote address "nam".
1524 *
1525 * The caller may override the bound-to-interface setting of the socket
1526 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1527 *
1528 * This routine might return an ifp with a reference held if the caller
1529 * provides a non-NULL outif, even in the error case. The caller is
1530 * responsible for releasing its reference.
1531 *
1532 * Returns: 0 Success
1533 * EINVAL Invalid argument
1534 * EAFNOSUPPORT Address family not supported
1535 * EADDRNOTAVAIL Address not available
1536 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;	/* set when policy denies the interface */

	if (outif != NULL) {
		*outif = NULL;
	}
	/* Validate the remote address: size, family and (unless raw) port. */
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	in_pcb_check_management_entitled(inp);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(&in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			IFA_UNLOCK(&ia->ia_ifa);
			/* borrowed under in_ifaddr_rwlock; no reference held */
			ia = NULL;
		}
		lck_rw_done(&in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Drop a cached route that is stale or for a different destination. */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/* No route at all: a cellular-only setup may need APN fallback. */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam)) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			/* reference dropped at the bottom of done: */
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed off.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	/* Last resort: take the address attached to the route itself. */
	if (ia == NULL) {
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			IFA_ADDREF(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			/* swap the earlier pick for one on the multicast ifp */
			if (ia != NULL) {
				IFA_REMREF(&ia->ia_ifa);
			}
			lck_rw_lock_shared(&in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				IFA_ADDREF(&ia->ia_ifa);
			}
			lck_rw_done(&in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp);	/* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	/* Tell socket filters the interface was denied by policy. */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}
1820
1821 /*
1822 * Outer subroutine:
1823 * Connect from a socket to a specified address.
1824 * Both address and port must be specified in argument sin.
1825 * If don't have a local address for this socket yet,
1826 * then pick one.
1827 *
1828 * The caller may override the bound-to-interface setting of the socket
1829 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1830 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
		return error;
	}

	/*
	 * Look for an existing PCB with the same 5-tuple; the socket lock
	 * is dropped around the hash lookup, so the socket state must be
	 * revalidated afterwards.
	 */
	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return ECONNREFUSED;
	}

	/* An identical connection already exists. */
	if (pcb != NULL) {
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return EADDRINUSE;
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* Implicit bind: pick an ephemeral local port. */
			error = in_pcbbind(inp, NULL, p);
			if (error) {
				return error;
			}
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token,
			    inp->inp_last_outifp);
		}
#endif /* SKYWALK */
		/* remember the local address was implicit (wildcard) */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routines can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0) {
			return EINVAL;
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* Commit the foreign address/port and re-hash under ipi_lock. */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	return 0;
}
1930
/*
 * Disconnect: clear the foreign address/port of a connected PCB and
 * re-insert it in the lookup hash.  May detach the PCB entirely when
 * the socket has already lost its last file reference.
 */
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* Snapshot UDP stats before the foreign address is cleared. */
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_cache(inp);
	}

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

#if CONTENT_FILTER
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	/* Re-hash: the 5-tuple no longer carries a foreign address. */
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
		in_pcbdetach(inp);
	}
}
1967
/*
 * Detach the PCB from its socket: release IPsec policy, statistics and
 * keepalive state, mark the PCB STOPUSING/DEAD, release Skywalk port
 * namespace reservations, and schedule the PCB for garbage collection.
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/* Account for UDP sockets that never exchanged any packets. */
	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
		nstat_pcb_detach(inp);
	}

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		kfree_data(inp->inp_keepalive_data, inp->inp_keepalive_datalen);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if SKYWALK
	/* Free up the port in the namespace registrar if not in TIME_WAIT */
	if (!(inp->inp_flags2 & INP2_TIMEWAIT)) {
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
	}
#endif /* SKYWALK */

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		if (imo != NULL) {
			IMO_REMREF(imo);
		}
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_stat != NULL &&
			    (inp->inp_stat->txbytes != 0 ||
			    inp->inp_stat->rxbytes != 0)) {
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->e_pid,
						so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->last_pid,
						so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
2073
2074
/*
 * Final disposal of a PCB whose last use count has gone: remove it from
 * the pcbinfo lists, sever the socket<->PCB linkage, and free both the
 * PCB (unless cached in the socket layer) and the socket itself.
 *
 * Preconditions (enforced by panics below): the socket use count must be
 * zero and the PCB want count must be WNT_STOPUSING; the caller must hold
 * the pcbinfo lock exclusively.
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	if (so != NULL && so->so_usecount != 0) {
		/* Still referenced; disposing now would be use-after-free */
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(&ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			/* Per-PCB mutex dies with the PCB */
			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			/* Not cached in the socket layer; return to zone now */
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
2152
2153 /*
2154 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
2155 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
2157 * without the need for a wrapper function.
2158 */
2159 int
in_getsockaddr(struct socket * so,struct sockaddr ** nam)2160 in_getsockaddr(struct socket *so, struct sockaddr **nam)
2161 {
2162 struct inpcb *inp;
2163 struct sockaddr_in *sin;
2164
2165 /*
2166 * Do the malloc first in case it blocks.
2167 */
2168 sin = (struct sockaddr_in *)alloc_sockaddr(sizeof(*sin),
2169 Z_WAITOK | Z_NOFAIL);
2170
2171 sin->sin_family = AF_INET;
2172
2173 if ((inp = sotoinpcb(so)) == NULL) {
2174 free_sockaddr(sin);
2175 return EINVAL;
2176 }
2177 sin->sin_port = inp->inp_lport;
2178 sin->sin_addr = inp->inp_laddr;
2179
2180 *nam = (struct sockaddr *)sin;
2181 return 0;
2182 }
2183
2184 int
in_getsockaddr_s(struct socket * so,struct sockaddr_in * ss)2185 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
2186 {
2187 struct sockaddr_in *sin = ss;
2188 struct inpcb *inp;
2189
2190 VERIFY(ss != NULL);
2191 bzero(ss, sizeof(*ss));
2192
2193 sin->sin_family = AF_INET;
2194 sin->sin_len = sizeof(*sin);
2195
2196 if ((inp = sotoinpcb(so)) == NULL) {
2197 return EINVAL;
2198 }
2199
2200 sin->sin_port = inp->inp_lport;
2201 sin->sin_addr = inp->inp_laddr;
2202 return 0;
2203 }
2204
2205 int
in_getpeeraddr(struct socket * so,struct sockaddr ** nam)2206 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
2207 {
2208 struct inpcb *inp;
2209 struct sockaddr_in *sin;
2210
2211 /*
2212 * Do the malloc first in case it blocks.
2213 */
2214 sin = (struct sockaddr_in *)alloc_sockaddr(sizeof(*sin),
2215 Z_WAITOK | Z_NOFAIL);
2216
2217 sin->sin_family = AF_INET;
2218
2219 if ((inp = sotoinpcb(so)) == NULL) {
2220 free_sockaddr(sin);
2221 return EINVAL;
2222 }
2223 sin->sin_port = inp->inp_fport;
2224 sin->sin_addr = inp->inp_faddr;
2225
2226 *nam = (struct sockaddr *)sin;
2227 return 0;
2228 }
2229
/*
 * Invoke "notify" with "errno" on every IPv4 PCB in "pcbinfo" whose
 * foreign address matches "faddr".  The pcbinfo list lock is held shared
 * across the walk; each matching socket is individually locked around the
 * callback, and a want reference (WNT_ACQUIRE/WNT_RELEASE) brackets the
 * call so the PCB cannot be reclaimed underneath it.
 */
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		/* IPv4 PCBs only */
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL) {
			continue;
		}
		/* Skip PCBs already marked for reclaim */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
}
2256
2257 /*
2258 * Check for alternatives when higher level complains
2259 * about service problems. For now, invalidate cached
2260 * routing information. If the route was created dynamically
2261 * (by a redirect), time to try a default gateway again.
2262 */
2263 void
in_losing(struct inpcb * inp)2264 in_losing(struct inpcb *inp)
2265 {
2266 boolean_t release = FALSE;
2267 struct rtentry *rt;
2268
2269 if ((rt = inp->inp_route.ro_rt) != NULL) {
2270 struct in_ifaddr *ia = NULL;
2271
2272 RT_LOCK(rt);
2273 if (rt->rt_flags & RTF_DYNAMIC) {
2274 /*
2275 * Prevent another thread from modifying rt_key,
2276 * rt_gateway via rt_setgate() after rt_lock is
2277 * dropped by marking the route as defunct.
2278 */
2279 rt->rt_flags |= RTF_CONDEMNED;
2280 RT_UNLOCK(rt);
2281 (void) rtrequest(RTM_DELETE, rt_key(rt),
2282 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
2283 } else {
2284 RT_UNLOCK(rt);
2285 }
2286 /* if the address is gone keep the old route in the pcb */
2287 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2288 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2289 /*
2290 * Address is around; ditch the route. A new route
2291 * can be allocated the next time output is attempted.
2292 */
2293 release = TRUE;
2294 }
2295 if (ia != NULL) {
2296 IFA_REMREF(&ia->ia_ifa);
2297 }
2298 }
2299 if (rt == NULL || release) {
2300 ROUTE_RELEASE(&inp->inp_route);
2301 }
2302 }
2303
2304 /*
2305 * After a routing change, flush old routing
2306 * and allocate a (hopefully) better one.
2307 */
2308 void
in_rtchange(struct inpcb * inp,int errno)2309 in_rtchange(struct inpcb *inp, int errno)
2310 {
2311 #pragma unused(errno)
2312 boolean_t release = FALSE;
2313 struct rtentry *rt;
2314
2315 if ((rt = inp->inp_route.ro_rt) != NULL) {
2316 struct in_ifaddr *ia = NULL;
2317
2318 /* if address is gone, keep the old route */
2319 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2320 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2321 /*
2322 * Address is around; ditch the route. A new route
2323 * can be allocated the next time output is attempted.
2324 */
2325 release = TRUE;
2326 }
2327 if (ia != NULL) {
2328 IFA_REMREF(&ia->ia_ifa);
2329 }
2330 }
2331 if (rt == NULL || release) {
2332 ROUTE_RELEASE(&inp->inp_route);
2333 }
2334 }
2335
2336 /*
2337 * Lookup a PCB based on the local address and port.
2338 */
2339 struct inpcb *
in_pcblookup_local(struct inpcbinfo * pcbinfo,struct in_addr laddr,unsigned int lport_arg,int wild_okay)2340 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
2341 unsigned int lport_arg, int wild_okay)
2342 {
2343 struct inpcb *inp;
2344 int matchwild = 3, wildcard;
2345 u_short lport = (u_short)lport_arg;
2346
2347 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);
2348
2349 if (!wild_okay) {
2350 struct inpcbhead *head;
2351 /*
2352 * Look for an unconnected (wildcard foreign addr) PCB that
2353 * matches the local address and port we're looking for.
2354 */
2355 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2356 pcbinfo->ipi_hashmask)];
2357 LIST_FOREACH(inp, head, inp_hash) {
2358 if (!(inp->inp_vflag & INP_IPV4)) {
2359 continue;
2360 }
2361 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2362 inp->inp_laddr.s_addr == laddr.s_addr &&
2363 inp->inp_lport == lport) {
2364 /*
2365 * Found.
2366 */
2367 return inp;
2368 }
2369 }
2370 /*
2371 * Not found.
2372 */
2373 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
2374 return NULL;
2375 } else {
2376 struct inpcbporthead *porthash;
2377 struct inpcbport *phd;
2378 struct inpcb *match = NULL;
2379 /*
2380 * Best fit PCB lookup.
2381 *
2382 * First see if this local port is in use by looking on the
2383 * port hash list.
2384 */
2385 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
2386 pcbinfo->ipi_porthashmask)];
2387 LIST_FOREACH(phd, porthash, phd_hash) {
2388 if (phd->phd_port == lport) {
2389 break;
2390 }
2391 }
2392 if (phd != NULL) {
2393 /*
2394 * Port is in use by one or more PCBs. Look for best
2395 * fit.
2396 */
2397 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
2398 wildcard = 0;
2399 if (!(inp->inp_vflag & INP_IPV4)) {
2400 continue;
2401 }
2402 if (inp->inp_faddr.s_addr != INADDR_ANY) {
2403 wildcard++;
2404 }
2405 if (inp->inp_laddr.s_addr != INADDR_ANY) {
2406 if (laddr.s_addr == INADDR_ANY) {
2407 wildcard++;
2408 } else if (inp->inp_laddr.s_addr !=
2409 laddr.s_addr) {
2410 continue;
2411 }
2412 } else {
2413 if (laddr.s_addr != INADDR_ANY) {
2414 wildcard++;
2415 }
2416 }
2417 if (wildcard < matchwild) {
2418 match = inp;
2419 matchwild = wildcard;
2420 if (matchwild == 0) {
2421 break;
2422 }
2423 }
2424 }
2425 }
2426 KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
2427 0, 0, 0, 0);
2428 return match;
2429 }
2430 }
2431
2432 /*
2433 * Check if PCB exists in hash list.
2434 */
2435 int
in_pcblookup_hash_exists(struct inpcbinfo * pcbinfo,struct in_addr faddr,u_int fport_arg,struct in_addr laddr,u_int lport_arg,int wildcard,uid_t * uid,gid_t * gid,struct ifnet * ifp)2436 in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2437 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2438 uid_t *uid, gid_t *gid, struct ifnet *ifp)
2439 {
2440 struct inpcbhead *head;
2441 struct inpcb *inp;
2442 u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2443 int found = 0;
2444 struct inpcb *local_wild = NULL;
2445 struct inpcb *local_wild_mapped = NULL;
2446
2447 *uid = UID_MAX;
2448 *gid = GID_MAX;
2449
2450 /*
2451 * We may have found the pcb in the last lookup - check this first.
2452 */
2453
2454 lck_rw_lock_shared(&pcbinfo->ipi_lock);
2455
2456 /*
2457 * First look for an exact match.
2458 */
2459 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2460 pcbinfo->ipi_hashmask)];
2461 LIST_FOREACH(inp, head, inp_hash) {
2462 if (!(inp->inp_vflag & INP_IPV4)) {
2463 continue;
2464 }
2465 if (inp_restricted_recv(inp, ifp)) {
2466 continue;
2467 }
2468
2469 #if NECP
2470 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2471 continue;
2472 }
2473 #endif /* NECP */
2474
2475 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2476 inp->inp_laddr.s_addr == laddr.s_addr &&
2477 inp->inp_fport == fport &&
2478 inp->inp_lport == lport) {
2479 if ((found = (inp->inp_socket != NULL))) {
2480 /*
2481 * Found.
2482 */
2483 *uid = kauth_cred_getuid(
2484 inp->inp_socket->so_cred);
2485 *gid = kauth_cred_getgid(
2486 inp->inp_socket->so_cred);
2487 }
2488 lck_rw_done(&pcbinfo->ipi_lock);
2489 return found;
2490 }
2491 }
2492
2493 if (!wildcard) {
2494 /*
2495 * Not found.
2496 */
2497 lck_rw_done(&pcbinfo->ipi_lock);
2498 return 0;
2499 }
2500
2501 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2502 pcbinfo->ipi_hashmask)];
2503 LIST_FOREACH(inp, head, inp_hash) {
2504 if (!(inp->inp_vflag & INP_IPV4)) {
2505 continue;
2506 }
2507 if (inp_restricted_recv(inp, ifp)) {
2508 continue;
2509 }
2510
2511 #if NECP
2512 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2513 continue;
2514 }
2515 #endif /* NECP */
2516
2517 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2518 inp->inp_lport == lport) {
2519 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2520 if ((found = (inp->inp_socket != NULL))) {
2521 *uid = kauth_cred_getuid(
2522 inp->inp_socket->so_cred);
2523 *gid = kauth_cred_getgid(
2524 inp->inp_socket->so_cred);
2525 }
2526 lck_rw_done(&pcbinfo->ipi_lock);
2527 return found;
2528 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2529 if (inp->inp_socket &&
2530 SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2531 local_wild_mapped = inp;
2532 } else {
2533 local_wild = inp;
2534 }
2535 }
2536 }
2537 }
2538 if (local_wild == NULL) {
2539 if (local_wild_mapped != NULL) {
2540 if ((found = (local_wild_mapped->inp_socket != NULL))) {
2541 *uid = kauth_cred_getuid(
2542 local_wild_mapped->inp_socket->so_cred);
2543 *gid = kauth_cred_getgid(
2544 local_wild_mapped->inp_socket->so_cred);
2545 }
2546 lck_rw_done(&pcbinfo->ipi_lock);
2547 return found;
2548 }
2549 lck_rw_done(&pcbinfo->ipi_lock);
2550 return 0;
2551 }
2552 if ((found = (local_wild->inp_socket != NULL))) {
2553 *uid = kauth_cred_getuid(
2554 local_wild->inp_socket->so_cred);
2555 *gid = kauth_cred_getgid(
2556 local_wild->inp_socket->so_cred);
2557 }
2558 lck_rw_done(&pcbinfo->ipi_lock);
2559 return found;
2560 }
2561
2562 /*
2563 * Lookup PCB in hash list.
2564 */
2565 struct inpcb *
in_pcblookup_hash(struct inpcbinfo * pcbinfo,struct in_addr faddr,u_int fport_arg,struct in_addr laddr,u_int lport_arg,int wildcard,struct ifnet * ifp)2566 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
2567 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
2568 struct ifnet *ifp)
2569 {
2570 struct inpcbhead *head;
2571 struct inpcb *inp;
2572 u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
2573 struct inpcb *local_wild = NULL;
2574 struct inpcb *local_wild_mapped = NULL;
2575
2576 /*
2577 * We may have found the pcb in the last lookup - check this first.
2578 */
2579
2580 lck_rw_lock_shared(&pcbinfo->ipi_lock);
2581
2582 /*
2583 * First look for an exact match.
2584 */
2585 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
2586 pcbinfo->ipi_hashmask)];
2587 LIST_FOREACH(inp, head, inp_hash) {
2588 if (!(inp->inp_vflag & INP_IPV4)) {
2589 continue;
2590 }
2591 if (inp_restricted_recv(inp, ifp)) {
2592 continue;
2593 }
2594
2595 #if NECP
2596 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2597 continue;
2598 }
2599 #endif /* NECP */
2600
2601 if (inp->inp_faddr.s_addr == faddr.s_addr &&
2602 inp->inp_laddr.s_addr == laddr.s_addr &&
2603 inp->inp_fport == fport &&
2604 inp->inp_lport == lport) {
2605 /*
2606 * Found.
2607 */
2608 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2609 WNT_STOPUSING) {
2610 lck_rw_done(&pcbinfo->ipi_lock);
2611 return inp;
2612 } else {
2613 /* it's there but dead, say it isn't found */
2614 lck_rw_done(&pcbinfo->ipi_lock);
2615 return NULL;
2616 }
2617 }
2618 }
2619
2620 if (!wildcard) {
2621 /*
2622 * Not found.
2623 */
2624 lck_rw_done(&pcbinfo->ipi_lock);
2625 return NULL;
2626 }
2627
2628 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
2629 pcbinfo->ipi_hashmask)];
2630 LIST_FOREACH(inp, head, inp_hash) {
2631 if (!(inp->inp_vflag & INP_IPV4)) {
2632 continue;
2633 }
2634 if (inp_restricted_recv(inp, ifp)) {
2635 continue;
2636 }
2637
2638 #if NECP
2639 if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
2640 continue;
2641 }
2642 #endif /* NECP */
2643
2644 if (inp->inp_faddr.s_addr == INADDR_ANY &&
2645 inp->inp_lport == lport) {
2646 if (inp->inp_laddr.s_addr == laddr.s_addr) {
2647 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
2648 WNT_STOPUSING) {
2649 lck_rw_done(&pcbinfo->ipi_lock);
2650 return inp;
2651 } else {
2652 /* it's dead; say it isn't found */
2653 lck_rw_done(&pcbinfo->ipi_lock);
2654 return NULL;
2655 }
2656 } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
2657 if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
2658 local_wild_mapped = inp;
2659 } else {
2660 local_wild = inp;
2661 }
2662 }
2663 }
2664 }
2665 if (local_wild == NULL) {
2666 if (local_wild_mapped != NULL) {
2667 if (in_pcb_checkstate(local_wild_mapped,
2668 WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2669 lck_rw_done(&pcbinfo->ipi_lock);
2670 return local_wild_mapped;
2671 } else {
2672 /* it's dead; say it isn't found */
2673 lck_rw_done(&pcbinfo->ipi_lock);
2674 return NULL;
2675 }
2676 }
2677 lck_rw_done(&pcbinfo->ipi_lock);
2678 return NULL;
2679 }
2680 if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
2681 lck_rw_done(&pcbinfo->ipi_lock);
2682 return local_wild;
2683 }
2684 /*
2685 * It's either not found or is already dead.
2686 */
2687 lck_rw_done(&pcbinfo->ipi_lock);
2688 return NULL;
2689 }
2690
2691 /*
2692 * @brief Insert PCB onto various hash lists.
2693 *
2694 * @param inp Pointer to internet protocol control block
2695 * @param locked Implies if ipi_lock (protecting pcb list)
2696 * is already locked or not.
2697 *
2698 * @return int error on failure and 0 on success
2699 */
2700 int
in_pcbinshash(struct inpcb * inp,int locked)2701 in_pcbinshash(struct inpcb *inp, int locked)
2702 {
2703 struct inpcbhead *pcbhash;
2704 struct inpcbporthead *pcbporthash;
2705 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
2706 struct inpcbport *phd;
2707 u_int32_t hashkey_faddr;
2708
2709 if (!locked) {
2710 if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
2711 /*
2712 * Lock inversion issue, mostly with udp
2713 * multicast packets
2714 */
2715 socket_unlock(inp->inp_socket, 0);
2716 lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
2717 socket_lock(inp->inp_socket, 0);
2718 }
2719 }
2720
2721 /*
2722 * This routine or its caller may have given up
2723 * socket's protocol lock briefly.
2724 * During that time the socket may have been dropped.
2725 * Safe-guarding against that.
2726 */
2727 if (inp->inp_state == INPCB_STATE_DEAD) {
2728 if (!locked) {
2729 lck_rw_done(&pcbinfo->ipi_lock);
2730 }
2731 return ECONNABORTED;
2732 }
2733
2734
2735 if (inp->inp_vflag & INP_IPV6) {
2736 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2737 } else {
2738 hashkey_faddr = inp->inp_faddr.s_addr;
2739 }
2740
2741 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2742 inp->inp_fport, pcbinfo->ipi_hashmask);
2743
2744 pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];
2745
2746 pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
2747 pcbinfo->ipi_porthashmask)];
2748
2749 /*
2750 * Go through port list and look for a head for this lport.
2751 */
2752 LIST_FOREACH(phd, pcbporthash, phd_hash) {
2753 if (phd->phd_port == inp->inp_lport) {
2754 break;
2755 }
2756 }
2757
2758 /*
2759 * If none exists, malloc one and tack it on.
2760 */
2761 if (phd == NULL) {
2762 phd = kalloc_type(struct inpcbport, Z_WAITOK | Z_NOFAIL);
2763 phd->phd_port = inp->inp_lport;
2764 LIST_INIT(&phd->phd_pcblist);
2765 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
2766 }
2767
2768 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2769
2770 #if SKYWALK
2771 int err;
2772 struct socket *so = inp->inp_socket;
2773 if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
2774 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
2775 if (inp->inp_vflag & INP_IPV6) {
2776 err = netns_reserve_in6(&inp->inp_netns_token,
2777 inp->in6p_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
2778 NETNS_BSD | NETNS_PRERESERVED, NULL);
2779 } else {
2780 err = netns_reserve_in(&inp->inp_netns_token,
2781 inp->inp_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
2782 NETNS_BSD | NETNS_PRERESERVED, NULL);
2783 }
2784 if (err) {
2785 if (!locked) {
2786 lck_rw_done(&pcbinfo->ipi_lock);
2787 }
2788 return err;
2789 }
2790 netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
2791 inp_update_netns_flags(so);
2792 }
2793 #endif /* SKYWALK */
2794
2795 inp->inp_phd = phd;
2796 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
2797 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
2798 inp->inp_flags2 |= INP2_INHASHLIST;
2799
2800 if (!locked) {
2801 lck_rw_done(&pcbinfo->ipi_lock);
2802 }
2803
2804 #if NECP
2805 // This call catches the original setting of the local address
2806 inp_update_necp_policy(inp, NULL, NULL, 0);
2807 #endif /* NECP */
2808
2809 return 0;
2810 }
2811
2812 /*
2813 * Move PCB to the proper hash bucket when { faddr, fport } have been
2814 * changed. NOTE: This does not handle the case of the lport changing (the
2815 * hashed port list would have to be updated as well), so the lport must
2816 * not change after in_pcbinshash() has been called.
2817 */
2818 void
in_pcbrehash(struct inpcb * inp)2819 in_pcbrehash(struct inpcb *inp)
2820 {
2821 struct inpcbhead *head;
2822 u_int32_t hashkey_faddr;
2823
2824 #if SKYWALK
2825 struct socket *so = inp->inp_socket;
2826 if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
2827 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
2828 int err;
2829 if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
2830 if (inp->inp_vflag & INP_IPV6) {
2831 err = netns_change_addr_in6(
2832 &inp->inp_netns_token, inp->in6p_laddr);
2833 } else {
2834 err = netns_change_addr_in(
2835 &inp->inp_netns_token, inp->inp_laddr);
2836 }
2837 } else {
2838 if (inp->inp_vflag & INP_IPV6) {
2839 err = netns_reserve_in6(&inp->inp_netns_token,
2840 inp->in6p_laddr, (uint8_t)SOCK_PROTO(so),
2841 inp->inp_lport, NETNS_BSD, NULL);
2842 } else {
2843 err = netns_reserve_in(&inp->inp_netns_token,
2844 inp->inp_laddr, (uint8_t)SOCK_PROTO(so),
2845 inp->inp_lport, NETNS_BSD, NULL);
2846 }
2847 }
2848 /* We are assuming that whatever code paths result in a rehash
2849 * did their due diligence and ensured that the given
2850 * <proto, laddr, lport> tuple was free ahead of time. Just
2851 * reserving the lport on INADDR_ANY should be enough, since
2852 * that will block Skywalk from trying to reserve that same
2853 * port. Given this assumption, the above netns calls should
2854 * never fail*/
2855 VERIFY(err == 0);
2856
2857 netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
2858 inp_update_netns_flags(so);
2859 }
2860 #endif /* SKYWALK */
2861 if (inp->inp_vflag & INP_IPV6) {
2862 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
2863 } else {
2864 hashkey_faddr = inp->inp_faddr.s_addr;
2865 }
2866
2867 inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
2868 inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
2869 head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];
2870
2871 if (inp->inp_flags2 & INP2_INHASHLIST) {
2872 LIST_REMOVE(inp, inp_hash);
2873 inp->inp_flags2 &= ~INP2_INHASHLIST;
2874 }
2875
2876 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2877 LIST_INSERT_HEAD(head, inp, inp_hash);
2878 inp->inp_flags2 |= INP2_INHASHLIST;
2879
2880 #if NECP
2881 // This call catches updates to the remote addresses
2882 inp_update_necp_policy(inp, NULL, NULL, 0);
2883 #endif /* NECP */
2884 }
2885
2886 /*
2887 * Remove PCB from various lists.
2888 * Must be called pcbinfo lock is held in exclusive mode.
2889 */
2890 void
in_pcbremlists(struct inpcb * inp)2891 in_pcbremlists(struct inpcb *inp)
2892 {
2893 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
2894
2895 /*
2896 * Check if it's in hashlist -- an inp is placed in hashlist when
2897 * it's local port gets assigned. So it should also be present
2898 * in the port list.
2899 */
2900 if (inp->inp_flags2 & INP2_INHASHLIST) {
2901 struct inpcbport *phd = inp->inp_phd;
2902
2903 VERIFY(phd != NULL && inp->inp_lport > 0);
2904
2905 LIST_REMOVE(inp, inp_hash);
2906 inp->inp_hash.le_next = NULL;
2907 inp->inp_hash.le_prev = NULL;
2908
2909 LIST_REMOVE(inp, inp_portlist);
2910 inp->inp_portlist.le_next = NULL;
2911 inp->inp_portlist.le_prev = NULL;
2912 if (LIST_EMPTY(&phd->phd_pcblist)) {
2913 LIST_REMOVE(phd, phd_hash);
2914 kfree_type(struct inpcbport, phd);
2915 }
2916 inp->inp_phd = NULL;
2917 inp->inp_flags2 &= ~INP2_INHASHLIST;
2918 #if SKYWALK
2919 /* Free up the port in the namespace registrar */
2920 netns_release(&inp->inp_netns_token);
2921 netns_release(&inp->inp_wildcard_netns_token);
2922 #endif /* SKYWALK */
2923 }
2924 VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
2925
2926 if (inp->inp_flags2 & INP2_TIMEWAIT) {
2927 /* Remove from time-wait queue */
2928 tcp_remove_from_time_wait(inp);
2929 inp->inp_flags2 &= ~INP2_TIMEWAIT;
2930 VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
2931 inp->inp_pcbinfo->ipi_twcount--;
2932 } else {
2933 /* Remove from global inp list if it is not time-wait */
2934 LIST_REMOVE(inp, inp_list);
2935 }
2936
2937 if (inp->inp_flags2 & INP2_IN_FCTREE) {
2938 inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
2939 VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
2940 }
2941
2942 inp->inp_pcbinfo->ipi_count--;
2943 }
2944
2945 /*
2946 * Mechanism used to defer the memory release of PCBs
2947 * The pcb list will contain the pcb until the reaper can clean it up if
2948 * the following conditions are met:
2949 * 1) state "DEAD",
2950 * 2) wantcnt is STOPUSING
2951 * 3) usecount is 0
2952 * This function will be called to either mark the pcb as
2953 */
2954 int
in_pcb_checkstate(struct inpcb * pcb,int mode,int locked)2955 in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
2956 {
2957 volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
2958 UInt32 origwant;
2959 UInt32 newwant;
2960
2961 switch (mode) {
2962 case WNT_STOPUSING:
2963 /*
2964 * Try to mark the pcb as ready for recycling. CAS with
2965 * STOPUSING, if success we're good, if it's in use, will
2966 * be marked later
2967 */
2968 if (locked == 0) {
2969 socket_lock(pcb->inp_socket, 1);
2970 }
2971 pcb->inp_state = INPCB_STATE_DEAD;
2972
2973 stopusing:
2974 if (pcb->inp_socket->so_usecount < 0) {
2975 panic("%s: pcb=%p so=%p usecount is negative",
2976 __func__, pcb, pcb->inp_socket);
2977 /* NOTREACHED */
2978 }
2979 if (locked == 0) {
2980 socket_unlock(pcb->inp_socket, 1);
2981 }
2982
2983 inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);
2984
2985 origwant = *wantcnt;
2986 if ((UInt16) origwant == 0xffff) { /* should stop using */
2987 return WNT_STOPUSING;
2988 }
2989 newwant = 0xffff;
2990 if ((UInt16) origwant == 0) {
2991 /* try to mark it as unsuable now */
2992 OSCompareAndSwap(origwant, newwant, wantcnt);
2993 }
2994 return WNT_STOPUSING;
2995
2996 case WNT_ACQUIRE:
2997 /*
2998 * Try to increase reference to pcb. If WNT_STOPUSING
2999 * should bail out. If socket state DEAD, try to set count
3000 * to STOPUSING, return failed otherwise increase cnt.
3001 */
3002 do {
3003 origwant = *wantcnt;
3004 if ((UInt16) origwant == 0xffff) {
3005 /* should stop using */
3006 return WNT_STOPUSING;
3007 }
3008 newwant = origwant + 1;
3009 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
3010 return WNT_ACQUIRE;
3011
3012 case WNT_RELEASE:
3013 /*
3014 * Release reference. If result is null and pcb state
3015 * is DEAD, set wanted bit to STOPUSING
3016 */
3017 if (locked == 0) {
3018 socket_lock(pcb->inp_socket, 1);
3019 }
3020
3021 do {
3022 origwant = *wantcnt;
3023 if ((UInt16) origwant == 0x0) {
3024 panic("%s: pcb=%p release with zero count",
3025 __func__, pcb);
3026 /* NOTREACHED */
3027 }
3028 if ((UInt16) origwant == 0xffff) {
3029 /* should stop using */
3030 if (locked == 0) {
3031 socket_unlock(pcb->inp_socket, 1);
3032 }
3033 return WNT_STOPUSING;
3034 }
3035 newwant = origwant - 1;
3036 } while (!OSCompareAndSwap(origwant, newwant, wantcnt));
3037
3038 if (pcb->inp_state == INPCB_STATE_DEAD) {
3039 goto stopusing;
3040 }
3041 if (pcb->inp_socket->so_usecount < 0) {
3042 panic("%s: RELEASE pcb=%p so=%p usecount is negative",
3043 __func__, pcb, pcb->inp_socket);
3044 /* NOTREACHED */
3045 }
3046
3047 if (locked == 0) {
3048 socket_unlock(pcb->inp_socket, 1);
3049 }
3050 return WNT_RELEASE;
3051
3052 default:
3053 panic("%s: so=%p not a valid state =%x", __func__,
3054 pcb->inp_socket, mode);
3055 /* NOTREACHED */
3056 }
3057
3058 /* NOTREACHED */
3059 return mode;
3060 }
3061
3062 /*
3063 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
3064 * The inpcb_compat data structure is passed to user space and must
3065 * not change. We intentionally avoid copying pointers.
3066 */
3067 void
inpcb_to_compat(struct inpcb * inp,struct inpcb_compat * inp_compat)3068 inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
3069 {
3070 bzero(inp_compat, sizeof(*inp_compat));
3071 inp_compat->inp_fport = inp->inp_fport;
3072 inp_compat->inp_lport = inp->inp_lport;
3073 inp_compat->nat_owner = 0;
3074 inp_compat->nat_cookie = 0;
3075 inp_compat->inp_gencnt = inp->inp_gencnt;
3076 inp_compat->inp_flags = inp->inp_flags;
3077 inp_compat->inp_flow = inp->inp_flow;
3078 inp_compat->inp_vflag = inp->inp_vflag;
3079 inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
3080 inp_compat->inp_ip_p = inp->inp_ip_p;
3081 inp_compat->inp_dependfaddr.inp6_foreign =
3082 inp->inp_dependfaddr.inp6_foreign;
3083 inp_compat->inp_dependladdr.inp6_local =
3084 inp->inp_dependladdr.inp6_local;
3085 inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
3086 inp_compat->inp_depend6.inp6_hlim = 0;
3087 inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
3088 inp_compat->inp_depend6.inp6_ifindex = 0;
3089 inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
3090 }
3091
3092 #if XNU_TARGET_OS_OSX
/*
 * Copy the exported subset of an inpcb into the 64-bit sysctl export
 * structure (xinpcb64) handed to user space.
 *
 * NOTE(review): unlike inpcb_to_compat(), xinp is not zeroed here —
 * assumes the caller initialized the remaining xinpcb64 fields; confirm
 * at call sites.
 */
void
inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
{
	xinp->inp_fport = inp->inp_fport;
	xinp->inp_lport = inp->inp_lport;
	xinp->inp_gencnt = inp->inp_gencnt;
	xinp->inp_flags = inp->inp_flags;
	xinp->inp_flow = inp->inp_flow;
	xinp->inp_vflag = inp->inp_vflag;
	xinp->inp_ip_ttl = inp->inp_ip_ttl;
	xinp->inp_ip_p = inp->inp_ip_p;
	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	/* IPv6 hlim/ifindex deliberately not exported: reported as zero */
	xinp->inp_depend6.inp6_hlim = 0;
	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	xinp->inp_depend6.inp6_ifindex = 0;
	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}
3112 #endif /* XNU_TARGET_OS_OSX */
3113
3114 /*
3115 * The following routines implement this scheme:
3116 *
3117 * Callers of ip_output() that intend to cache the route in the inpcb pass
3118 * a local copy of the struct route to ip_output(). Using a local copy of
3119 * the cached route significantly simplifies things as IP no longer has to
3120 * worry about having exclusive access to the passed in struct route, since
3121 * it's defined in the caller's stack; in essence, this allows for a lock-
3122 * less operation when updating the struct route at the IP level and below,
3123 * whenever necessary. The scheme works as follows:
3124 *
3125 * Prior to dropping the socket's lock and calling ip_output(), the caller
3126 * copies the struct route from the inpcb into its stack, and adds a reference
3127 * to the cached route entry, if there was any. The socket's lock is then
3128 * dropped and ip_output() is called with a pointer to the copy of struct
3129 * route defined on the stack (not to the one in the inpcb.)
3130 *
3131 * Upon returning from ip_output(), the caller then acquires the socket's
3132 * lock and synchronizes the cache; if there is no route cached in the inpcb,
3133 * it copies the local copy of struct route (which may or may not contain any
3134 * route) back into the cache; otherwise, if the inpcb has a route cached in
3135 * it, the one in the local copy will be freed, if there's any. Trashing the
3136 * cached route in the inpcb can be avoided because ip_output() is single-
3137 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
3138 * by the socket/transport layer.)
3139 */
/*
 * Copy the PCB's cached route into the caller-supplied stack copy,
 * per the copyout/copyin scheme described above.  Called with the
 * socket lock held.
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the case of IPv4-mapped address case.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
		ROUTE_RELEASE(src);
	}

	route_copyout(dst, src, sizeof(*dst));
}
3157
/*
 * Synchronize the caller's stack copy of the route back into the PCB
 * cache after ip_output() returns, per the scheme described above.
 * Called with the socket lock held.
 */
void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
		panic("%s: wrong or corrupted route: %p", __func__, src);
	}

	route_copyin(src, dst, sizeof(*src));
}
3172
/*
 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
 *
 * ifscope == IFSCOPE_NONE unbinds; otherwise ifscope must name an
 * attached interface.  Returns ENXIO if the scope is out of range or
 * refers to a nonexistent interface; on success, optionally hands the
 * bound ifnet back through pifp.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	/* Validate ifscope against the interface table under the head lock */
	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return ENXIO;
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	/*
	 * A zero interface scope value indicates an "unbind".
	 * Otherwise, take in whatever value the app desires;
	 * the app may already know the scope (or force itself
	 * to such a scope) ahead of time before the interface
	 * gets attached. It doesn't matter either way; any
	 * route lookup from this point on will require an
	 * exact match for the embedded interface scope.
	 */
	inp->inp_boundifp = ifp;
	if (inp->inp_boundifp == NULL) {
		inp->inp_flags &= ~INP_BOUND_IF;
	} else {
		inp->inp_flags |= INP_BOUND_IF;
	}

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);

	if (pifp != NULL) {
		*pifp = ifp;
	}

	return 0;
}
3216
/*
 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for setting PROC_UUID_NO_CELLULAR policy.
 */
void
inp_set_nocellular(struct inpcb *inp)
{
	inp->inp_flags |= INP_NO_IFT_CELLULAR;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3229
/*
 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
 */
void
inp_clear_nocellular(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/*
	 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
	 * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag
	 * if and only if the socket is unrestricted.
	 */
	if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
		inp->inp_flags &= ~INP_NO_IFT_CELLULAR;

		/* Blow away any cached route in the PCB */
		ROUTE_RELEASE(&inp->inp_route);
	}
}
3251
/*
 * Mark the PCB as disallowing expensive interfaces.
 */
void
inp_set_noexpensive(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3260
/*
 * Mark the PCB as disallowing constrained interfaces.
 */
void
inp_set_noconstrained(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3269
/*
 * Allow this PCB to use AWDL-restricted interfaces.
 */
void
inp_set_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3278
3279 boolean_t
inp_get_awdl_unrestricted(struct inpcb * inp)3280 inp_get_awdl_unrestricted(struct inpcb *inp)
3281 {
3282 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
3283 }
3284
/*
 * Revoke this PCB's permission to use AWDL-restricted interfaces.
 */
void
inp_clear_awdl_unrestricted(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3293
/*
 * Allow this PCB to use interfaces to the internal co-processor.
 */
void
inp_set_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3302
3303 boolean_t
inp_get_intcoproc_allowed(struct inpcb * inp)3304 inp_get_intcoproc_allowed(struct inpcb *inp)
3305 {
3306 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3307 }
3308
/*
 * Revoke this PCB's permission to use internal co-processor interfaces.
 */
void
inp_clear_intcoproc_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3317
/*
 * Allow this PCB to use management interfaces.  Also records that the
 * management entitlement has been checked (INP2_MANAGEMENT_CHECKED).
 */
void
inp_set_management_allowed(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
	inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3327
3328 boolean_t
inp_get_management_allowed(struct inpcb * inp)3329 inp_get_management_allowed(struct inpcb *inp)
3330 {
3331 return (inp->inp_flags2 & INP2_MANAGEMENT_ALLOWED) ? TRUE : FALSE;
3332 }
3333
/*
 * Revoke this PCB's permission to use management interfaces.
 * Note: INP2_MANAGEMENT_CHECKED is intentionally left set.
 */
void
inp_clear_management_allowed(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_MANAGEMENT_ALLOWED;

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);
}
3342
3343 #if NECP
/*
 * Called when PROC_UUID_NECP_APP_POLICY is set.
 */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}
3352
/*
 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
 */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
3361 #endif /* NECP */
3362
/*
 * Calculate flow hash for an inp, used by an interface to identify a
 * flow. When an interface provides flow control advisory, this flow
 * hash is used as an identifier.
 *
 * Side effect: stores the hash in inp->inp_flowhash and inserts the
 * PCB into inp_fc_tree (sets INP2_IN_FCTREE) under inp_fc_lck.
 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
#if SKYWALK

	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));

	/* Build the flow-ID namespace key from the PCB's address/port tuple */
	if (inp->inp_vflag & INP_IPV4) {
		fk.ffk_af = AF_INET;
		fk.ffk_laddr_v4 = inp->inp_laddr;
		fk.ffk_raddr_v4 = inp->inp_faddr;
	} else {
		fk.ffk_af = AF_INET6;
		fk.ffk_laddr_v6 = inp->in6p_laddr;
		fk.ffk_raddr_v6 = inp->in6p_faddr;
		/* clear embedded scope ID */
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_laddr_v6)) {
			fk.ffk_laddr_v6.s6_addr16[1] = 0;
		}
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_raddr_v6)) {
			fk.ffk_raddr_v6.s6_addr16[1] = 0;
		}
	}

	fk.ffk_lport = inp->inp_lport;
	fk.ffk_rport = inp->inp_fport;
	/* fall back to the socket's protocol when inp_ip_p is unset */
	fk.ffk_proto = (inp->inp_ip_p != 0) ? inp->inp_ip_p :
	    (uint8_t)SOCK_PROTO(inp->inp_socket);
	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_INPCB, &fk, &flowid);
	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	ASSERT(inp->inp_flowhash == 0);
	ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) == 0);
	inp->inp_flowhash = flowid;
	VERIFY(RB_INSERT(inp_fc_tree, &inp_fc_tree, inp) == NULL);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowid;

#else /* !SKYWALK */

	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	/* Lazily initialize the global hash seed */
	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low. Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowhash;

#endif /* !SKYWALK */
}
3467
3468 void
inp_flowadv(uint32_t flowhash)3469 inp_flowadv(uint32_t flowhash)
3470 {
3471 struct inpcb *inp;
3472
3473 inp = inp_fc_getinp(flowhash, 0);
3474
3475 if (inp == NULL) {
3476 return;
3477 }
3478 inp_fc_feedback(inp);
3479 }
3480
/*
 * Function to compare inp_fc_entries in inp flow control tree.
 * Compares the raw bytes of the two 32-bit flow hashes; the RB-tree
 * only requires a consistent total order, not numeric ordering.
 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	           sizeof(inp1->inp_flowhash));
}
3490
/*
 * Look up a PCB in the flow-control tree by flow hash.
 *
 * flags may contain INPFC_SOLOCKED (caller holds the socket lock) and
 * INPFC_REMOVE (detach the PCB from the tree; returns NULL).  On a
 * plain lookup, a want-count reference is taken via in_pcb_checkstate();
 * NULL is returned if the PCB is absent or being stopped.
 */
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (flags & INPFC_REMOVE) {
		ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) != 0);
		/* upgrade to a full mutex hold before modifying the tree */
		lck_mtx_convert_spin(&inp_fc_lck);
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		bzero(&(inp->infc_link), sizeof(inp->infc_link));
#if SKYWALK
		VERIFY(inp->inp_flowhash != 0);
		flowidns_release_flowid(inp->inp_flowhash);
		inp->inp_flowhash = 0;
#endif /* SKYWALK */
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
		inp = NULL;
	}
	lck_mtx_unlock(&inp_fc_lck);

	return inp;
}
3528
/*
 * Process interface feedback for a flow-controlled PCB: release the
 * want-count taken by inp_fc_getinp(), clear the flow-controlled/
 * suspended state, and unthrottle TCP if applicable.
 */
static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/* remember that feedback arrived while a send was in progress */
	if (inp->inp_sndinprog_cnt > 0) {
		inp->inp_flags |= INP_FC_FEEDBACK;
	}

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		inp_fc_unthrottle_tcp(inp);
	}

	socket_unlock(so, 1);
}
3564
/*
 * Close out the current flow-advisory timing interval: add its
 * duration to the PCB's running total and bump the advisory count.
 * No-op if no interval is in progress.
 */
static void
inp_reset_fc_timerstat(struct inpcb *inp)
{
	uint64_t now;

	if (inp->inp_fadv_start_time == 0) {
		return;
	}

	now = net_uptime_us();
	ASSERT(now >= inp->inp_fadv_start_time);

	inp->inp_fadv_total_time += (now - inp->inp_fadv_start_time);
	inp->inp_fadv_cnt++;

	inp->inp_fadv_start_time = 0;
}
3582
3583 static void
inp_set_fc_timerstat(struct inpcb * inp)3584 inp_set_fc_timerstat(struct inpcb *inp)
3585 {
3586 if (inp->inp_fadv_start_time != 0) {
3587 return;
3588 }
3589
3590 inp->inp_fadv_start_time = net_uptime_us();
3591 }
3592
/*
 * Clear the flow-controlled/suspended state on a PCB, deliver a
 * resume event if the flow was suspended, and wake up writers that
 * were blocked waiting for interface feedback.
 */
void
inp_reset_fc_state(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
	int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);

	inp_reset_fc_timerstat(inp);

	if (suspended) {
		so->so_flags &= ~(SOF_SUSPENDED);
		soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
	}

	/* Give a write wakeup to unblock the socket */
	if (needwakeup) {
		sowwakeup(so);
	}
}
3614
/*
 * Transition a PCB into flow-controlled or suspended state based on
 * the flow advisory code from the interface.  Returns 1 if the state
 * was applied, 0 if the advisory was ignored (feedback race, or the
 * PCB is no longer in the flow-control tree).
 */
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK) {
		return 0;
	}

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			goto exit_reset;
		}
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			inp_set_fc_timerstat(inp);
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			inp_set_fc_timerstat(inp);

			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}

		/* throttle TCP only on the first advisory for this flow */
		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
			inp_fc_throttle_tcp(inp);
		}
		return 1;
	}

exit_reset:
	inp_reset_fc_timerstat(inp);

	return 0;
}
3666
/*
 * Handler for SO_FLUSH socket option.
 *
 * Flushes queued packets belonging to this PCB's flow (identified by
 * its flow hash) for the requested traffic class(es), on both the
 * cached-route interface and the last output interface.
 */
int
inp_flush(struct inpcb *inp, int optval)
{
	u_int32_t flowhash = inp->inp_flowhash;
	struct ifnet *rtifp, *oifp;

	/* Either all classes or one of the valid ones */
	if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
		return EINVAL;
	}

	/* We need a flow hash for identification */
	if (flowhash == 0) {
		return 0;
	}

	/* Grab the interfaces from the route and pcb */
	rtifp = ((inp->inp_route.ro_rt != NULL) ?
	    inp->inp_route.ro_rt->rt_ifp : NULL);
	oifp = inp->inp_last_outifp;

	if (rtifp != NULL) {
		if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	}
	/* avoid flushing the same interface twice */
	if (oifp != NULL && oifp != rtifp) {
		if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
	}

	return 0;
}
3700
3701 /*
3702 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3703 */
3704 void
inp_clear_INP_INADDR_ANY(struct socket * so)3705 inp_clear_INP_INADDR_ANY(struct socket *so)
3706 {
3707 struct inpcb *inp = NULL;
3708
3709 socket_lock(so, 1);
3710 inp = sotoinpcb(so);
3711 if (inp) {
3712 inp->inp_flags &= ~INP_INADDR_ANY;
3713 }
3714 socket_unlock(so, 1);
3715 }
3716
/*
 * Fill in process information (pid, name, UUID, and effective/
 * delegated identity) for the process that last used this PCB's
 * socket.
 */
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	soprocinfo->spi_pid = so->last_pid;
	strlcpy(&soprocinfo->spi_proc_name[0], &inp->inp_last_proc_name[0],
	    sizeof(soprocinfo->spi_proc_name));
	if (so->last_pid != 0) {
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	}
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
	strlcpy(&soprocinfo->spi_e_proc_name[0], &inp->inp_e_proc_name[0],
	    sizeof(soprocinfo->spi_e_proc_name));
}
3742
/*
 * Find the live PCB with the given flow hash in pcbinfo's list and
 * report its owning process.  Returns 1 if found, 0 if not, and -1
 * if flowhash is zero.  Takes the pcbinfo lock shared.
 */
int
inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
    struct so_procinfo *soprocinfo)
{
	struct inpcb *inp = NULL;
	int found = 0;

	bzero(soprocinfo, sizeof(struct so_procinfo));

	if (!flowhash) {
		return -1;
	}

	lck_rw_lock_shared(&pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash) {
			found = 1;
			inp_get_soprocinfo(inp, soprocinfo);
			break;
		}
	}
	lck_rw_done(&pcbinfo->ipi_lock);

	return found;
}
3770
3771 #if CONFIG_PROC_UUID_POLICY
/*
 * Apply or clear the per-process no-cellular policy on a PCB, and log
 * the transition when net_io_policy_log is enabled.
 */
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	/* log only actual transitions */
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* report the effective (delegated) identity when present */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}
3815
3816 #if NECP
/*
 * Apply or clear the NECP want-app-policy flag on a PCB, and log the
 * transition when net_io_policy_log is enabled.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	/* log only actual transitions */
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* report the effective (delegated) identity when present */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
3857 #endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */
3859
3860 #if NECP
/*
 * Re-run NECP policy matching for this PCB, optionally with address/
 * interface overrides.  If the matched policy requires rescoping and
 * the socket is not yet bound to a port or address, bind the PCB to
 * the rescope interface and mark it as scoped by NECP.
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
		inp->inp_flags2 |= INP2_SCOPED_BY_NECP;
	}
}
3874 #endif /* NECP */
3875
/*
 * Refresh the proc-UUID-based policies (no-cellular, NECP app policy)
 * for this PCB by consulting the proc UUID policy table.  Returns 0 on
 * success or when no policy applies; otherwise the lookup error.
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;
	uint8_t *lookup_uuid = NULL;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return 0;
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return 0;
	}

#if defined(XNU_TARGET_OS_OSX)
	/* prefer the responsible process UUID when one is recorded */
	if (so->so_rpid > 0) {
		lookup_uuid = so->so_ruuid;
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}
#endif
	/* fall back to the effective/last process UUID */
	if (lookup_uuid == NULL || err == ENOENT) {
		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly. If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	/* a missing table entry is not an error for the caller */
	return (err == ENOENT) ? 0 : err;
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return 0;
#endif /* !CONFIG_PROC_UUID_POLICY */
}
3949
/* When non-zero, denied sends/receives are reported via printf */
unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
3955
3956
/*
 * Called when we need to enforce policy restrictions in the input path.
 *
 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
 */
static boolean_t
_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* interfaces without IFEF_RESTRICTED_RECV impose no further limits */
	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
		return FALSE;
	}

	if (inp->inp_flags & INP_RECV_ANYIF) {
		return FALSE;
	}

	/*
	 * An entitled process can use the management interface without being bound
	 * to the interface
	 */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (INP_MANAGEMENT_ALLOWED(inp)) {
			return FALSE;
		}
		if (if_management_verbose > 1) {
			os_log(OS_LOG_DEFAULT, "_inp_restricted_recv %s:%d not allowed on management interface %s",
			    proc_best_name(current_proc()), proc_getpid(current_proc()),
			    ifp->if_xname);
		}
		return TRUE;
	}

	/* a socket bound to this restricted interface may still receive */
	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
		return FALSE;
	}

	/*
	 * NOTE(review): this check is redundant as written — both this
	 * branch and the fallthrough below return TRUE.  Kept as-is.
	 */
	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}


	/* default-deny on IFEF_RESTRICTED_RECV interfaces */
	return TRUE;
}
4029
4030 boolean_t
inp_restricted_recv(struct inpcb * inp,struct ifnet * ifp)4031 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
4032 {
4033 boolean_t ret;
4034
4035 ret = _inp_restricted_recv(inp, ifp);
4036 if (ret == TRUE && log_restricted) {
4037 printf("pid %d (%s) is unable to receive packets on %s\n",
4038 proc_getpid(current_proc()), proc_best_name(current_proc()),
4039 ifp->if_xname);
4040 }
4041 return ret;
4042 }
4043
/*
 * Called when we need to enforce policy restrictions in the output path.
 *
 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
 */
static boolean_t
_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Outbound restrictions.
	 */
	if (!sorestrictsend) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* management interfaces require the management entitlement */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (!INP_MANAGEMENT_ALLOWED(inp)) {
			if (if_management_verbose > 1) {
				os_log(OS_LOG_DEFAULT, "_inp_restricted_send %s:%d not allowed on management interface %s",
				    proc_best_name(current_proc()), proc_getpid(current_proc()),
				    ifp->if_xname);
			}
			return TRUE;
		}
	}

	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	return FALSE;
}
4098
4099 boolean_t
inp_restricted_send(struct inpcb * inp,struct ifnet * ifp)4100 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
4101 {
4102 boolean_t ret;
4103
4104 ret = _inp_restricted_send(inp, ifp);
4105 if (ret == TRUE && log_restricted) {
4106 printf("pid %d (%s) is unable to transmit packets on %s\n",
4107 proc_getpid(current_proc()), proc_best_name(current_proc()),
4108 ifp->if_xname);
4109 }
4110 return ret;
4111 }
4112
/*
 * Enable per-interface send-byte accounting for this PCB's socket when
 * the last output interface is cellular or Wi-Fi (and the socket is not
 * an MPTCP subflow), and seed the counters with any data already queued.
 */
inline void
inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
{
	struct ifnet *ifp = inp->inp_last_outifp;
	struct socket *so = inp->inp_socket;
	if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
	    (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
		int32_t unsent;

		so->so_snd.sb_flags |= SB_SNDBYTE_CNT;

		/*
		 * There can be data outstanding before the connection
		 * becomes established -- TFO case
		 */
		if (so->so_snd.sb_cc > 0) {
			inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
		}

		unsent = inp_get_sndbytes_allunsent(so, th_ack);
		if (unsent > 0) {
			inp_incr_sndbytes_unsent(so, unsent);
		}
	}
}
4138
/*
 * Atomically add len to the last output interface's total send-byte
 * counter.
 */
inline void
inp_incr_sndbytes_total(struct socket *so, int32_t len)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		VERIFY(ifp->if_sndbyte_total >= 0);
		OSAddAtomic64(len, &ifp->if_sndbyte_total);
	}
}
4150
/*
 * Atomically subtract len from the last output interface's total
 * send-byte counter, clamping at zero to avoid underflow.
 */
inline void
inp_decr_sndbytes_total(struct socket *so, int32_t len)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		if (ifp->if_sndbyte_total >= len) {
			OSAddAtomic64(-len, &ifp->if_sndbyte_total);
		} else {
			ifp->if_sndbyte_total = 0;
		}
	}
}
4165
/*
 * Atomically add len to the last output interface's unsent send-byte
 * counter.
 */
inline void
inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		VERIFY(ifp->if_sndbyte_unsent >= 0);
		OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
	}
}
4177
/*
 * Atomically subtract len from the last output interface's unsent
 * send-byte counter, clamping at zero.  No-op unless send-byte
 * accounting (SB_SNDBYTE_CNT) is enabled on the socket.
 */
inline void
inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
{
	if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
		return;
	}

	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		if (ifp->if_sndbyte_unsent >= len) {
			OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
		} else {
			ifp->if_sndbyte_unsent = 0;
		}
	}
}
4196
4197 inline void
inp_decr_sndbytes_allunsent(struct socket * so,u_int32_t th_ack)4198 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
4199 {
4200 int32_t len;
4201
4202 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4203 return;
4204 }
4205
4206 len = inp_get_sndbytes_allunsent(so, th_ack);
4207 inp_decr_sndbytes_unsent(so, len);
4208 }
4209
#if SKYWALK
/*
 * Propagate the socket's current option state into its netns token flags.
 * Each of the three tracked options is translated into either a set or a
 * clear bit, then applied in one netns_change_flags() call.  Bails out for
 * non-INET domains, missing pcbs, and invalid tokens.
 */
inline void
inp_update_netns_flags(struct socket *so)
{
	struct inpcb *inp;
	uint32_t on = 0;
	uint32_t off = 0;

	if (!(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		return;
	}

	inp = sotoinpcb(so);
	if (inp == NULL || !NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
		return;
	}

	if (so->so_options & SO_NOWAKEFROMSLEEP) {
		on |= NETNS_NOWAKEFROMSLEEP;
	} else {
		off |= NETNS_NOWAKEFROMSLEEP;
	}

	if (inp->inp_flags & INP_RECV_ANYIF) {
		on |= NETNS_RECVANYIF;
	} else {
		off |= NETNS_RECVANYIF;
	}

	if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
		on |= NETNS_EXTBGIDLE;
	} else {
		off |= NETNS_EXTBGIDLE;
	}

	netns_change_flags(&inp->inp_netns_token, on, off);
}
#endif /* SKYWALK */
4253
/*
 * Record network activity "now" in the inpcb's activity bitmap, using the
 * kernel's coarse uptime clock as the timestamp.
 */
inline void
inp_set_activity_bitmap(struct inpcb *inp)
{
	in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
}
4259
/*
 * Copy the inpcb's activity bitmap out to caller-supplied storage.
 * ab must point to a buffer of at least sizeof(activity_bitmap_t) bytes.
 */
inline void
inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
{
	bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
}
4265
4266 void
inp_update_last_owner(struct socket * so,struct proc * p,struct proc * ep)4267 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
4268 {
4269 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4270
4271 if (inp == NULL) {
4272 return;
4273 }
4274
4275 if (p != NULL) {
4276 strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
4277 }
4278 if (so->so_flags & SOF_DELEGATED) {
4279 if (ep != NULL) {
4280 strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
4281 } else {
4282 inp->inp_e_proc_name[0] = 0;
4283 }
4284 } else {
4285 inp->inp_e_proc_name[0] = 0;
4286 }
4287 }
4288
4289 void
inp_copy_last_owner(struct socket * so,struct socket * head)4290 inp_copy_last_owner(struct socket *so, struct socket *head)
4291 {
4292 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4293 struct inpcb *head_inp = (struct inpcb *)head->so_pcb;
4294
4295 if (inp == NULL || head_inp == NULL) {
4296 return;
4297 }
4298
4299 strlcpy(&inp->inp_last_proc_name[0], &head_inp->inp_last_proc_name[0], sizeof(inp->inp_last_proc_name));
4300 strlcpy(&inp->inp_e_proc_name[0], &head_inp->inp_e_proc_name[0], sizeof(inp->inp_e_proc_name));
4301 }
4302
/*
 * proc_iterate() callout: for a process holding the management-data (or
 * restricted-coprocessor) entitlement — or when management data is globally
 * unrestricted — walk its open file descriptors and mark every INET/INET6
 * socket's pcb as checked and allowed to use management interfaces.
 *
 * Locking/refcount order: WNT_ACQUIRE pins the pcb before the socket lock
 * is taken, then WNT_RELEASE is issued under the lock; a WNT_STOPUSING
 * answer at either point means the pcb is being torn down and is skipped.
 *
 * Always returns PROC_RETURNED so iteration continues to the next process.
 */
static int
in_check_management_interface_proc_callout(proc_t proc, void *arg __unused)
{
	struct fileproc *fp = NULL;
	task_t task = proc_task(proc);
	bool allowed = false;

	/* Entitlement gate; DEBUG/DEVELOPMENT builds accept dev variants too. */
	if (IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
	    || IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT_DEVELOPMENT) == true
	    || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
	    ) {
		allowed = true;
	}
	/* Not entitled and no global override: nothing to mark for this proc. */
	if (allowed == false && management_data_unrestricted == false) {
		return PROC_RETURNED;
	}

	proc_fdlock(proc);
	fdt_foreach(fp, proc) {
		struct fileglob *fg = fp->fp_glob;
		struct socket *so;
		struct inpcb *inp;

		if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
			continue;
		}

		so = (struct socket *)fp_get_data(fp);
		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
			continue;
		}

		inp = (struct inpcb *)so->so_pcb;

		/*
		 * NOTE(review): inp is not NULL-checked before
		 * in_pcb_checkstate() — presumably an INET socket on the fd
		 * table always has a pcb here; confirm that invariant.
		 */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}

		socket_lock(so, 1);

		/* Re-check under the socket lock; drop our WNT_ACQUIRE ref. */
		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			socket_unlock(so, 1);
			continue;
		}
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;

		socket_unlock(so, 1);
	}
	proc_fdunlock(proc);

	return PROC_RETURNED;
}
4359
/* Latched true once the one-shot management-interface process scan has run. */
static bool in_management_interface_checked = false;
4361
4362 static void
in_management_interface_event_callback(struct nwk_wq_entry * nwk_item)4363 in_management_interface_event_callback(struct nwk_wq_entry *nwk_item)
4364 {
4365 kfree_type(struct nwk_wq_entry, nwk_item);
4366
4367 if (in_management_interface_checked == true) {
4368 return;
4369 }
4370 in_management_interface_checked = true;
4371
4372 proc_iterate(PROC_ALLPROCLIST,
4373 in_check_management_interface_proc_callout,
4374 NULL, NULL, NULL);
4375 }
4376
4377 void
in_management_interface_check(void)4378 in_management_interface_check(void)
4379 {
4380 struct nwk_wq_entry *nwk_item;
4381
4382 if (if_management_interface_check_needed == false ||
4383 in_management_interface_checked == true) {
4384 return;
4385 }
4386
4387 nwk_item = kalloc_type(struct nwk_wq_entry,
4388 Z_WAITOK | Z_ZERO | Z_NOFAIL);
4389
4390 nwk_item->func = in_management_interface_event_callback;
4391
4392 nwk_wq_enqueue(nwk_item);
4393 }
4394