1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1991, 1993, 1995
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
61 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.17 2001/08/13 16:26:17 ume Exp $
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/malloc.h>
67 #include <sys/mbuf.h>
68 #include <sys/domain.h>
69 #include <sys/protosw.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/proc.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mcache.h>
76 #include <sys/kauth.h>
77 #include <sys/priv.h>
78 #include <sys/proc_uuid_policy.h>
79 #include <sys/syslog.h>
80 #include <sys/priv.h>
81 #include <sys/file_internal.h>
82 #include <net/dlil.h>
83
84 #include <libkern/OSAtomic.h>
85 #include <kern/locks.h>
86
87 #include <machine/limits.h>
88
89 #include <kern/zalloc.h>
90
91 #include <net/if.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/flowhash.h>
95 #include <net/flowadv.h>
96 #include <net/nat464_utils.h>
97 #include <net/ntstat.h>
98 #include <net/nwk_wq.h>
99 #include <net/restricted_in_port.h>
100
101 #include <netinet/in.h>
102 #include <netinet/in_pcb.h>
103 #include <netinet/in_var.h>
104 #include <netinet/ip_var.h>
105
106 #include <netinet/ip6.h>
107 #include <netinet6/ip6_var.h>
108
109 #include <sys/kdebug.h>
110 #include <sys/random.h>
111
112 #include <dev/random/randomdev.h>
113 #include <mach/boolean.h>
114
115 #include <pexpert/pexpert.h>
116
117 #if NECP
118 #include <net/necp.h>
119 #endif
120
121 #include <sys/stat.h>
122 #include <sys/ubc.h>
123 #include <sys/vnode.h>
124
125 #include <os/log.h>
126
127 #if SKYWALK
128 #include <skywalk/namespace/flowidns.h>
129 #endif /* SKYWALK */
130
131 #include <IOKit/IOBSD.h>
132
133 extern const char *proc_name_address(struct proc *);
134
135 static LCK_GRP_DECLARE(inpcb_lock_grp, "inpcb");
136 static LCK_ATTR_DECLARE(inpcb_lock_attr, 0, 0);
137 static LCK_MTX_DECLARE_ATTR(inpcb_lock, &inpcb_lock_grp, &inpcb_lock_attr);
138 static LCK_MTX_DECLARE_ATTR(inpcb_timeout_lock, &inpcb_lock_grp, &inpcb_lock_attr);
139
140 static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head);
141
142 static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */
143 static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */
144 static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */
145 static boolean_t inpcb_fast_timer_on = FALSE;
146
147 #define INPCB_GCREQ_THRESHOLD 50000
148
149 static thread_call_t inpcb_thread_call, inpcb_fast_thread_call;
150 static void inpcb_sched_timeout(void);
151 static void inpcb_sched_lazy_timeout(void);
152 static void _inpcb_sched_timeout(unsigned int);
153 static void inpcb_timeout(void *, void *);
154 const int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */
155 extern int tvtohz(struct timeval *);
156
157 #if CONFIG_PROC_UUID_POLICY
158 static void inp_update_cellular_policy(struct inpcb *, boolean_t);
159 #if NECP
160 static void inp_update_necp_want_app_policy(struct inpcb *, boolean_t);
161 #endif /* NECP */
#endif /* CONFIG_PROC_UUID_POLICY */
163
164 #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8))
165 #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1))
166
167 int allow_udp_port_exhaustion = 0;
168
169 /*
170 * These configure the range of local port addresses assigned to
171 * "unspecified" outgoing connections/packets/whatever.
172 */
173 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
174 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
175 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
176 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
177 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
178 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
179
180 #define RANGECHK(var, min, max) \
181 if ((var) < (min)) { (var) = (min); } \
182 else if ((var) > (max)) { (var) = (max); }
183
184 static int
185 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
186 {
187 #pragma unused(arg1, arg2)
188 int error;
189 int new_value = *(int *)oidp->oid_arg1;
190 #if (DEBUG | DEVELOPMENT)
191 int old_value = *(int *)oidp->oid_arg1;
192 /*
193 * For unit testing allow a non-superuser process with the
194 * proper entitlement to modify the variables
195 */
196 if (req->newptr) {
197 if (proc_suser(current_proc()) != 0 &&
198 (error = priv_check_cred(kauth_cred_get(),
199 PRIV_NETINET_RESERVEDPORT, 0))) {
200 return EPERM;
201 }
202 }
203 #endif /* (DEBUG | DEVELOPMENT) */
204
205 error = sysctl_handle_int(oidp, &new_value, 0, req);
206 if (!error) {
207 if (oidp->oid_arg1 == &ipport_lowfirstauto || oidp->oid_arg1 == &ipport_lowlastauto) {
208 RANGECHK(new_value, 1, IPPORT_RESERVED - 1);
209 } else {
210 RANGECHK(new_value, IPPORT_RESERVED, USHRT_MAX);
211 }
212 *(int *)oidp->oid_arg1 = new_value;
213 }
214
215 #if (DEBUG | DEVELOPMENT)
216 os_log(OS_LOG_DEFAULT,
217 "%s:%u sysctl net.restricted_port.verbose: %d -> %d)",
218 proc_best_name(current_proc()), proc_selfpid(),
219 old_value, *(int *)oidp->oid_arg1);
220 #endif /* (DEBUG | DEVELOPMENT) */
221
222 return error;
223 }
224
225 #undef RANGECHK
226
227 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange,
228 CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IP Ports");
229
230 #if (DEBUG | DEVELOPMENT)
231 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY)
232 #else
233 #define CTLFAGS_IP_PORTRANGE (CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED)
234 #endif /* (DEBUG | DEVELOPMENT) */
235
236 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
237 CTLFAGS_IP_PORTRANGE,
238 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
239 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
240 CTLFAGS_IP_PORTRANGE,
241 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
242 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
243 CTLFAGS_IP_PORTRANGE,
244 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
245 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
246 CTLFAGS_IP_PORTRANGE,
247 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
248 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
249 CTLFAGS_IP_PORTRANGE,
250 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
251 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
252 CTLFAGS_IP_PORTRANGE,
253 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
254 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, ipport_allow_udp_port_exhaustion,
255 CTLFLAG_LOCKED | CTLFLAG_RW, &allow_udp_port_exhaustion, 0, "");
256
257 static uint32_t apn_fallbk_debug = 0;
258 #define apn_fallbk_log(x) do { if (apn_fallbk_debug >= 1) log x; } while (0)
259
260 #if !XNU_TARGET_OS_OSX
261 static boolean_t apn_fallbk_enabled = TRUE;
262
263 SYSCTL_DECL(_net_inet);
264 SYSCTL_NODE(_net_inet, OID_AUTO, apn_fallback, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "APN Fallback");
265 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
266 &apn_fallbk_enabled, 0, "APN fallback enable");
267 SYSCTL_UINT(_net_inet_apn_fallback, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
268 &apn_fallbk_debug, 0, "APN fallback debug enable");
269 #else /* XNU_TARGET_OS_OSX */
270 static boolean_t apn_fallbk_enabled = FALSE;
271 #endif /* XNU_TARGET_OS_OSX */
272
273 extern int udp_use_randomport;
274 extern int tcp_use_randomport;
275
276 /* Structs used for flowhash computation */
277 struct inp_flowhash_key_addr {
278 union {
279 struct in_addr v4;
280 struct in6_addr v6;
281 u_int8_t addr8[16];
282 u_int16_t addr16[8];
283 u_int32_t addr32[4];
284 } infha;
285 };
286
287 struct inp_flowhash_key {
288 struct inp_flowhash_key_addr infh_laddr;
289 struct inp_flowhash_key_addr infh_faddr;
290 u_int32_t infh_lport;
291 u_int32_t infh_fport;
292 u_int32_t infh_af;
293 u_int32_t infh_proto;
294 u_int32_t infh_rand1;
295 u_int32_t infh_rand2;
296 };
297
298 #if !SKYWALK
299 static u_int32_t inp_hash_seed = 0;
300 #endif /* !SKYWALK */
301
302 static int infc_cmp(const struct inpcb *, const struct inpcb *);
303
304 /* Flags used by inp_fc_getinp */
305 #define INPFC_SOLOCKED 0x1
306 #define INPFC_REMOVE 0x2
307 static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
308
309 static void inp_fc_feedback(struct inpcb *);
310 extern void tcp_remove_from_time_wait(struct inpcb *inp);
311
312 static LCK_MTX_DECLARE_ATTR(inp_fc_lck, &inpcb_lock_grp, &inpcb_lock_attr);
313
314 RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree;
315 RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp);
316 RB_GENERATE(inp_fc_tree, inpcb, infc_link, infc_cmp);
317
318 /*
319 * Use this inp as a key to find an inp in the flowhash tree.
320 * Accesses to it are protected by inp_fc_lck.
321 */
322 struct inpcb key_inp;
323
324 /*
325 * in_pcb.c: manage the Protocol Control Blocks.
326 */
327
/*
 * One-time initialization of the inpcb module: allocate the lazy and
 * fast timer thread calls and set up the flow-advisory red-black tree.
 * Panics if called more than once or if the thread calls cannot be
 * allocated.
 */
void
in_pcbinit(void)
{
	static int inpcb_initialized = 0;

	/* Must only ever run once */
	VERIFY(!inpcb_initialized);
	inpcb_initialized = 1;

	/* Lazy timer: NULL arg distinguishes it inside inpcb_timeout() */
	inpcb_thread_call = thread_call_allocate_with_priority(inpcb_timeout,
	    NULL, THREAD_CALL_PRIORITY_KERNEL);
	/* Give it an arg so that we know that this is the fast timer */
	inpcb_fast_thread_call = thread_call_allocate_with_priority(
	    inpcb_timeout, &inpcb_timeout, THREAD_CALL_PRIORITY_KERNEL);
	if (inpcb_thread_call == NULL || inpcb_fast_thread_call == NULL) {
		panic("unable to alloc the inpcb thread call");
	}

	/*
	 * Initialize data structures required to deliver
	 * flow advisories.
	 */
	lck_mtx_lock(&inp_fc_lck);
	RB_INIT(&inp_fc_tree);
	bzero(&key_inp, sizeof(key_inp));
	lck_mtx_unlock(&inp_fc_lck);
}
354
/* True if any timer request (lazy, fast or nodelay) is pending in 'req' */
#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \
	((req).intimer_fast > 0) || ((req).intimer_nodelay > 0))
/*
 * Shared handler for both inpcb thread calls.  arg0 is non-NULL only
 * when invoked as the fast timer (see in_pcbinit).  Walks every
 * registered inpcbinfo, runs its garbage-collection and/or slow-timer
 * callback as requested, then re-arms a timer if any work remains.
 */
static void
inpcb_timeout(void *arg0, void *arg1)
{
#pragma unused(arg1)
	struct inpcbinfo *ipi;
	boolean_t t, gc;
	struct intimercount gccnt, tmcnt;

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	bzero(&gccnt, sizeof(gccnt));
	bzero(&tmcnt, sizeof(tmcnt));

	/* Consume the pending gc/tick flags under the timeout lock */
	lck_mtx_lock_spin(&inpcb_timeout_lock);
	gc = inpcb_garbage_collecting;
	inpcb_garbage_collecting = FALSE;

	t = inpcb_ticking;
	inpcb_ticking = FALSE;

	if (gc || t) {
		/* The per-protocol callbacks may block; drop the spin lock */
		lck_mtx_unlock(&inpcb_timeout_lock);

		lck_mtx_lock(&inpcb_lock);
		TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) {
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) {
				/*
				 * Zero the request counters before the
				 * callback; the callback re-arms them, so
				 * the sums below reflect work still
				 * pending after this pass.
				 */
				bzero(&ipi->ipi_gc_req,
				    sizeof(ipi->ipi_gc_req));
				if (gc && ipi->ipi_gc != NULL) {
					ipi->ipi_gc(ipi);
					gccnt.intimer_lazy +=
					    ipi->ipi_gc_req.intimer_lazy;
					gccnt.intimer_fast +=
					    ipi->ipi_gc_req.intimer_fast;
					gccnt.intimer_nodelay +=
					    ipi->ipi_gc_req.intimer_nodelay;
				}
			}
			if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) {
				/* Same clear-then-accumulate pattern */
				bzero(&ipi->ipi_timer_req,
				    sizeof(ipi->ipi_timer_req));
				if (t && ipi->ipi_timer != NULL) {
					ipi->ipi_timer(ipi);
					tmcnt.intimer_lazy +=
					    ipi->ipi_timer_req.intimer_lazy;
					tmcnt.intimer_fast +=
					    ipi->ipi_timer_req.intimer_fast;
					tmcnt.intimer_nodelay +=
					    ipi->ipi_timer_req.intimer_nodelay;
				}
			}
		}
		lck_mtx_unlock(&inpcb_lock);
		lck_mtx_lock_spin(&inpcb_timeout_lock);
	}

	/* lock was dropped above, so check first before overriding */
	if (!inpcb_garbage_collecting) {
		inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt);
	}
	if (!inpcb_ticking) {
		inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt);
	}

	/* arg0 will be set if we are the fast timer */
	if (arg0 != NULL) {
		inpcb_fast_timer_on = FALSE;
	}
	inpcb_timeout_run--;
	VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2);

	/* re-arm the timer if there's work to do */
	if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) {
		inpcb_sched_timeout();
	} else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) {
		/* be lazy when idle with little activity */
		inpcb_sched_lazy_timeout();
	} else {
		inpcb_sched_timeout();
	}

	lck_mtx_unlock(&inpcb_timeout_lock);
}
445
/*
 * Request a fast (no-leeway) run of inpcb_timeout.  Caller must hold
 * inpcb_timeout_lock (asserted in _inpcb_sched_timeout).
 */
static void
inpcb_sched_timeout(void)
{
	_inpcb_sched_timeout(0);
}
451
/*
 * Request a lazy run of inpcb_timeout with inpcb_timeout_lazy seconds
 * of leeway.  Caller must hold inpcb_timeout_lock.
 */
static void
inpcb_sched_lazy_timeout(void)
{
	_inpcb_sched_timeout(inpcb_timeout_lazy);
}
457
/*
 * Arm an inpcb timer thread call if one is needed.  offset == 0 requests
 * the fast timer (1 second, no leeway); a non-zero offset requests the
 * lazy timer with 'offset' seconds of leeway.  Called with
 * inpcb_timeout_lock held, possibly as a spin lock; the lock is
 * converted to a full mutex before calling into thread_call.
 */
static void
_inpcb_sched_timeout(unsigned int offset)
{
	uint64_t deadline, leeway;

	/* Both flavors fire one second from now */
	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);
	LCK_MTX_ASSERT(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED);
	if (inpcb_timeout_run == 0 &&
	    (inpcb_garbage_collecting || inpcb_ticking)) {
		/* thread_call entry may block; can't stay spinning */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		if (offset == 0) {
			inpcb_fast_timer_on = TRUE;
			thread_call_enter_delayed(inpcb_fast_thread_call,
			    deadline);
		} else {
			inpcb_fast_timer_on = FALSE;
			clock_interval_to_absolutetime_interval(offset,
			    NSEC_PER_SEC, &leeway);
			thread_call_enter_delayed_with_leeway(
			    inpcb_thread_call, NULL, deadline, leeway,
			    THREAD_CALL_DELAY_LEEWAY);
		}
	} else if (inpcb_timeout_run == 1 &&
	    offset == 0 && !inpcb_fast_timer_on) {
		/*
		 * Since the request was for a fast timer but the
		 * scheduled timer is a lazy timer, try to schedule
		 * another instance of fast timer also.
		 */
		lck_mtx_convert_spin(&inpcb_timeout_lock);
		inpcb_timeout_run++;
		inpcb_fast_timer_on = TRUE;
		thread_call_enter_delayed(inpcb_fast_thread_call, deadline);
	}
}
494
495 void
inpcb_gc_sched(struct inpcbinfo * ipi,u_int32_t type)496 inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type)
497 {
498 u_int32_t gccnt;
499
500 lck_mtx_lock_spin(&inpcb_timeout_lock);
501 inpcb_garbage_collecting = TRUE;
502 gccnt = ipi->ipi_gc_req.intimer_nodelay +
503 ipi->ipi_gc_req.intimer_fast;
504
505 if (gccnt > INPCB_GCREQ_THRESHOLD) {
506 type = INPCB_TIMER_FAST;
507 }
508
509 switch (type) {
510 case INPCB_TIMER_NODELAY:
511 atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1);
512 inpcb_sched_timeout();
513 break;
514 case INPCB_TIMER_FAST:
515 atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
516 inpcb_sched_timeout();
517 break;
518 default:
519 atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);
520 inpcb_sched_lazy_timeout();
521 break;
522 }
523 lck_mtx_unlock(&inpcb_timeout_lock);
524 }
525
526 void
inpcb_timer_sched(struct inpcbinfo * ipi,u_int32_t type)527 inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type)
528 {
529 lck_mtx_lock_spin(&inpcb_timeout_lock);
530 inpcb_ticking = TRUE;
531 switch (type) {
532 case INPCB_TIMER_NODELAY:
533 atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1);
534 inpcb_sched_timeout();
535 break;
536 case INPCB_TIMER_FAST:
537 atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1);
538 inpcb_sched_timeout();
539 break;
540 default:
541 atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1);
542 inpcb_sched_lazy_timeout();
543 break;
544 }
545 lck_mtx_unlock(&inpcb_timeout_lock);
546 }
547
548 void
in_pcbinfo_attach(struct inpcbinfo * ipi)549 in_pcbinfo_attach(struct inpcbinfo *ipi)
550 {
551 struct inpcbinfo *ipi0;
552
553 lck_mtx_lock(&inpcb_lock);
554 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
555 if (ipi0 == ipi) {
556 panic("%s: ipi %p already in the list",
557 __func__, ipi);
558 /* NOTREACHED */
559 }
560 }
561 TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry);
562 lck_mtx_unlock(&inpcb_lock);
563 }
564
565 int
in_pcbinfo_detach(struct inpcbinfo * ipi)566 in_pcbinfo_detach(struct inpcbinfo *ipi)
567 {
568 struct inpcbinfo *ipi0;
569 int error = 0;
570
571 lck_mtx_lock(&inpcb_lock);
572 TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) {
573 if (ipi0 == ipi) {
574 break;
575 }
576 }
577 if (ipi0 != NULL) {
578 TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry);
579 } else {
580 error = ENXIO;
581 }
582 lck_mtx_unlock(&inpcb_lock);
583
584 return error;
585 }
586
587 __attribute__((noinline))
588 char *
inp_snprintf_tuple(struct inpcb * inp,char * buf,size_t buflen)589 inp_snprintf_tuple(struct inpcb *inp, char *buf, size_t buflen)
590 {
591 char laddrstr[MAX_IPv6_STR_LEN];
592 char faddrstr[MAX_IPv6_STR_LEN];
593 uint16_t lport = 0;
594 uint16_t fport = 0;
595 uint16_t proto = IPPROTO_IP;
596
597 if (inp->inp_socket != NULL && inp->inp_socket->so_proto != NULL) {
598 proto = inp->inp_socket->so_proto->pr_protocol;
599
600 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
601 lport = inp->inp_lport;
602 fport = inp->inp_fport;
603 }
604 }
605 if (inp->inp_vflag & INP_IPV4) {
606 inet_ntop(AF_INET, (void *)&inp->inp_laddr.s_addr, laddrstr, sizeof(laddrstr));
607 inet_ntop(AF_INET, (void *)&inp->inp_faddr.s_addr, faddrstr, sizeof(faddrstr));
608 } else if (inp->inp_vflag & INP_IPV6) {
609 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, laddrstr, sizeof(laddrstr));
610 inet_ntop(AF_INET6, (void *)&inp->in6p_faddr, faddrstr, sizeof(faddrstr));
611 }
612 snprintf(buf, buflen, "[%u %s:%u %s:%u]",
613 proto, laddrstr, ntohs(lport), faddrstr, ntohs(fport));
614
615 return buf;
616 }
617
/*
 * Decide whether this PCB's owner may use management interfaces and
 * cache the verdict in inp_flags2 (INP2_MANAGEMENT_CHECKED marks the
 * check as done, INP2_MANAGEMENT_ALLOWED grants access).
 */
__attribute__((noinline))
void
in_pcb_check_management_entitled(struct inpcb *inp)
{
	/* The result is cached; evaluate only once per PCB */
	if (inp->inp_flags2 & INP2_MANAGEMENT_CHECKED) {
		return;
	}

	if (management_data_unrestricted) {
		/* Global override: management data is unrestricted */
		inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
	} else if (if_management_interface_check_needed == true) {
		inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
		/*
		 * Note that soopt_cred_check check both intcoproc entitlements
		 * We check MANAGEMENT_DATA_ENTITLEMENT as there is no corresponding PRIV value
		 */
		if (soopt_cred_check(inp->inp_socket, PRIV_NET_RESTRICTED_INTCOPROC, false, false) == 0
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == true
#if DEBUG || DEVELOPMENT
		    || IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
#endif /* DEBUG || DEVELOPMENT */
		    ) {
			inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
		} else {
			/* Denied: optionally log the offending tuple */
			if (__improbable(if_management_verbose > 1)) {
				char buf[128];

				os_log(OS_LOG_DEFAULT, "in_pcb_check_management_entitled %s:%d not management entitled %s",
				    proc_best_name(current_proc()),
				    proc_selfpid(),
				    inp_snprintf_tuple(inp, buf, sizeof(buf)));
			}
		}
	}
	/*
	 * NOTE(review): when neither branch runs, the PCB stays unmarked
	 * and a later call re-evaluates — appears intentional; confirm.
	 */
}
654
655 /*
656 * Allocate a PCB and associate it with the socket.
657 *
658 * Returns: 0 Success
659 * ENOBUFS
660 * ENOMEM
661 */
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p)
{
#pragma unused(p)
	struct inpcb *inp;
	caddr_t temp;

	if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
		/* Fresh zeroed PCB from the protocol's zone */
		inp = zalloc_flags(pcbinfo->ipi_zone,
		    Z_WAITOK | Z_ZERO | Z_NOFAIL);
	} else {
		/*
		 * Reuse the PCB cached in the socket layer; clear it but
		 * preserve the saved protocol PCB pointer across the bzero.
		 */
		inp = (struct inpcb *)(void *)so->so_saved_pcb;
		temp = inp->inp_saved_ppcb;
		bzero((caddr_t)inp, sizeof(*inp));
		inp->inp_saved_ppcb = temp;
	}

	/* gencnt is bumped again under ipi_lock before list insertion below */
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	inp->inp_pcbinfo = pcbinfo;
	inp->inp_socket = so;
	/* make sure inp_stat is always 64-bit aligned */
	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) +
	    sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
		panic("%s: insufficient space to align inp_stat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_cstat is always 64-bit aligned */
	inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) +
	    sizeof(*inp->inp_cstat) > sizeof(inp->inp_cstat_store)) {
		panic("%s: insufficient space to align inp_cstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_wstat is always 64-bit aligned */
	inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) +
	    sizeof(*inp->inp_wstat) > sizeof(inp->inp_wstat_store)) {
		panic("%s: insufficient space to align inp_wstat", __func__);
		/* NOTREACHED */
	}

	/* make sure inp_Wstat is always 64-bit aligned */
	inp->inp_Wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_Wstat_store,
	    sizeof(u_int64_t));
	if (((uintptr_t)inp->inp_Wstat - (uintptr_t)inp->inp_Wstat_store) +
	    sizeof(*inp->inp_Wstat) > sizeof(inp->inp_Wstat_store)) {
		panic("%s: insufficient space to align inp_Wstat", __func__);
		/* NOTREACHED */
	}

	so->so_pcb = (caddr_t)inp;

	/* Per-PCB mutex only for protocols that request it */
	if (so->so_proto->pr_flags & PR_PCBLOCK) {
		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp,
		    &pcbinfo->ipi_lock_attr);
	}

	/* v6-only unless IPv4-mapped addresses are globally enabled */
	if (SOCK_DOM(so) == PF_INET6 && !ip6_mapped_addr_on) {
		inp->inp_flags |= IN6P_IPV6_V6ONLY;
	}

	if (ip6_auto_flowlabel) {
		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
	}
	if (intcoproc_unrestricted) {
		inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
	}

	(void) inp_update_policy(inp);

	/* Publish the PCB on the protocol's global list */
	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
	pcbinfo->ipi_count++;
	lck_rw_done(&pcbinfo->ipi_lock);
	return 0;
}
745
746 /*
747 * in_pcblookup_local_and_cleanup does everything
748 * in_pcblookup_local does but it checks for a socket
749 * that's going away. Since we know that the lock is
750 * held read+write when this function is called, we
751 * can safely dispose of this socket like the slow
752 * timer would usually do and return NULL. This is
753 * great for bind.
754 */
755 struct inpcb *
in_pcblookup_local_and_cleanup(struct inpcbinfo * pcbinfo,struct in_addr laddr,u_int lport_arg,int wild_okay)756 in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr,
757 u_int lport_arg, int wild_okay)
758 {
759 struct inpcb *inp;
760
761 /* Perform normal lookup */
762 inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
763
764 /* Check if we found a match but it's waiting to be disposed */
765 if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
766 struct socket *so = inp->inp_socket;
767
768 socket_lock(so, 0);
769
770 if (so->so_usecount == 0) {
771 if (inp->inp_state != INPCB_STATE_DEAD) {
772 in_pcbdetach(inp);
773 }
774 in_pcbdispose(inp); /* will unlock & destroy */
775 inp = NULL;
776 } else {
777 socket_unlock(so, 0);
778 }
779 }
780
781 return inp;
782 }
783
784 static void
in_pcb_conflict_post_msg(u_int16_t port)785 in_pcb_conflict_post_msg(u_int16_t port)
786 {
787 /*
788 * Radar 5523020 send a kernel event notification if a
789 * non-participating socket tries to bind the port a socket
790 * who has set SOF_NOTIFYCONFLICT owns.
791 */
792 struct kev_msg ev_msg;
793 struct kev_in_portinuse in_portinuse;
794
795 bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
796 bzero(&ev_msg, sizeof(struct kev_msg));
797 in_portinuse.port = ntohs(port); /* port in host order */
798 in_portinuse.req_pid = proc_selfpid();
799 ev_msg.vendor_code = KEV_VENDOR_APPLE;
800 ev_msg.kev_class = KEV_NETWORK_CLASS;
801 ev_msg.kev_subclass = KEV_INET_SUBCLASS;
802 ev_msg.event_code = KEV_INET_PORTINUSE;
803 ev_msg.dv[0].data_ptr = &in_portinuse;
804 ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse);
805 ev_msg.dv[1].data_length = 0;
806 dlil_post_complete_msg(NULL, &ev_msg);
807 }
808
809 /*
810 * Bind an INPCB to an address and/or port. This routine should not alter
811 * the caller-supplied local address "nam".
812 *
813 * Returns: 0 Success
814 * EADDRNOTAVAIL Address not available.
815 * EINVAL Invalid argument
816 * EAFNOSUPPORT Address family not supported [notdef]
817 * EACCES Permission denied
818 * EADDRINUSE Address in use
819 * EAGAIN Resource unavailable, try again
820 * priv_check_cred:EPERM Operation not permitted
821 */
822 int
in_pcbbind(struct inpcb * inp,struct sockaddr * nam,struct proc * p)823 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
824 {
825 struct socket *so = inp->inp_socket;
826 unsigned short *lastport;
827 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
828 u_short lport = 0, rand_port = 0;
829 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
830 int error, randomport, conflict = 0;
831 boolean_t anonport = FALSE;
832 kauth_cred_t cred;
833 struct in_addr laddr;
834 struct ifnet *outif = NULL;
835
836 if (TAILQ_EMPTY(&in_ifaddrhead)) { /* XXX broken! */
837 return EADDRNOTAVAIL;
838 }
839 if (!(so->so_options & (SO_REUSEADDR | SO_REUSEPORT))) {
840 wild = 1;
841 }
842
843 bzero(&laddr, sizeof(laddr));
844
845 socket_unlock(so, 0); /* keep reference on socket */
846 lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
847 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
848 /* another thread completed the bind */
849 lck_rw_done(&pcbinfo->ipi_lock);
850 socket_lock(so, 0);
851 return EINVAL;
852 }
853
854 if (nam != NULL) {
855 if (nam->sa_len != sizeof(struct sockaddr_in)) {
856 lck_rw_done(&pcbinfo->ipi_lock);
857 socket_lock(so, 0);
858 return EINVAL;
859 }
860 #if 0
861 /*
862 * We should check the family, but old programs
863 * incorrectly fail to initialize it.
864 */
865 if (nam->sa_family != AF_INET) {
866 lck_rw_done(&pcbinfo->ipi_lock);
867 socket_lock(so, 0);
868 return EAFNOSUPPORT;
869 }
870 #endif /* 0 */
871 lport = SIN(nam)->sin_port;
872
873 if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) {
874 /*
875 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
876 * allow complete duplication of binding if
877 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
878 * and a multicast address is bound on both
879 * new and duplicated sockets.
880 */
881 if (so->so_options & SO_REUSEADDR) {
882 reuseport = SO_REUSEADDR | SO_REUSEPORT;
883 }
884 } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) {
885 struct sockaddr_in sin;
886 struct ifaddr *ifa;
887
888 /* Sanitized for interface address searches */
889 bzero(&sin, sizeof(sin));
890 sin.sin_family = AF_INET;
891 sin.sin_len = sizeof(struct sockaddr_in);
892 sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
893
894 ifa = ifa_ifwithaddr(SA(&sin));
895 if (ifa == NULL) {
896 lck_rw_done(&pcbinfo->ipi_lock);
897 socket_lock(so, 0);
898 return EADDRNOTAVAIL;
899 } else {
900 /*
901 * Opportunistically determine the outbound
902 * interface that may be used; this may not
903 * hold true if we end up using a route
904 * going over a different interface, e.g.
905 * when sending to a local address. This
906 * will get updated again after sending.
907 */
908 IFA_LOCK(ifa);
909 outif = ifa->ifa_ifp;
910 IFA_UNLOCK(ifa);
911 IFA_REMREF(ifa);
912 }
913 }
914
915 #if SKYWALK
916 if (inp->inp_flags2 & INP2_EXTERNAL_PORT) {
917 // Extract the external flow info
918 struct ns_flow_info nfi = {};
919 error = necp_client_get_netns_flow_info(inp->necp_client_uuid,
920 &nfi);
921 if (error != 0) {
922 lck_rw_done(&pcbinfo->ipi_lock);
923 socket_lock(so, 0);
924 return error;
925 }
926
927 // Extract the reserved port
928 u_int16_t reserved_lport = 0;
929 if (nfi.nfi_laddr.sa.sa_family == AF_INET) {
930 reserved_lport = nfi.nfi_laddr.sin.sin_port;
931 } else if (nfi.nfi_laddr.sa.sa_family == AF_INET6) {
932 reserved_lport = nfi.nfi_laddr.sin6.sin6_port;
933 } else {
934 lck_rw_done(&pcbinfo->ipi_lock);
935 socket_lock(so, 0);
936 return EINVAL;
937 }
938
939 // Validate or use the reserved port
940 if (lport == 0) {
941 lport = reserved_lport;
942 } else if (lport != reserved_lport) {
943 lck_rw_done(&pcbinfo->ipi_lock);
944 socket_lock(so, 0);
945 return EINVAL;
946 }
947 }
948
949 /* Do not allow reserving a UDP port if remaining UDP port count is below 4096 */
950 if (SOCK_PROTO(so) == IPPROTO_UDP && !allow_udp_port_exhaustion) {
951 uint32_t current_reservations = 0;
952 if (inp->inp_vflag & INP_IPV6) {
953 current_reservations = netns_lookup_reservations_count_in6(inp->in6p_laddr, IPPROTO_UDP);
954 } else {
955 current_reservations = netns_lookup_reservations_count_in(inp->inp_laddr, IPPROTO_UDP);
956 }
957 if (USHRT_MAX - UDP_RANDOM_PORT_RESERVE < current_reservations) {
958 log(LOG_ERR, "UDP port not available, less than 4096 UDP ports left");
959 lck_rw_done(&pcbinfo->ipi_lock);
960 socket_lock(so, 0);
961 return EADDRNOTAVAIL;
962 }
963 }
964
965 #endif /* SKYWALK */
966
967 if (lport != 0) {
968 struct inpcb *t;
969 uid_t u;
970
971 #if XNU_TARGET_OS_OSX
972 if (ntohs(lport) < IPPORT_RESERVED &&
973 SIN(nam)->sin_addr.s_addr != 0 &&
974 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
975 cred = kauth_cred_proc_ref(p);
976 error = priv_check_cred(cred,
977 PRIV_NETINET_RESERVEDPORT, 0);
978 kauth_cred_unref(&cred);
979 if (error != 0) {
980 lck_rw_done(&pcbinfo->ipi_lock);
981 socket_lock(so, 0);
982 return EACCES;
983 }
984 }
985 #endif /* XNU_TARGET_OS_OSX */
986 /*
987 * Check wether the process is allowed to bind to a restricted port
988 */
989 if (!current_task_can_use_restricted_in_port(lport,
990 (uint8_t)so->so_proto->pr_protocol, PORT_FLAGS_BSD)) {
991 lck_rw_done(&pcbinfo->ipi_lock);
992 socket_lock(so, 0);
993 return EADDRINUSE;
994 }
995
996 if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
997 (u = kauth_cred_getuid(so->so_cred)) != 0 &&
998 (t = in_pcblookup_local_and_cleanup(
999 inp->inp_pcbinfo, SIN(nam)->sin_addr, lport,
1000 INPLOOKUP_WILDCARD)) != NULL &&
1001 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1002 t->inp_laddr.s_addr != INADDR_ANY ||
1003 !(t->inp_socket->so_options & SO_REUSEPORT)) &&
1004 (u != kauth_cred_getuid(t->inp_socket->so_cred)) &&
1005 !(t->inp_socket->so_flags & SOF_REUSESHAREUID) &&
1006 (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1007 t->inp_laddr.s_addr != INADDR_ANY) &&
1008 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1009 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1010 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1011 if ((t->inp_socket->so_flags &
1012 SOF_NOTIFYCONFLICT) &&
1013 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1014 conflict = 1;
1015 }
1016
1017 lck_rw_done(&pcbinfo->ipi_lock);
1018
1019 if (conflict) {
1020 in_pcb_conflict_post_msg(lport);
1021 }
1022
1023 socket_lock(so, 0);
1024 return EADDRINUSE;
1025 }
1026 t = in_pcblookup_local_and_cleanup(pcbinfo,
1027 SIN(nam)->sin_addr, lport, wild);
1028 if (t != NULL &&
1029 (reuseport & t->inp_socket->so_options) == 0 &&
1030 (!(t->inp_flags2 & INP2_EXTERNAL_PORT) ||
1031 !(inp->inp_flags2 & INP2_EXTERNAL_PORT) ||
1032 uuid_compare(t->necp_client_uuid, inp->necp_client_uuid) != 0)) {
1033 if (SIN(nam)->sin_addr.s_addr != INADDR_ANY ||
1034 t->inp_laddr.s_addr != INADDR_ANY ||
1035 SOCK_DOM(so) != PF_INET6 ||
1036 SOCK_DOM(t->inp_socket) != PF_INET6) {
1037 if ((t->inp_socket->so_flags &
1038 SOF_NOTIFYCONFLICT) &&
1039 !(so->so_flags & SOF_NOTIFYCONFLICT)) {
1040 conflict = 1;
1041 }
1042
1043 lck_rw_done(&pcbinfo->ipi_lock);
1044
1045 if (conflict) {
1046 in_pcb_conflict_post_msg(lport);
1047 }
1048 socket_lock(so, 0);
1049 return EADDRINUSE;
1050 }
1051 }
1052 #if SKYWALK
1053 if ((SOCK_PROTO(so) == IPPROTO_TCP ||
1054 SOCK_PROTO(so) == IPPROTO_UDP) &&
1055 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1056 int res_err = 0;
1057 if (inp->inp_vflag & INP_IPV6) {
1058 res_err = netns_reserve_in6(
1059 &inp->inp_netns_token,
1060 SIN6(nam)->sin6_addr,
1061 (uint8_t)SOCK_PROTO(so), lport, NETNS_BSD,
1062 NULL);
1063 } else {
1064 res_err = netns_reserve_in(
1065 &inp->inp_netns_token,
1066 SIN(nam)->sin_addr, (uint8_t)SOCK_PROTO(so),
1067 lport, NETNS_BSD, NULL);
1068 }
1069 if (res_err != 0) {
1070 lck_rw_done(&pcbinfo->ipi_lock);
1071 socket_lock(so, 0);
1072 return EADDRINUSE;
1073 }
1074 }
1075 #endif /* SKYWALK */
1076 }
1077 laddr = SIN(nam)->sin_addr;
1078 }
1079 if (lport == 0) {
1080 u_short first, last;
1081 int count;
1082 bool found;
1083
1084 /*
1085 * Override wild = 1 for implicit bind (mainly used by connect)
1086 * For implicit bind (lport == 0), we always use an unused port,
1087 * so REUSEADDR|REUSEPORT don't apply
1088 */
1089 wild = 1;
1090
1091 randomport = (so->so_flags & SOF_BINDRANDOMPORT) ||
1092 (so->so_type == SOCK_STREAM ? tcp_use_randomport :
1093 udp_use_randomport);
1094
1095 /*
1096 * Even though this looks similar to the code in
1097 * in6_pcbsetport, the v6 vs v4 checks are different.
1098 */
1099 anonport = TRUE;
1100 if (inp->inp_flags & INP_HIGHPORT) {
1101 first = (u_short)ipport_hifirstauto; /* sysctl */
1102 last = (u_short)ipport_hilastauto;
1103 lastport = &pcbinfo->ipi_lasthi;
1104 } else if (inp->inp_flags & INP_LOWPORT) {
1105 cred = kauth_cred_proc_ref(p);
1106 error = priv_check_cred(cred,
1107 PRIV_NETINET_RESERVEDPORT, 0);
1108 kauth_cred_unref(&cred);
1109 if (error != 0) {
1110 lck_rw_done(&pcbinfo->ipi_lock);
1111 socket_lock(so, 0);
1112 return error;
1113 }
1114 first = (u_short)ipport_lowfirstauto; /* 1023 */
1115 last = (u_short)ipport_lowlastauto; /* 600 */
1116 lastport = &pcbinfo->ipi_lastlow;
1117 } else {
1118 first = (u_short)ipport_firstauto; /* sysctl */
1119 last = (u_short)ipport_lastauto;
1120 lastport = &pcbinfo->ipi_lastport;
1121 }
1122 /* No point in randomizing if only one port is available */
1123
1124 if (first == last) {
1125 randomport = 0;
1126 }
1127 /*
1128 * Simple check to ensure all ports are not used up causing
1129 * a deadlock here.
1130 *
1131 * We split the two cases (up and down) so that the direction
1132 * is not being tested on each round of the loop.
1133 */
1134 if (first > last) {
1135 struct in_addr lookup_addr;
1136
1137 /*
1138 * counting down
1139 */
1140 if (randomport) {
1141 read_frandom(&rand_port, sizeof(rand_port));
1142 *lastport =
1143 first - (rand_port % (first - last));
1144 }
1145 count = first - last;
1146
1147 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1148 inp->inp_laddr;
1149
1150 found = false;
1151 do {
1152 if (count-- < 0) { /* completely used? */
1153 lck_rw_done(&pcbinfo->ipi_lock);
1154 socket_lock(so, 0);
1155 return EADDRNOTAVAIL;
1156 }
1157 --*lastport;
1158 if (*lastport > first || *lastport < last) {
1159 *lastport = first;
1160 }
1161 lport = htons(*lastport);
1162
1163 /*
1164 * Skip if this is a restricted port as we do not want to
1165 * restricted ports as ephemeral
1166 */
1167 if (IS_RESTRICTED_IN_PORT(lport)) {
1168 continue;
1169 }
1170
1171 found = in_pcblookup_local_and_cleanup(pcbinfo,
1172 lookup_addr, lport, wild) == NULL;
1173 #if SKYWALK
1174 if (found &&
1175 (SOCK_PROTO(so) == IPPROTO_TCP ||
1176 SOCK_PROTO(so) == IPPROTO_UDP) &&
1177 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1178 int res_err;
1179 if (inp->inp_vflag & INP_IPV6) {
1180 res_err = netns_reserve_in6(
1181 &inp->inp_netns_token,
1182 inp->in6p_laddr,
1183 (uint8_t)SOCK_PROTO(so), lport,
1184 NETNS_BSD, NULL);
1185 } else {
1186 res_err = netns_reserve_in(
1187 &inp->inp_netns_token,
1188 lookup_addr, (uint8_t)SOCK_PROTO(so),
1189 lport, NETNS_BSD, NULL);
1190 }
1191 found = res_err == 0;
1192 }
1193 #endif /* SKYWALK */
1194 } while (!found);
1195 } else {
1196 struct in_addr lookup_addr;
1197
1198 /*
1199 * counting up
1200 */
1201 if (randomport) {
1202 read_frandom(&rand_port, sizeof(rand_port));
1203 *lastport =
1204 first + (rand_port % (first - last));
1205 }
1206 count = last - first;
1207
1208 lookup_addr = (laddr.s_addr != INADDR_ANY) ? laddr :
1209 inp->inp_laddr;
1210
1211 found = false;
1212 do {
1213 if (count-- < 0) { /* completely used? */
1214 lck_rw_done(&pcbinfo->ipi_lock);
1215 socket_lock(so, 0);
1216 return EADDRNOTAVAIL;
1217 }
1218 ++*lastport;
1219 if (*lastport < first || *lastport > last) {
1220 *lastport = first;
1221 }
1222 lport = htons(*lastport);
1223
1224 /*
1225 * Skip if this is a restricted port as we do not want to
1226 * restricted ports as ephemeral
1227 */
1228 if (IS_RESTRICTED_IN_PORT(lport)) {
1229 continue;
1230 }
1231
1232 found = in_pcblookup_local_and_cleanup(pcbinfo,
1233 lookup_addr, lport, wild) == NULL;
1234 #if SKYWALK
1235 if (found &&
1236 (SOCK_PROTO(so) == IPPROTO_TCP ||
1237 SOCK_PROTO(so) == IPPROTO_UDP) &&
1238 !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
1239 int res_err;
1240 if (inp->inp_vflag & INP_IPV6) {
1241 res_err = netns_reserve_in6(
1242 &inp->inp_netns_token,
1243 inp->in6p_laddr,
1244 (uint8_t)SOCK_PROTO(so), lport,
1245 NETNS_BSD, NULL);
1246 } else {
1247 res_err = netns_reserve_in(
1248 &inp->inp_netns_token,
1249 lookup_addr, (uint8_t)SOCK_PROTO(so),
1250 lport, NETNS_BSD, NULL);
1251 }
1252 found = res_err == 0;
1253 }
1254 #endif /* SKYWALK */
1255 } while (!found);
1256 }
1257 }
1258 socket_lock(so, 0);
1259
1260 /*
1261 * We unlocked socket's protocol lock for a long time.
1262 * The socket might have been dropped/defuncted.
1263 * Checking if world has changed since.
1264 */
1265 if (inp->inp_state == INPCB_STATE_DEAD) {
1266 #if SKYWALK
1267 netns_release(&inp->inp_netns_token);
1268 #endif /* SKYWALK */
1269 lck_rw_done(&pcbinfo->ipi_lock);
1270 return ECONNABORTED;
1271 }
1272
1273 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) {
1274 #if SKYWALK
1275 netns_release(&inp->inp_netns_token);
1276 #endif /* SKYWALK */
1277 lck_rw_done(&pcbinfo->ipi_lock);
1278 return EINVAL;
1279 }
1280
1281 if (laddr.s_addr != INADDR_ANY) {
1282 inp->inp_laddr = laddr;
1283 inp->inp_last_outifp = outif;
1284 #if SKYWALK
1285 if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
1286 netns_set_ifnet(&inp->inp_netns_token, outif);
1287 }
1288 #endif /* SKYWALK */
1289 }
1290 inp->inp_lport = lport;
1291 if (anonport) {
1292 inp->inp_flags |= INP_ANONPORT;
1293 }
1294
1295 if (in_pcbinshash(inp, 1) != 0) {
1296 inp->inp_laddr.s_addr = INADDR_ANY;
1297 inp->inp_last_outifp = NULL;
1298
1299 #if SKYWALK
1300 netns_release(&inp->inp_netns_token);
1301 #endif /* SKYWALK */
1302 inp->inp_lport = 0;
1303 if (anonport) {
1304 inp->inp_flags &= ~INP_ANONPORT;
1305 }
1306 lck_rw_done(&pcbinfo->ipi_lock);
1307 return EAGAIN;
1308 }
1309 lck_rw_done(&pcbinfo->ipi_lock);
1310 sflt_notify(so, sock_evt_bound, NULL);
1311
1312 in_pcb_check_management_entitled(inp);
1313
1314 return 0;
1315 }
1316
/*
 * True if the IPv4 destination is link-local, loopback, zeronet,
 * multicast or RFC 1918 private -- i.e. a destination for which an
 * APN (cellular data) fallback notification is never warranted.
 */
#define APN_FALLBACK_IP_FILTER(a) \
	(IN_LINKLOCAL(ntohl((a)->sin_addr.s_addr)) || \
	IN_LOOPBACK(ntohl((a)->sin_addr.s_addr)) || \
	IN_ZERONET(ntohl((a)->sin_addr.s_addr)) || \
	IN_MULTICAST(ntohl((a)->sin_addr.s_addr)) || \
	IN_PRIVATE(ntohl((a)->sin_addr.s_addr)))

/* Minimum seconds between two fallback notifications (rate limit) */
#define APN_FALLBACK_NOTIF_INTERVAL     2 /* Magic Number */
/* net_uptime() timestamp of the last notification that was posted */
static uint64_t last_apn_fallback = 0;
1326
/*
 * Decide whether an APN fallback kernel event should be posted for an
 * outbound IPv4 connection attempt that found no usable route/address.
 *
 * Returns TRUE only when ALL of the following hold:
 *   - the feature is enabled and the caller is not the kernel itself;
 *   - the socket has not opted out via SO_NOAPNFALLBK;
 *   - we are outside the notification rate-limit window;
 *   - the destination (if supplied) is a globally-routable address;
 *   - an unscoped IPv6 default route exists and goes through cellular,
 *     while no unscoped IPv4 default route exists;
 *   - the calling binary looks like a third-party app (bundle ID with a
 *     dot, not "com.apple.*") built before the App Store IPv6 cutoff.
 *
 * proc must be non-NULL; so and p_dstv4 may be NULL.
 */
static boolean_t
apn_fallback_required(proc_t proc, struct socket *so, struct sockaddr_in *p_dstv4)
{
	uint64_t timenow;
	struct sockaddr_storage lookup_default_addr;
	struct rtentry *rt = NULL;

	VERIFY(proc != NULL);

	/* Feature globally disabled */
	if (apn_fallbk_enabled == FALSE) {
		return FALSE;
	}

	/* Never trigger on behalf of the kernel */
	if (proc == kernproc) {
		return FALSE;
	}

	/* Socket explicitly opted out */
	if (so && (so->so_options & SO_NOAPNFALLBK)) {
		return FALSE;
	}

	/* Rate-limit: at most one notification per interval */
	timenow = net_uptime();
	if ((timenow - last_apn_fallback) < APN_FALLBACK_NOTIF_INTERVAL) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification throttled.\n"));
		return FALSE;
	}

	/* Non-global destinations (loopback, multicast, private, ...) */
	if (p_dstv4 && APN_FALLBACK_IP_FILTER(p_dstv4)) {
		return FALSE;
	}

	/* Check if we have unscoped IPv6 default route through cellular */
	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET6;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in6);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);
	if (NULL == rt) {
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route.\n"));
		return FALSE;
	}

	if (!IFNET_IS_CELLULAR(rt->rt_ifp)) {
		/* rtalloc1() returned a referenced entry; drop it */
		rtfree(rt);
		apn_fallbk_log((LOG_INFO, "APN fallback notification could not find "
		    "unscoped default IPv6 route through cellular interface.\n"));
		return FALSE;
	}

	/*
	 * We have a default IPv6 route, ensure that
	 * we do not have IPv4 default route before triggering
	 * the event
	 */
	rtfree(rt);
	rt = NULL;

	bzero(&lookup_default_addr, sizeof(lookup_default_addr));
	lookup_default_addr.ss_family = AF_INET;
	lookup_default_addr.ss_len = sizeof(struct sockaddr_in);

	rt = rtalloc1((struct sockaddr *)&lookup_default_addr, 0, 0);

	if (rt) {
		rtfree(rt);
		rt = NULL;
		apn_fallbk_log((LOG_INFO, "APN fallback notification found unscoped "
		    "IPv4 default route!\n"));
		return FALSE;
	}

	{
		/*
		 * We disable APN fallback if the binary is not a third-party app.
		 * Note that platform daemons use their process name as a
		 * bundle ID so we filter out bundle IDs without dots.
		 */
		const char *bundle_id = cs_identity_get(proc);
		if (bundle_id == NULL ||
		    bundle_id[0] == '\0' ||
		    strchr(bundle_id, '.') == NULL ||
		    strncmp(bundle_id, "com.apple.", sizeof("com.apple.") - 1) == 0) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found first-"
			    "party bundle ID \"%s\"!\n", (bundle_id ? bundle_id : "NULL")));
			return FALSE;
		}
	}

	{
		/*
		 * The Apple App Store IPv6 requirement started on
		 * June 1st, 2016 at 12:00:00 AM PDT.
		 * We disable APN fallback if the binary is more recent than that.
		 * We check both atime and birthtime since birthtime is not always supported.
		 */
		static const long ipv6_start_date = 1464764400L;
		vfs_context_t context;
		struct stat64 sb;
		int vn_stat_error;

		bzero(&sb, sizeof(struct stat64));
		context = vfs_context_create(NULL);
		vn_stat_error = vn_stat(proc->p_textvp, &sb, NULL, 1, 0, context);
		(void)vfs_context_rele(context);

		if (vn_stat_error != 0 ||
		    sb.st_atimespec.tv_sec >= ipv6_start_date ||
		    sb.st_birthtimespec.tv_sec >= ipv6_start_date) {
			apn_fallbk_log((LOG_INFO, "Abort: APN fallback notification found binary "
			    "too recent! (err %d atime %ld mtime %ld ctime %ld birthtime %ld)\n",
			    vn_stat_error, sb.st_atimespec.tv_sec, sb.st_mtimespec.tv_sec,
			    sb.st_ctimespec.tv_sec, sb.st_birthtimespec.tv_sec));
			return FALSE;
		}
	}
	return TRUE;
}
1445
1446 static void
apn_fallback_trigger(proc_t proc,struct socket * so)1447 apn_fallback_trigger(proc_t proc, struct socket *so)
1448 {
1449 pid_t pid = 0;
1450 struct kev_msg ev_msg;
1451 struct kev_netevent_apnfallbk_data apnfallbk_data;
1452
1453 last_apn_fallback = net_uptime();
1454 pid = proc_pid(proc);
1455 uuid_t application_uuid;
1456 uuid_clear(application_uuid);
1457 proc_getexecutableuuid(proc, application_uuid,
1458 sizeof(application_uuid));
1459
1460 bzero(&ev_msg, sizeof(struct kev_msg));
1461 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1462 ev_msg.kev_class = KEV_NETWORK_CLASS;
1463 ev_msg.kev_subclass = KEV_NETEVENT_SUBCLASS;
1464 ev_msg.event_code = KEV_NETEVENT_APNFALLBACK;
1465
1466 bzero(&apnfallbk_data, sizeof(apnfallbk_data));
1467
1468 if (so->so_flags & SOF_DELEGATED) {
1469 apnfallbk_data.epid = so->e_pid;
1470 uuid_copy(apnfallbk_data.euuid, so->e_uuid);
1471 } else {
1472 apnfallbk_data.epid = so->last_pid;
1473 uuid_copy(apnfallbk_data.euuid, so->last_uuid);
1474 }
1475
1476 ev_msg.dv[0].data_ptr = &apnfallbk_data;
1477 ev_msg.dv[0].data_length = sizeof(apnfallbk_data);
1478 kev_post_msg(&ev_msg);
1479 apn_fallbk_log((LOG_INFO, "APN fallback notification issued.\n"));
1480 }
1481
1482 /*
1483 * Transform old in_pcbconnect() into an inner subroutine for new
1484 * in_pcbconnect(); do some validity-checking on the remote address
1485 * (in "nam") and then determine local host address (i.e., which
1486 * interface) to use to access that remote host.
1487 *
1488 * This routine may alter the caller-supplied remote address "nam".
1489 *
1490 * The caller may override the bound-to-interface setting of the socket
1491 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1492 *
1493 * This routine might return an ifp with a reference held if the caller
1494 * provides a non-NULL outif, even in the error case. The caller is
1495 * responsible for releasing its reference.
1496 *
1497 * Returns: 0 Success
1498 * EINVAL Invalid argument
1499 * EAFNOSUPPORT Address family not supported
1500 * EADDRNOTAVAIL Address not available
1501 */
int
in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr,
    unsigned int ifscope, struct ifnet **outif, int raw)
{
	struct route *ro = &inp->inp_route;
	struct in_ifaddr *ia = NULL;
	struct sockaddr_in sin;
	int error = 0;
	boolean_t restricted = FALSE;

	/* Start with no interface result; only set on success below */
	if (outif != NULL) {
		*outif = NULL;
	}
	/*
	 * Validate the remote address: must be an exactly-sized AF_INET
	 * sockaddr, and (unless 'raw') must carry a non-zero port.
	 */
	if (nam->sa_len != sizeof(struct sockaddr_in)) {
		return EINVAL;
	}
	if (SIN(nam)->sin_family != AF_INET) {
		return EAFNOSUPPORT;
	}
	if (raw == 0 && SIN(nam)->sin_port == 0) {
		return EADDRNOTAVAIL;
	}

	in_pcb_check_management_entitled(inp);

	/*
	 * If the destination address is INADDR_ANY,
	 * use the primary local address.
	 * If the supplied address is INADDR_BROADCAST,
	 * and the primary interface supports broadcast,
	 * choose the broadcast address for that interface.
	 * Note: this rewrites the caller-supplied "nam" in place.
	 */
	if (raw == 0 && (SIN(nam)->sin_addr.s_addr == INADDR_ANY ||
	    SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST)) {
		lck_rw_lock_shared(&in_ifaddr_rwlock);
		if (!TAILQ_EMPTY(&in_ifaddrhead)) {
			ia = TAILQ_FIRST(&in_ifaddrhead);
			IFA_LOCK_SPIN(&ia->ia_ifa);
			if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) {
				SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr;
			} else if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
				SIN(nam)->sin_addr =
				    SIN(&ia->ia_broadaddr)->sin_addr;
			}
			/* no reference was taken on ia, just drop the lock */
			IFA_UNLOCK(&ia->ia_ifa);
			ia = NULL;
		}
		lck_rw_done(&in_ifaddr_rwlock);
	}
	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp->inp_laddr.s_addr != INADDR_ANY) {
		VERIFY(ia == NULL);
		*laddr = inp->inp_laddr;
		return 0;
	}

	/*
	 * If the ifscope is specified by the caller (e.g. IP_PKTINFO)
	 * then it overrides the sticky ifscope set for the socket.
	 */
	if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) {
		ifscope = inp->inp_boundifp->if_index;
	}

	/*
	 * If route is known or can be allocated now,
	 * our src addr is taken from the i/f, else punt.
	 * Note that we should check the address family of the cached
	 * destination, in case of sharing the cache with IPv6.
	 */
	if (ro->ro_rt != NULL) {
		RT_LOCK_SPIN(ro->ro_rt);
	}
	/* Drop the cached route when stale, wrong family/destination,
	 * or when SO_DONTROUTE forbids routed sends. */
	if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET ||
	    SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr ||
	    (inp->inp_socket->so_options & SO_DONTROUTE)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
	}
	if (!(inp->inp_socket->so_options & SO_DONTROUTE) &&
	    (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) {
		if (ro->ro_rt != NULL) {
			RT_UNLOCK(ro->ro_rt);
		}
		ROUTE_RELEASE(ro);
		/* No route yet, so try to acquire one */
		bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
		ro->ro_dst.sa_family = AF_INET;
		ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
		SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr;
		rtalloc_scoped(ro, ifscope);
		if (ro->ro_rt != NULL) {
			RT_LOCK_SPIN(ro->ro_rt);
		}
	}
	/* Sanitized local copy for interface address searches */
	bzero(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(struct sockaddr_in);
	sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr;
	/*
	 * If we did not find (or use) a route, assume dest is reachable
	 * on a directly connected network and try to find a corresponding
	 * interface to take the source address from.
	 */
	if (ro->ro_rt == NULL) {
		proc_t proc = current_proc();

		VERIFY(ia == NULL);
		/* ifa_ifwithdstaddr/ifa_ifwithnet_scoped return a
		 * referenced ifaddr; released in the 'done' path below. */
		ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
		if (ia == NULL) {
			ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
		}
		error = ((ia == NULL) ? ENETUNREACH : 0);

		/* No IPv4 path: maybe notify for APN (cellular) fallback */
		if (apn_fallback_required(proc, inp->inp_socket,
		    (void *)nam)) {
			apn_fallback_trigger(proc, inp->inp_socket);
		}

		goto done;
	}
	RT_LOCK_ASSERT_HELD(ro->ro_rt);
	/*
	 * If the outgoing interface on the route found is not
	 * a loopback interface, use the address from that interface.
	 */
	if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
		VERIFY(ia == NULL);
		/*
		 * If the route points to a cellular interface and the
		 * caller forbids our using interfaces of such type,
		 * pretend that there is no route.
		 * Apply the same logic for expensive interfaces.
		 */
		if (inp_restricted_send(inp, ro->ro_rt->rt_ifp)) {
			RT_UNLOCK(ro->ro_rt);
			ROUTE_RELEASE(ro);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else {
			/* Become a regular mutex */
			RT_CONVERT_LOCK(ro->ro_rt);
			ia = ifatoia(ro->ro_rt->rt_ifa);
			IFA_ADDREF(&ia->ia_ifa);

			/*
			 * Mark the control block for notification of
			 * a possible flow that might undergo clat46
			 * translation.
			 *
			 * We defer the decision to a later point when
			 * inpcb is being disposed off.
			 * The reason is that we only want to send notification
			 * if the flow was ever used to send data.
			 */
			if (IS_INTF_CLAT46(ro->ro_rt->rt_ifp)) {
				inp->inp_flags2 |= INP2_CLAT46_FLOW;
			}

			RT_UNLOCK(ro->ro_rt);
			error = 0;
		}
		goto done;
	}
	VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK);
	RT_UNLOCK(ro->ro_rt);
	/*
	 * The outgoing interface is marked with 'loopback net', so a route
	 * to ourselves is here.
	 * Try to find the interface of the destination address and then
	 * take the address from there. That interface is not necessarily
	 * a loopback interface.
	 */
	VERIFY(ia == NULL);
	ia = ifatoia(ifa_ifwithdstaddr(SA(&sin)));
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope));
	}
	if (ia == NULL) {
		/* Last resort: fall back to the route's own ifaddr */
		RT_LOCK(ro->ro_rt);
		ia = ifatoia(ro->ro_rt->rt_ifa);
		if (ia != NULL) {
			IFA_ADDREF(&ia->ia_ifa);
		}
		RT_UNLOCK(ro->ro_rt);
	}
	error = ((ia == NULL) ? ENETUNREACH : 0);

done:
	/*
	 * If the destination address is multicast and an outgoing
	 * interface has been set as a multicast option, use the
	 * address of that interface as our source address.
	 */
	if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) &&
	    inp->inp_moptions != NULL) {
		struct ip_moptions *imo;
		struct ifnet *ifp;

		imo = inp->inp_moptions;
		IMO_LOCK(imo);
		if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
		    ia->ia_ifp != imo->imo_multicast_ifp)) {
			ifp = imo->imo_multicast_ifp;
			/* Swap the reference held on ia for one on an
			 * address of the multicast interface, if any. */
			if (ia != NULL) {
				IFA_REMREF(&ia->ia_ifa);
			}
			lck_rw_lock_shared(&in_ifaddr_rwlock);
			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
				if (ia->ia_ifp == ifp) {
					break;
				}
			}
			if (ia != NULL) {
				IFA_ADDREF(&ia->ia_ifa);
			}
			lck_rw_done(&in_ifaddr_rwlock);
			if (ia == NULL) {
				error = EADDRNOTAVAIL;
			} else {
				error = 0;
			}
		}
		IMO_UNLOCK(imo);
	}
	/*
	 * Don't do pcblookup call here; return interface in laddr
	 * and exit to caller, that will do the lookup.
	 */
	if (ia != NULL) {
		/*
		 * If the source address belongs to a cellular interface
		 * and the socket forbids our using interfaces of such
		 * type, pretend that there is no source address.
		 * Apply the same logic for expensive interfaces.
		 */
		IFA_LOCK_SPIN(&ia->ia_ifa);
		if (inp_restricted_send(inp, ia->ia_ifa.ifa_ifp)) {
			IFA_UNLOCK(&ia->ia_ifa);
			error = EHOSTUNREACH;
			restricted = TRUE;
		} else if (error == 0) {
			*laddr = ia->ia_addr.sin_addr;
			if (outif != NULL) {
				struct ifnet *ifp;

				/* Prefer the route's interface when a
				 * route exists; otherwise the ifaddr's. */
				if (ro->ro_rt != NULL) {
					ifp = ro->ro_rt->rt_ifp;
				} else {
					ifp = ia->ia_ifp;
				}

				VERIFY(ifp != NULL);
				IFA_CONVERT_LOCK(&ia->ia_ifa);
				ifnet_reference(ifp); /* for caller */
				if (*outif != NULL) {
					ifnet_release(*outif);
				}
				*outif = ifp;
			}
			IFA_UNLOCK(&ia->ia_ifa);
		} else {
			IFA_UNLOCK(&ia->ia_ifa);
		}
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}

	/* Tell interested socket filters why the send was denied */
	if (restricted && error == EHOSTUNREACH) {
		soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_IFDENIED));
	}

	return error;
}
1785
1786 /*
1787 * Outer subroutine:
1788 * Connect from a socket to a specified address.
1789 * Both address and port must be specified in argument sin.
1790 * If don't have a local address for this socket yet,
1791 * then pick one.
1792 *
1793 * The caller may override the bound-to-interface setting of the socket
1794 * by specifying the ifscope parameter (e.g. from IP_PKTINFO.)
1795 */
int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p,
    unsigned int ifscope, struct ifnet **outif)
{
	struct in_addr laddr;
	struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
	struct inpcb *pcb;
	int error;
	struct socket *so = inp->inp_socket;

#if CONTENT_FILTER
	/* Bump the state-change generation so content filters notice */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	/*
	 * Call inner routine, to assign local interface address.
	 */
	if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif, 0)) != 0) {
		return error;
	}

	/*
	 * Look for an existing PCB with the same 4-tuple; done with the
	 * socket unlocked since the hash lookup takes pcbinfo locks.
	 */
	socket_unlock(so, 0);
	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
	    inp->inp_laddr.s_addr ? inp->inp_laddr : laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(so, 0);

	/*
	 * Check if the socket is still in a valid state. When we unlock this
	 * embryonic socket, it can get aborted if another thread is closing
	 * the listener (radar 7947600).
	 */
	if ((so->so_flags & SOF_ABORTED) != 0) {
		return ECONNREFUSED;
	}

	if (pcb != NULL) {
		/* Duplicate connection; drop the lookup's reference */
		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
		return EADDRINUSE;
	}
	if (inp->inp_laddr.s_addr == INADDR_ANY) {
		if (inp->inp_lport == 0) {
			/* Implicit bind: pick an ephemeral local port */
			error = in_pcbbind(inp, NULL, p);
			if (error) {
				return error;
			}
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
		inp->inp_laddr = laddr;
		/* no reference needed */
		inp->inp_last_outifp = (outif != NULL) ? *outif : NULL;
#if SKYWALK
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			netns_set_ifnet(&inp->inp_netns_token,
			    inp->inp_last_outifp);
		}
#endif /* SKYWALK */
		/* remember the local address was implicitly chosen */
		inp->inp_flags |= INP_INADDR_ANY;
	} else {
		/*
		 * Usage of IP_PKTINFO, without local port already
		 * specified will cause kernel to panic,
		 * see rdar://problem/18508185.
		 * For now returning error to avoid a kernel panic
		 * This routine can be refactored and handle this better
		 * in future.
		 */
		if (inp->inp_lport == 0) {
			return EINVAL;
		}
		if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets.
			 */
			socket_unlock(so, 0);
			lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
			socket_lock(so, 0);
		}
	}
	/* Commit the foreign endpoint and rehash under ipi_lock */
	inp->inp_faddr = sin->sin_addr;
	inp->inp_fport = sin->sin_port;
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_invalidate_cache(inp);
	}
	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	return 0;
}
1895
/*
 * Disconnect a PCB: clear the foreign address/port and rehash it so it
 * goes back to the wildcard hash slot.  May detach the PCB entirely if
 * the socket no longer holds a file descriptor reference.
 */
void
in_pcbdisconnect(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* Snapshot UDP stats before the 4-tuple is cleared */
	if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) {
		nstat_pcb_cache(inp);
	}

	inp->inp_faddr.s_addr = INADDR_ANY;
	inp->inp_fport = 0;

#if CONTENT_FILTER
	/* Bump the state-change generation so content filters notice */
	if (so) {
		so->so_state_change_cnt++;
	}
#endif

	if (!lck_rw_try_lock_exclusive(&inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(so, 0);
		lck_rw_lock_exclusive(&inp->inp_pcbinfo->ipi_lock);
		socket_lock(so, 0);
	}

	in_pcbrehash(inp);
	lck_rw_done(&inp->inp_pcbinfo->ipi_lock);
	/*
	 * A multipath subflow socket would have its SS_NOFDREF set by default,
	 * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
	 * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) {
		in_pcbdetach(inp);
	}
}
1932
/*
 * Detach a PCB from its socket on final close: release IPsec policy,
 * per-PCB options, multicast state and the cached route, mark the PCB
 * INPCB_STATE_DEAD, and schedule it for garbage collection.  Panics if
 * the socket's PCB pointer is already gone or the PCB cannot be moved
 * to the stop-using state.
 */
void
in_pcbdetach(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	if (so->so_pcb == NULL) {
		/* PCB has been disposed */
		panic("%s: inp=%p so=%p proto=%d so_pcb is null!", __func__,
		    inp, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

#if IPSEC
	if (inp->inp_sp != NULL) {
		(void) ipsec4_delete_pcbpolicy(inp);
	}
#endif /* IPSEC */

	/* Account UDP sockets that never exchanged a packet */
	if (inp->inp_stat != NULL && SOCK_PROTO(so) == IPPROTO_UDP) {
		if (inp->inp_stat->rxpackets == 0 && inp->inp_stat->txpackets == 0) {
			INC_ATOMIC_INT64_LIM(net_api_stats.nas_socket_inet_dgram_no_data);
		}
	}

	/*
	 * Let NetworkStatistics know this PCB is going away
	 * before we detach it.
	 */
	if (nstat_collect &&
	    (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) {
		nstat_pcb_detach(inp);
	}

	/* Free memory buffer held for generating keep alives */
	if (inp->inp_keepalive_data != NULL) {
		kfree_data(inp->inp_keepalive_data, inp->inp_keepalive_datalen);
		inp->inp_keepalive_data = NULL;
	}

	/* mark socket state as dead */
	if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
		panic("%s: so=%p proto=%d couldn't set to STOPUSING",
		    __func__, so, SOCK_PROTO(so));
		/* NOTREACHED */
	}

	if (!(so->so_flags & SOF_PCBCLEARING)) {
		struct ip_moptions *imo;

		inp->inp_vflag = 0;
		if (inp->inp_options != NULL) {
			(void) m_free(inp->inp_options);
			inp->inp_options = NULL;
		}
		ROUTE_RELEASE(&inp->inp_route);
		imo = inp->inp_moptions;
		if (imo != NULL) {
			IMO_REMREF(imo);
		}
		inp->inp_moptions = NULL;
		sofreelastref(so, 0);
		inp->inp_state = INPCB_STATE_DEAD;

		/*
		 * Enqueue an event to send kernel event notification
		 * if the flow has to CLAT46 for data packets
		 */
		if (inp->inp_flags2 & INP2_CLAT46_FLOW) {
			/*
			 * If there has been any exchange of data bytes
			 * over this flow.
			 * Schedule a notification to report that flow is
			 * using client side translation.
			 */
			if (inp->inp_stat != NULL &&
			    (inp->inp_stat->txbytes != 0 ||
			    inp->inp_stat->rxbytes != 0)) {
				if (so->so_flags & SOF_DELEGATED) {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->e_pid,
						so->e_uuid);
				} else {
					in6_clat46_event_enqueue_nwk_wq_entry(
						IN6_CLAT46_EVENT_V4_FLOW,
						so->last_pid,
						so->last_uuid);
				}
			}
		}

		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;

		inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
	}
}
2030
2031
/*
 * Final disposal of a detached pcb: sanity-check that no use references
 * remain, remove the pcb from the global hash/port/list structures, break
 * the socket<->pcb linkage, and free the pcb (and socket) memory.
 * Caller must hold the pcbinfo's ipi_lock exclusively.
 */
void
in_pcbdispose(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;
	struct inpcbinfo *ipi = inp->inp_pcbinfo;

	/*
	 * A pcb may only be disposed of once its socket has no remaining
	 * use count and the pcb itself was marked WNT_STOPUSING; anything
	 * else indicates a reference-counting bug, so panic loudly with
	 * as much state as is available.
	 */
	if (so != NULL && so->so_usecount != 0) {
		panic("%s: so %p [%d,%d] usecount %d lockhistory %s",
		    __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount,
		    solockhistory_nr(so));
		/* NOTREACHED */
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		if (so != NULL) {
			panic_plain("%s: inp %p invalid wantcnt %d, so %p "
			    "[%d,%d] usecount %d retaincnt %d state 0x%x "
			    "flags 0x%x lockhistory %s\n", __func__, inp,
			    inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so),
			    so->so_usecount, so->so_retaincnt, so->so_state,
			    so->so_flags, solockhistory_nr(so));
			/* NOTREACHED */
		} else {
			panic("%s: inp %p invalid wantcnt %d no socket",
			    __func__, inp, inp->inp_wantcnt);
			/* NOTREACHED */
		}
	}

	LCK_RW_ASSERT(&ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE);

	inp->inp_gencnt = ++ipi->ipi_gencnt;
	/* access ipi in in_pcbremlists */
	in_pcbremlists(inp);

	if (so != NULL) {
		if (so->so_proto->pr_flags & PR_PCBLOCK) {
			/* Drop the socket's last reference under pcb lock */
			sofreelastref(so, 0);
			if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) {
				/*
				 * selthreadclear() already called
				 * during sofreelastref() above.
				 */
				sbrelease(&so->so_rcv);
				sbrelease(&so->so_snd);
			}
			if (so->so_head != NULL) {
				panic("%s: so=%p head still exist",
				    __func__, so);
				/* NOTREACHED */
			}
			lck_mtx_unlock(&inp->inpcb_mtx);

#if NECP
			necp_inpcb_remove_cb(inp);
#endif /* NECP */

			lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp);
		}
		/* makes sure we're not called twice from so_close */
		so->so_flags |= SOF_PCBCLEARING;
		so->so_saved_pcb = (caddr_t)inp;
		so->so_pcb = NULL;
		inp->inp_socket = NULL;
#if NECP
		necp_inpcb_dispose(inp);
#endif /* NECP */
		/*
		 * In case there is a route cached after a detach (possible
		 * in the tcp case), make sure that it is freed before
		 * we deallocate the structure.
		 */
		ROUTE_RELEASE(&inp->inp_route);
		/*
		 * If the inp is cached in the socket layer it is freed
		 * together with the socket; otherwise return it to its zone.
		 */
		if ((so->so_flags1 & SOF1_CACHED_IN_SOCK_LAYER) == 0) {
			zfree(ipi->ipi_zone, inp);
		}
		sodealloc(so);
	}
}
2109
/*
 * The calling convention of in_getsockaddr() and in_getpeeraddr() was
 * modified to match the pru_sockaddr() and pru_peeraddr() entry points
 * in struct pr_usrreqs, so that protocols can just reference them directly
 * without the need for a wrapper function.
 */
2116 int
in_getsockaddr(struct socket * so,struct sockaddr ** nam)2117 in_getsockaddr(struct socket *so, struct sockaddr **nam)
2118 {
2119 struct inpcb *inp;
2120 struct sockaddr_in *sin;
2121
2122 /*
2123 * Do the malloc first in case it blocks.
2124 */
2125 sin = (struct sockaddr_in *)alloc_sockaddr(sizeof(*sin),
2126 Z_WAITOK | Z_NOFAIL);
2127
2128 sin->sin_family = AF_INET;
2129
2130 if ((inp = sotoinpcb(so)) == NULL) {
2131 free_sockaddr(sin);
2132 return EINVAL;
2133 }
2134 sin->sin_port = inp->inp_lport;
2135 sin->sin_addr = inp->inp_laddr;
2136
2137 *nam = (struct sockaddr *)sin;
2138 return 0;
2139 }
2140
2141 int
in_getsockaddr_s(struct socket * so,struct sockaddr_in * ss)2142 in_getsockaddr_s(struct socket *so, struct sockaddr_in *ss)
2143 {
2144 struct sockaddr_in *sin = ss;
2145 struct inpcb *inp;
2146
2147 VERIFY(ss != NULL);
2148 bzero(ss, sizeof(*ss));
2149
2150 sin->sin_family = AF_INET;
2151 sin->sin_len = sizeof(*sin);
2152
2153 if ((inp = sotoinpcb(so)) == NULL) {
2154 return EINVAL;
2155 }
2156
2157 sin->sin_port = inp->inp_lport;
2158 sin->sin_addr = inp->inp_laddr;
2159 return 0;
2160 }
2161
2162 int
in_getpeeraddr(struct socket * so,struct sockaddr ** nam)2163 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
2164 {
2165 struct inpcb *inp;
2166 struct sockaddr_in *sin;
2167
2168 /*
2169 * Do the malloc first in case it blocks.
2170 */
2171 sin = (struct sockaddr_in *)alloc_sockaddr(sizeof(*sin),
2172 Z_WAITOK | Z_NOFAIL);
2173
2174 sin->sin_family = AF_INET;
2175
2176 if ((inp = sotoinpcb(so)) == NULL) {
2177 free_sockaddr(sin);
2178 return EINVAL;
2179 }
2180 sin->sin_port = inp->inp_fport;
2181 sin->sin_addr = inp->inp_faddr;
2182
2183 *nam = (struct sockaddr *)sin;
2184 return 0;
2185 }
2186
/*
 * Invoke *notify (e.g. in_rtchange) with errno on every IPv4 pcb in
 * pcbinfo whose foreign address equals faddr.  Takes ipi_lock shared
 * for the list walk, and per-pcb acquires a use reference plus the
 * socket lock around the callback.
 */
void
in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    int errno, void (*notify)(struct inpcb *, int))
{
	struct inpcb *inp;

	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		/* Only IPv4 pcbs connected to faddr are of interest */
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp->inp_faddr.s_addr != faddr.s_addr ||
		    inp->inp_socket == NULL) {
			continue;
		}
		/* Skip pcbs that are already being torn down */
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
			continue;
		}
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, errno);
		/* Release with locked=1: we already hold the socket lock */
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
}
2213
2214 /*
2215 * Check for alternatives when higher level complains
2216 * about service problems. For now, invalidate cached
2217 * routing information. If the route was created dynamically
2218 * (by a redirect), time to try a default gateway again.
2219 */
void
in_losing(struct inpcb *inp)
{
	boolean_t release = FALSE;
	struct rtentry *rt;

	if ((rt = inp->inp_route.ro_rt) != NULL) {
		struct in_ifaddr *ia = NULL;

		RT_LOCK(rt);
		if (rt->rt_flags & RTF_DYNAMIC) {
			/*
			 * Prevent another thread from modifying rt_key,
			 * rt_gateway via rt_setgate() after rt_lock is
			 * dropped by marking the route as defunct.
			 */
			rt->rt_flags |= RTF_CONDEMNED;
			RT_UNLOCK(rt);
			/* Redirect-created route: delete it outright */
			(void) rtrequest(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
		} else {
			RT_UNLOCK(rt);
		}
		/* if the address is gone keep the old route in the pcb */
		if (inp->inp_laddr.s_addr != INADDR_ANY &&
		    (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
			/*
			 * Address is around; ditch the route. A new route
			 * can be allocated the next time output is attempted.
			 */
			release = TRUE;
		}
		if (ia != NULL) {
			/* ifa_foraddr() returned a reference; drop it */
			IFA_REMREF(&ia->ia_ifa);
		}
	}
	/* No cached route, or local address still valid: flush the cache */
	if (rt == NULL || release) {
		ROUTE_RELEASE(&inp->inp_route);
	}
}
2260
2261 /*
2262 * After a routing change, flush old routing
2263 * and allocate a (hopefully) better one.
2264 */
2265 void
in_rtchange(struct inpcb * inp,int errno)2266 in_rtchange(struct inpcb *inp, int errno)
2267 {
2268 #pragma unused(errno)
2269 boolean_t release = FALSE;
2270 struct rtentry *rt;
2271
2272 if ((rt = inp->inp_route.ro_rt) != NULL) {
2273 struct in_ifaddr *ia = NULL;
2274
2275 /* if address is gone, keep the old route */
2276 if (inp->inp_laddr.s_addr != INADDR_ANY &&
2277 (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
2278 /*
2279 * Address is around; ditch the route. A new route
2280 * can be allocated the next time output is attempted.
2281 */
2282 release = TRUE;
2283 }
2284 if (ia != NULL) {
2285 IFA_REMREF(&ia->ia_ifa);
2286 }
2287 }
2288 if (rt == NULL || release) {
2289 ROUTE_RELEASE(&inp->inp_route);
2290 }
2291 }
2292
2293 /*
2294 * Lookup a PCB based on the local address and port.
2295 */
struct inpcb *
in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
    unsigned int lport_arg, int wild_okay)
{
	struct inpcb *inp;
	/*
	 * Wildcard score: 0 = exact, each wildcarded component adds 1.
	 * matchwild starts at 3 so any candidate scoring 0..2 can win.
	 */
	int matchwild = 3, wildcard;
	u_short lport = (u_short)lport_arg;

	KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0);

	if (!wild_okay) {
		struct inpcbhead *head;
		/*
		 * Look for an unconnected (wildcard foreign addr) PCB that
		 * matches the local address and port we're looking for.
		 */
		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
		    pcbinfo->ipi_hashmask)];
		LIST_FOREACH(inp, head, inp_hash) {
			if (!(inp->inp_vflag & INP_IPV4)) {
				continue;
			}
			if (inp->inp_faddr.s_addr == INADDR_ANY &&
			    inp->inp_laddr.s_addr == laddr.s_addr &&
			    inp->inp_lport == lport) {
				/*
				 * Found.
				 */
				return inp;
			}
		}
		/*
		 * Not found.
		 */
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return NULL;
	} else {
		struct inpcbporthead *porthash;
		struct inpcbport *phd;
		struct inpcb *match = NULL;
		/*
		 * Best fit PCB lookup.
		 *
		 * First see if this local port is in use by looking on the
		 * port hash list.
		 */
		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
		    pcbinfo->ipi_porthashmask)];
		LIST_FOREACH(phd, porthash, phd_hash) {
			if (phd->phd_port == lport) {
				break;
			}
		}
		if (phd != NULL) {
			/*
			 * Port is in use by one or more PCBs. Look for best
			 * fit (lowest wildcard score wins).
			 */
			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
				wildcard = 0;
				if (!(inp->inp_vflag & INP_IPV4)) {
					continue;
				}
				/* Connected pcb: foreign addr is a wildcard miss */
				if (inp->inp_faddr.s_addr != INADDR_ANY) {
					wildcard++;
				}
				if (inp->inp_laddr.s_addr != INADDR_ANY) {
					if (laddr.s_addr == INADDR_ANY) {
						wildcard++;
					} else if (inp->inp_laddr.s_addr !=
					    laddr.s_addr) {
						/* Bound to a different address */
						continue;
					}
				} else {
					if (laddr.s_addr != INADDR_ANY) {
						wildcard++;
					}
				}
				if (wildcard < matchwild) {
					match = inp;
					matchwild = wildcard;
					/* Exact match cannot be beaten */
					if (matchwild == 0) {
						break;
					}
				}
			}
		}
		KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,
		    0, 0, 0, 0);
		return match;
	}
}
2388
2389 /*
2390 * Check if PCB exists in hash list.
2391 */
/*
 * Like in_pcblookup_hash(), but only reports whether a matching pcb with
 * a socket exists, returning 1/0 and filling *uid / *gid with the owning
 * socket's credentials (UID_MAX/GID_MAX when not found).  No use reference
 * is taken on the pcb.
 */
int
in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    uid_t *uid, gid_t *gid, struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	int found = 0;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	*uid = UID_MAX;
	*gid = GID_MAX;

	/* Hold the pcb list shared across the whole lookup */
	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* Honor interface-scoped receive restrictions */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			if ((found = (inp->inp_socket != NULL))) {
				/*
				 * Found.
				 */
				*uid = kauth_cred_getuid(
					inp->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					inp->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}

	/* Second pass: unconnected (wildcard foreign address) pcbs */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				/* Bound to our local address: best wildcard match */
				if ((found = (inp->inp_socket != NULL))) {
					*uid = kauth_cred_getuid(
						inp->inp_socket->so_cred);
					*gid = kauth_cred_getgid(
						inp->inp_socket->so_cred);
				}
				lck_rw_done(&pcbinfo->ipi_lock);
				return found;
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/*
				 * Fully wildcarded; remember it, preferring a
				 * plain IPv4 socket over an IPv6 (v4-mapped) one.
				 */
				if (inp->inp_socket &&
				    SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			if ((found = (local_wild_mapped->inp_socket != NULL))) {
				*uid = kauth_cred_getuid(
					local_wild_mapped->inp_socket->so_cred);
				*gid = kauth_cred_getgid(
					local_wild_mapped->inp_socket->so_cred);
			}
			lck_rw_done(&pcbinfo->ipi_lock);
			return found;
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return 0;
	}
	if ((found = (local_wild->inp_socket != NULL))) {
		*uid = kauth_cred_getuid(
			local_wild->inp_socket->so_cred);
		*gid = kauth_cred_getgid(
			local_wild->inp_socket->so_cred);
	}
	lck_rw_done(&pcbinfo->ipi_lock);
	return found;
}
2518
2519 /*
2520 * Lookup PCB in hash list.
2521 */
/*
 * Two-pass lookup: exact {faddr,fport,laddr,lport} match first, then
 * (if wildcard is set) unconnected pcbs on lport, preferring a pcb bound
 * to laddr, then a plain IPv4 wildcard, then an IPv6 (v4-mapped) wildcard.
 * On success the returned pcb carries a use reference (WNT_ACQUIRE);
 * pcbs already marked WNT_STOPUSING are treated as not found.
 */
struct inpcb *
in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
    u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
    struct ifnet *ifp)
{
	struct inpcbhead *head;
	struct inpcb *inp;
	u_short fport = (u_short)fport_arg, lport = (u_short)lport_arg;
	struct inpcb *local_wild = NULL;
	struct inpcb *local_wild_mapped = NULL;

	/* Hold the pcb list shared across the whole lookup */
	lck_rw_lock_shared(&pcbinfo->ipi_lock);

	/*
	 * First look for an exact match.
	 */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		/* Honor interface-scoped receive restrictions */
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == faddr.s_addr &&
		    inp->inp_laddr.s_addr == laddr.s_addr &&
		    inp->inp_fport == fport &&
		    inp->inp_lport == lport) {
			/*
			 * Found.
			 */
			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
			    WNT_STOPUSING) {
				lck_rw_done(&pcbinfo->ipi_lock);
				return inp;
			} else {
				/* it's there but dead, say it isn't found */
				lck_rw_done(&pcbinfo->ipi_lock);
				return NULL;
			}
		}
	}

	if (!wildcard) {
		/*
		 * Not found.
		 */
		lck_rw_done(&pcbinfo->ipi_lock);
		return NULL;
	}

	/* Second pass: unconnected (wildcard foreign address) pcbs */
	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
	    pcbinfo->ipi_hashmask)];
	LIST_FOREACH(inp, head, inp_hash) {
		if (!(inp->inp_vflag & INP_IPV4)) {
			continue;
		}
		if (inp_restricted_recv(inp, ifp)) {
			continue;
		}

#if NECP
		if (!necp_socket_is_allowed_to_recv_on_interface(inp, ifp)) {
			continue;
		}
#endif /* NECP */

		if (inp->inp_faddr.s_addr == INADDR_ANY &&
		    inp->inp_lport == lport) {
			if (inp->inp_laddr.s_addr == laddr.s_addr) {
				/* Bound to our local address: best wildcard match */
				if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
				    WNT_STOPUSING) {
					lck_rw_done(&pcbinfo->ipi_lock);
					return inp;
				} else {
					/* it's dead; say it isn't found */
					lck_rw_done(&pcbinfo->ipi_lock);
					return NULL;
				}
			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
				/*
				 * Fully wildcarded; remember it, preferring a
				 * plain IPv4 socket over an IPv6 (v4-mapped) one.
				 */
				if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) {
					local_wild_mapped = inp;
				} else {
					local_wild = inp;
				}
			}
		}
	}
	if (local_wild == NULL) {
		if (local_wild_mapped != NULL) {
			if (in_pcb_checkstate(local_wild_mapped,
			    WNT_ACQUIRE, 0) != WNT_STOPUSING) {
				lck_rw_done(&pcbinfo->ipi_lock);
				return local_wild_mapped;
			} else {
				/* it's dead; say it isn't found */
				lck_rw_done(&pcbinfo->ipi_lock);
				return NULL;
			}
		}
		lck_rw_done(&pcbinfo->ipi_lock);
		return NULL;
	}
	if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
		lck_rw_done(&pcbinfo->ipi_lock);
		return local_wild;
	}
	/*
	 * It's either not found or is already dead.
	 */
	lck_rw_done(&pcbinfo->ipi_lock);
	return NULL;
}
2647
2648 /*
2649 * @brief Insert PCB onto various hash lists.
2650 *
2651 * @param inp Pointer to internet protocol control block
2652 * @param locked Implies if ipi_lock (protecting pcb list)
2653 * is already locked or not.
2654 *
2655 * @return int error on failure and 0 on success
2656 */
int
in_pcbinshash(struct inpcb *inp, int locked)
{
	struct inpcbhead *pcbhash;
	struct inpcbporthead *pcbporthash;
	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
	struct inpcbport *phd;
	u_int32_t hashkey_faddr;

	if (!locked) {
		if (!lck_rw_try_lock_exclusive(&pcbinfo->ipi_lock)) {
			/*
			 * Lock inversion issue, mostly with udp
			 * multicast packets: drop the socket lock,
			 * take ipi_lock blocking, then re-take the
			 * socket lock.
			 */
			socket_unlock(inp->inp_socket, 0);
			lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
			socket_lock(inp->inp_socket, 0);
		}
	}

	/*
	 * This routine or its caller may have given up
	 * socket's protocol lock briefly.
	 * During that time the socket may have been dropped.
	 * Safe-guarding against that.
	 */
	if (inp->inp_state == INPCB_STATE_DEAD) {
		if (!locked) {
			lck_rw_done(&pcbinfo->ipi_lock);
		}
		return ECONNABORTED;
	}


	/* Hash key: for v6 pcbs, the low 32 bits of the foreign address */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, pcbinfo->ipi_hashmask);

	pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element];

	pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport,
	    pcbinfo->ipi_porthashmask)];

	/*
	 * Go through port list and look for a head for this lport.
	 */
	LIST_FOREACH(phd, pcbporthash, phd_hash) {
		if (phd->phd_port == inp->inp_lport) {
			break;
		}
	}

	/*
	 * If none exists, malloc one and tack it on.
	 */
	if (phd == NULL) {
		phd = kalloc_type(struct inpcbport, Z_WAITOK | Z_NOFAIL);
		phd->phd_port = inp->inp_lport;
		LIST_INIT(&phd->phd_pcblist);
		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
	}

	/* Must not already be on a hash list */
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

#if SKYWALK
	/* Reserve the local port in the shared namespace registrar */
	int err;
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		if (inp->inp_vflag & INP_IPV6) {
			err = netns_reserve_in6(&inp->inp_netns_token,
			    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		} else {
			err = netns_reserve_in(&inp->inp_netns_token,
			    inp->inp_laddr, (uint8_t)SOCK_PROTO(so), inp->inp_lport,
			    NETNS_BSD | NETNS_PRERESERVED, NULL);
		}
		if (err) {
			/* Reservation failed; pcb is not hashed */
			if (!locked) {
				lck_rw_done(&pcbinfo->ipi_lock);
			}
			return err;
		}
		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */

	/* Link onto the port head, the addr/port hash, and mark as hashed */
	inp->inp_phd = phd;
	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

	if (!locked) {
		lck_rw_done(&pcbinfo->ipi_lock);
	}

#if NECP
	// This call catches the original setting of the local address
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */

	return 0;
}
2768
2769 /*
2770 * Move PCB to the proper hash bucket when { faddr, fport } have been
2771 * changed. NOTE: This does not handle the case of the lport changing (the
2772 * hashed port list would have to be updated as well), so the lport must
2773 * not change after in_pcbinshash() has been called.
2774 */
void
in_pcbrehash(struct inpcb *inp)
{
	struct inpcbhead *head;
	u_int32_t hashkey_faddr;

#if SKYWALK
	/* Keep the namespace registrar in sync with the new local address */
	struct socket *so = inp->inp_socket;
	if ((SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP) &&
	    !(inp->inp_flags2 & INP2_EXTERNAL_PORT)) {
		int err;
		if (NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
			/* Existing reservation: just move it to the new address */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_change_addr_in6(
					&inp->inp_netns_token, inp->in6p_laddr);
			} else {
				err = netns_change_addr_in(
					&inp->inp_netns_token, inp->inp_laddr);
			}
		} else {
			/* No reservation yet: take one for <laddr, proto, lport> */
			if (inp->inp_vflag & INP_IPV6) {
				err = netns_reserve_in6(&inp->inp_netns_token,
				    inp->in6p_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			} else {
				err = netns_reserve_in(&inp->inp_netns_token,
				    inp->inp_laddr, (uint8_t)SOCK_PROTO(so),
				    inp->inp_lport, NETNS_BSD, NULL);
			}
		}
		/*
		 * We are assuming that whatever code paths result in a rehash
		 * did their due diligence and ensured that the given
		 * <proto, laddr, lport> tuple was free ahead of time. Just
		 * reserving the lport on INADDR_ANY should be enough, since
		 * that will block Skywalk from trying to reserve that same
		 * port. Given this assumption, the above netns calls should
		 * never fail.
		 */
		VERIFY(err == 0);

		netns_set_ifnet(&inp->inp_netns_token, inp->inp_last_outifp);
		inp_update_netns_flags(so);
	}
#endif /* SKYWALK */
	/* Hash key: for v6 pcbs, the low 32 bits of the foreign address */
	if (inp->inp_vflag & INP_IPV6) {
		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
	} else {
		hashkey_faddr = inp->inp_faddr.s_addr;
	}

	/* Recompute the bucket from the (possibly changed) faddr/fport */
	inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport,
	    inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask);
	head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element];

	/* Unlink from the old bucket, if hashed */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		LIST_REMOVE(inp, inp_hash);
		inp->inp_flags2 &= ~INP2_INHASHLIST;
	}

	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));
	LIST_INSERT_HEAD(head, inp, inp_hash);
	inp->inp_flags2 |= INP2_INHASHLIST;

#if NECP
	// This call catches updates to the remote addresses
	inp_update_necp_policy(inp, NULL, NULL, 0);
#endif /* NECP */
}
2842
/*
 * Remove PCB from various lists.
 * Must be called with the pcbinfo lock held in exclusive mode.
 */
void
in_pcbremlists(struct inpcb *inp)
{
	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;

	/*
	 * Check if it's in hashlist -- an inp is placed in hashlist when
	 * its local port gets assigned. So it should also be present
	 * in the port list.
	 */
	if (inp->inp_flags2 & INP2_INHASHLIST) {
		struct inpcbport *phd = inp->inp_phd;

		VERIFY(phd != NULL && inp->inp_lport > 0);

		/* Unlink from the addr/port hash bucket */
		LIST_REMOVE(inp, inp_hash);
		inp->inp_hash.le_next = NULL;
		inp->inp_hash.le_prev = NULL;

		/* Unlink from the per-port pcb list */
		LIST_REMOVE(inp, inp_portlist);
		inp->inp_portlist.le_next = NULL;
		inp->inp_portlist.le_prev = NULL;
		/* Last pcb on this port: dispose of the port head too */
		if (LIST_EMPTY(&phd->phd_pcblist)) {
			LIST_REMOVE(phd, phd_hash);
			kfree_type(struct inpcbport, phd);
		}
		inp->inp_phd = NULL;
		inp->inp_flags2 &= ~INP2_INHASHLIST;
#if SKYWALK
		/* Free up the port in the namespace registrar */
		netns_release(&inp->inp_netns_token);
		netns_release(&inp->inp_wildcard_netns_token);
#endif /* SKYWALK */
	}
	VERIFY(!(inp->inp_flags2 & INP2_INHASHLIST));

	if (inp->inp_flags2 & INP2_TIMEWAIT) {
		/* Remove from time-wait queue */
		tcp_remove_from_time_wait(inp);
		inp->inp_flags2 &= ~INP2_TIMEWAIT;
		VERIFY(inp->inp_pcbinfo->ipi_twcount != 0);
		inp->inp_pcbinfo->ipi_twcount--;
	} else {
		/* Remove from global inp list if it is not time-wait */
		LIST_REMOVE(inp, inp_list);
	}

	/* Detach from the flow-control tree, if linked */
	if (inp->inp_flags2 & INP2_IN_FCTREE) {
		inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED | INPFC_REMOVE));
		VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE));
	}

	inp->inp_pcbinfo->ipi_count--;
}
2901
/*
 * Mechanism used to defer the memory release of PCBs.
 * The pcb list will contain the pcb until the reaper can clean it up if
 * the following conditions are met:
 * 1) state "DEAD",
 * 2) wantcnt is STOPUSING
 * 3) usecount is 0
 * This function is called to either mark the pcb as unusable
 * (WNT_STOPUSING), acquire a use reference (WNT_ACQUIRE), or release
 * one (WNT_RELEASE).
 */
int
in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
{
	/* wantcnt is manipulated lock-free via compare-and-swap below */
	volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt;
	UInt32 origwant;
	UInt32 newwant;

	switch (mode) {
	case WNT_STOPUSING:
		/*
		 * Try to mark the pcb as ready for recycling. CAS with
		 * STOPUSING, if success we're good, if it's in use, will
		 * be marked later
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}
		pcb->inp_state = INPCB_STATE_DEAD;

stopusing:
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}
		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}

		/* Ask the garbage collector to reap this pcb soon */
		inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST);

		origwant = *wantcnt;
		if ((UInt16) origwant == 0xffff) { /* should stop using */
			return WNT_STOPUSING;
		}
		newwant = 0xffff;
		if ((UInt16) origwant == 0) {
			/* try to mark it as unusable now */
			OSCompareAndSwap(origwant, newwant, wantcnt);
		}
		return WNT_STOPUSING;

	case WNT_ACQUIRE:
		/*
		 * Try to increase reference to pcb. If WNT_STOPUSING
		 * should bail out. If socket state DEAD, try to set count
		 * to STOPUSING, return failed otherwise increase cnt.
		 */
		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				return WNT_STOPUSING;
			}
			newwant = origwant + 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));
		return WNT_ACQUIRE;

	case WNT_RELEASE:
		/*
		 * Release reference. If result is null and pcb state
		 * is DEAD, set wanted bit to STOPUSING
		 */
		if (locked == 0) {
			socket_lock(pcb->inp_socket, 1);
		}

		do {
			origwant = *wantcnt;
			if ((UInt16) origwant == 0x0) {
				/* releasing a reference never taken is a bug */
				panic("%s: pcb=%p release with zero count",
				    __func__, pcb);
				/* NOTREACHED */
			}
			if ((UInt16) origwant == 0xffff) {
				/* should stop using */
				if (locked == 0) {
					socket_unlock(pcb->inp_socket, 1);
				}
				return WNT_STOPUSING;
			}
			newwant = origwant - 1;
		} while (!OSCompareAndSwap(origwant, newwant, wantcnt));

		/* Last releaser of a DEAD pcb marks it STOPUSING */
		if (pcb->inp_state == INPCB_STATE_DEAD) {
			goto stopusing;
		}
		if (pcb->inp_socket->so_usecount < 0) {
			panic("%s: RELEASE pcb=%p so=%p usecount is negative",
			    __func__, pcb, pcb->inp_socket);
			/* NOTREACHED */
		}

		if (locked == 0) {
			socket_unlock(pcb->inp_socket, 1);
		}
		return WNT_RELEASE;

	default:
		panic("%s: so=%p not a valid state =%x", __func__,
		    pcb->inp_socket, mode);
		/* NOTREACHED */
	}

	/* NOTREACHED */
	return mode;
}
3018
3019 /*
3020 * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat.
3021 * The inpcb_compat data structure is passed to user space and must
3022 * not change. We intentionally avoid copying pointers.
3023 */
void
inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat)
{
	/* Zero first so pointers and unexported fields never leak out */
	bzero(inp_compat, sizeof(*inp_compat));
	inp_compat->inp_fport = inp->inp_fport;
	inp_compat->inp_lport = inp->inp_lport;
	/* Obsolete NAT fields are always exported as zero */
	inp_compat->nat_owner = 0;
	inp_compat->nat_cookie = 0;
	inp_compat->inp_gencnt = inp->inp_gencnt;
	inp_compat->inp_flags = inp->inp_flags;
	inp_compat->inp_flow = inp->inp_flow;
	inp_compat->inp_vflag = inp->inp_vflag;
	inp_compat->inp_ip_ttl = inp->inp_ip_ttl;
	inp_compat->inp_ip_p = inp->inp_ip_p;
	inp_compat->inp_dependfaddr.inp6_foreign =
	    inp->inp_dependfaddr.inp6_foreign;
	inp_compat->inp_dependladdr.inp6_local =
	    inp->inp_dependladdr.inp6_local;
	inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	/* inp6_hlim and inp6_ifindex are deliberately exported as zero */
	inp_compat->inp_depend6.inp6_hlim = 0;
	inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	inp_compat->inp_depend6.inp6_ifindex = 0;
	inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}
3048
#if XNU_TARGET_OS_OSX
/*
 * Export selected inpcb fields into the 64-bit sysctl structure xinpcb64.
 * Like inpcb_to_compat(), this copies values only -- never pointers.
 */
void
inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp)
{
	xinp->inp_fport = inp->inp_fport;
	xinp->inp_lport = inp->inp_lport;
	xinp->inp_gencnt = inp->inp_gencnt;
	xinp->inp_flags = inp->inp_flags;
	xinp->inp_flow = inp->inp_flow;
	xinp->inp_vflag = inp->inp_vflag;
	xinp->inp_ip_ttl = inp->inp_ip_ttl;
	xinp->inp_ip_p = inp->inp_ip_p;
	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
	/* inp6_hlim and inp6_ifindex are deliberately exported as zero */
	xinp->inp_depend6.inp6_hlim = 0;
	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
	xinp->inp_depend6.inp6_ifindex = 0;
	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
}
#endif /* XNU_TARGET_OS_OSX */
3070
3071 /*
3072 * The following routines implement this scheme:
3073 *
3074 * Callers of ip_output() that intend to cache the route in the inpcb pass
3075 * a local copy of the struct route to ip_output(). Using a local copy of
3076 * the cached route significantly simplifies things as IP no longer has to
3077 * worry about having exclusive access to the passed in struct route, since
3078 * it's defined in the caller's stack; in essence, this allows for a lock-
3079 * less operation when updating the struct route at the IP level and below,
3080 * whenever necessary. The scheme works as follows:
3081 *
3082 * Prior to dropping the socket's lock and calling ip_output(), the caller
3083 * copies the struct route from the inpcb into its stack, and adds a reference
3084 * to the cached route entry, if there was any. The socket's lock is then
3085 * dropped and ip_output() is called with a pointer to the copy of struct
3086 * route defined on the stack (not to the one in the inpcb.)
3087 *
3088 * Upon returning from ip_output(), the caller then acquires the socket's
3089 * lock and synchronizes the cache; if there is no route cached in the inpcb,
3090 * it copies the local copy of struct route (which may or may not contain any
3091 * route) back into the cache; otherwise, if the inpcb has a route cached in
3092 * it, the one in the local copy will be freed, if there's any. Trashing the
3093 * cached route in the inpcb can be avoided because ip_output() is single-
3094 * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
3095 * by the socket/transport layer.)
3096 */
/*
 * Copy the PCB's cached route into a caller-provided (stack) struct route,
 * per the lock-avoidance scheme described above, so that ip_output() can be
 * called without the socket lock held.  Caller must hold the socket lock.
 */
void
inp_route_copyout(struct inpcb *inp, struct route *dst)
{
	struct route *src = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/*
	 * If the route in the PCB is stale or not for IPv4, blow it away;
	 * this is possible in the case of IPv4-mapped address case.
	 */
	if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) {
		ROUTE_RELEASE(src);
	}

	route_copyout(dst, src, sizeof(*dst));
}
3114
/*
 * Synchronize the PCB's route cache with the caller's (stack) copy after
 * ip_output() returns, per the scheme described above.  Caller must hold
 * the socket lock.  Panics if the copied-in route is not an IPv4 route.
 */
void
inp_route_copyin(struct inpcb *inp, struct route *src)
{
	struct route *dst = &inp->inp_route;

	socket_lock_assert_owned(inp->inp_socket);

	/* Minor sanity check */
	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) {
		panic("%s: wrong or corrupted route: %p", __func__, src);
	}

	route_copyin(src, dst, sizeof(*src));
}
3129
/*
 * Handler for setting IP_BOUND_IF/IPV6_BOUND_IF socket option.
 *
 * ifscope: interface index to bind the PCB to; IFSCOPE_NONE (0) unbinds.
 * pifp:    if non-NULL, receives the interface bound to (NULL on unbind).
 *
 * Returns 0 on success, or ENXIO if the index is out of range or does not
 * refer to an attached interface.
 */
int
inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp)
{
	struct ifnet *ifp = NULL;

	/* validate the index against the interface table under the head lock */
	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index) || (ifscope != IFSCOPE_NONE &&
	    (ifp = ifindex2ifnet[ifscope]) == NULL)) {
		ifnet_head_done();
		return ENXIO;
	}
	ifnet_head_done();

	VERIFY(ifp != NULL || ifscope == IFSCOPE_NONE);

	/*
	 * A zero interface scope value indicates an "unbind".
	 * Otherwise, take in whatever value the app desires;
	 * the app may already know the scope (or force itself
	 * to such a scope) ahead of time before the interface
	 * gets attached. It doesn't matter either way; any
	 * route lookup from this point on will require an
	 * exact match for the embedded interface scope.
	 */
	inp->inp_boundifp = ifp;
	if (inp->inp_boundifp == NULL) {
		inp->inp_flags &= ~INP_BOUND_IF;
	} else {
		inp->inp_flags |= INP_BOUND_IF;
	}

	/* Blow away any cached route in the PCB */
	ROUTE_RELEASE(&inp->inp_route);

	if (pifp != NULL) {
		*pifp = ifp;
	}

	return 0;
}
3173
3174 /*
3175 * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3176 * as well as for setting PROC_UUID_NO_CELLULAR policy.
3177 */
3178 void
inp_set_nocellular(struct inpcb * inp)3179 inp_set_nocellular(struct inpcb *inp)
3180 {
3181 inp->inp_flags |= INP_NO_IFT_CELLULAR;
3182
3183 /* Blow away any cached route in the PCB */
3184 ROUTE_RELEASE(&inp->inp_route);
3185 }
3186
3187 /*
3188 * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option,
3189 * as well as for clearing PROC_UUID_NO_CELLULAR policy.
3190 */
3191 void
inp_clear_nocellular(struct inpcb * inp)3192 inp_clear_nocellular(struct inpcb *inp)
3193 {
3194 struct socket *so = inp->inp_socket;
3195
3196 /*
3197 * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket
3198 * has a higher precendence than INP_NO_IFT_CELLULAR. Clear the flag
3199 * if and only if the socket is unrestricted.
3200 */
3201 if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
3202 inp->inp_flags &= ~INP_NO_IFT_CELLULAR;
3203
3204 /* Blow away any cached route in the PCB */
3205 ROUTE_RELEASE(&inp->inp_route);
3206 }
3207 }
3208
3209 void
inp_set_noexpensive(struct inpcb * inp)3210 inp_set_noexpensive(struct inpcb *inp)
3211 {
3212 inp->inp_flags2 |= INP2_NO_IFF_EXPENSIVE;
3213
3214 /* Blow away any cached route in the PCB */
3215 ROUTE_RELEASE(&inp->inp_route);
3216 }
3217
3218 void
inp_set_noconstrained(struct inpcb * inp)3219 inp_set_noconstrained(struct inpcb *inp)
3220 {
3221 inp->inp_flags2 |= INP2_NO_IFF_CONSTRAINED;
3222
3223 /* Blow away any cached route in the PCB */
3224 ROUTE_RELEASE(&inp->inp_route);
3225 }
3226
3227 void
inp_set_awdl_unrestricted(struct inpcb * inp)3228 inp_set_awdl_unrestricted(struct inpcb *inp)
3229 {
3230 inp->inp_flags2 |= INP2_AWDL_UNRESTRICTED;
3231
3232 /* Blow away any cached route in the PCB */
3233 ROUTE_RELEASE(&inp->inp_route);
3234 }
3235
3236 boolean_t
inp_get_awdl_unrestricted(struct inpcb * inp)3237 inp_get_awdl_unrestricted(struct inpcb *inp)
3238 {
3239 return (inp->inp_flags2 & INP2_AWDL_UNRESTRICTED) ? TRUE : FALSE;
3240 }
3241
3242 void
inp_clear_awdl_unrestricted(struct inpcb * inp)3243 inp_clear_awdl_unrestricted(struct inpcb *inp)
3244 {
3245 inp->inp_flags2 &= ~INP2_AWDL_UNRESTRICTED;
3246
3247 /* Blow away any cached route in the PCB */
3248 ROUTE_RELEASE(&inp->inp_route);
3249 }
3250
3251 void
inp_set_intcoproc_allowed(struct inpcb * inp)3252 inp_set_intcoproc_allowed(struct inpcb *inp)
3253 {
3254 inp->inp_flags2 |= INP2_INTCOPROC_ALLOWED;
3255
3256 /* Blow away any cached route in the PCB */
3257 ROUTE_RELEASE(&inp->inp_route);
3258 }
3259
3260 boolean_t
inp_get_intcoproc_allowed(struct inpcb * inp)3261 inp_get_intcoproc_allowed(struct inpcb *inp)
3262 {
3263 return (inp->inp_flags2 & INP2_INTCOPROC_ALLOWED) ? TRUE : FALSE;
3264 }
3265
3266 void
inp_clear_intcoproc_allowed(struct inpcb * inp)3267 inp_clear_intcoproc_allowed(struct inpcb *inp)
3268 {
3269 inp->inp_flags2 &= ~INP2_INTCOPROC_ALLOWED;
3270
3271 /* Blow away any cached route in the PCB */
3272 ROUTE_RELEASE(&inp->inp_route);
3273 }
3274
3275 void
inp_set_management_allowed(struct inpcb * inp)3276 inp_set_management_allowed(struct inpcb *inp)
3277 {
3278 inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
3279 inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
3280
3281 /* Blow away any cached route in the PCB */
3282 ROUTE_RELEASE(&inp->inp_route);
3283 }
3284
3285 boolean_t
inp_get_management_allowed(struct inpcb * inp)3286 inp_get_management_allowed(struct inpcb *inp)
3287 {
3288 return (inp->inp_flags2 & INP2_MANAGEMENT_ALLOWED) ? TRUE : FALSE;
3289 }
3290
3291 void
inp_clear_management_allowed(struct inpcb * inp)3292 inp_clear_management_allowed(struct inpcb *inp)
3293 {
3294 inp->inp_flags2 &= ~INP2_MANAGEMENT_ALLOWED;
3295
3296 /* Blow away any cached route in the PCB */
3297 ROUTE_RELEASE(&inp->inp_route);
3298 }
3299
3300 #if NECP
/*
 * Called when PROC_UUID_NECP_APP_POLICY is set.
 * Marks the PCB as wanting NECP app-policy evaluation; no route flush
 * is needed here since this does not affect interface eligibility.
 */
void
inp_set_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 |= INP2_WANT_APP_POLICY;
}
3309
/*
 * Called when PROC_UUID_NECP_APP_POLICY is cleared.
 * Removes the NECP app-policy marker from the PCB.
 */
void
inp_clear_want_app_policy(struct inpcb *inp)
{
	inp->inp_flags2 &= ~INP2_WANT_APP_POLICY;
}
3318 #endif /* NECP */
3319
/*
 * Calculate flow hash for an inp, used by an interface to identify a
 * flow. When an interface provides flow control advisory, this flow
 * hash is used as an identifier.
 *
 * Side effects: stores the hash in inp->inp_flowhash and inserts the PCB
 * into inp_fc_tree (setting INP2_IN_FCTREE) under inp_fc_lck.
 */
u_int32_t
inp_calc_flowhash(struct inpcb *inp)
{
#if SKYWALK

	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));

	/* build the flow key from the PCB's address/port/protocol 5-tuple */
	if (inp->inp_vflag & INP_IPV4) {
		fk.ffk_af = AF_INET;
		fk.ffk_laddr_v4 = inp->inp_laddr;
		fk.ffk_raddr_v4 = inp->inp_faddr;
	} else {
		fk.ffk_af = AF_INET6;
		fk.ffk_laddr_v6 = inp->in6p_laddr;
		fk.ffk_raddr_v6 = inp->in6p_faddr;
		/* clear embedded scope ID */
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_laddr_v6)) {
			fk.ffk_laddr_v6.s6_addr16[1] = 0;
		}
		if (IN6_IS_SCOPE_EMBED(&fk.ffk_raddr_v6)) {
			fk.ffk_raddr_v6.s6_addr16[1] = 0;
		}
	}

	fk.ffk_lport = inp->inp_lport;
	fk.ffk_rport = inp->inp_fport;
	/* inp_ip_p may still be 0 (e.g. before connect); fall back to SOCK_PROTO */
	fk.ffk_proto = (inp->inp_ip_p != 0) ? inp->inp_ip_p :
	    (uint8_t)SOCK_PROTO(inp->inp_socket);
	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_INPCB, &fk, &flowid);
	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	/* a PCB gets a flowhash exactly once */
	ASSERT(inp->inp_flowhash == 0);
	ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) == 0);
	inp->inp_flowhash = flowid;
	VERIFY(RB_INSERT(inp_fc_tree, &inp_fc_tree, inp) == NULL);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowid;

#else /* !SKYWALK */

	struct inp_flowhash_key fh __attribute__((aligned(8)));
	u_int32_t flowhash = 0;
	struct inpcb *tmp_inp = NULL;

	/* lazily initialize the global hash seed */
	if (inp_hash_seed == 0) {
		inp_hash_seed = RandomULong();
	}

	bzero(&fh, sizeof(fh));

	bcopy(&inp->inp_dependladdr, &fh.infh_laddr, sizeof(fh.infh_laddr));
	bcopy(&inp->inp_dependfaddr, &fh.infh_faddr, sizeof(fh.infh_faddr));

	fh.infh_lport = inp->inp_lport;
	fh.infh_fport = inp->inp_fport;
	fh.infh_af = (inp->inp_vflag & INP_IPV6) ? AF_INET6 : AF_INET;
	fh.infh_proto = inp->inp_ip_p;
	fh.infh_rand1 = RandomULong();
	fh.infh_rand2 = RandomULong();

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), inp_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	inp->inp_flowhash = flowhash;

	/* Insert the inp into inp_fc_tree */
	lck_mtx_lock_spin(&inp_fc_lck);
	tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp);
	if (tmp_inp != NULL) {
		/*
		 * There is a different inp with the same flowhash.
		 * There can be a collision on flow hash but the
		 * probability is low. Let's recompute the
		 * flowhash.
		 */
		lck_mtx_unlock(&inp_fc_lck);
		/* recompute hash seed */
		inp_hash_seed = RandomULong();
		goto try_again;
	}

	RB_INSERT(inp_fc_tree, &inp_fc_tree, inp);
	inp->inp_flags2 |= INP2_IN_FCTREE;
	lck_mtx_unlock(&inp_fc_lck);

	return flowhash;

#endif /* !SKYWALK */
}
3424
3425 void
inp_flowadv(uint32_t flowhash)3426 inp_flowadv(uint32_t flowhash)
3427 {
3428 struct inpcb *inp;
3429
3430 inp = inp_fc_getinp(flowhash, 0);
3431
3432 if (inp == NULL) {
3433 return;
3434 }
3435 inp_fc_feedback(inp);
3436 }
3437
/*
 * Function to compare inp_fc_entries in inp flow control tree
 *
 * Compares the raw bytes of the two flow hashes; the resulting order is
 * byte-order dependent, but any consistent total order suffices since the
 * tree is only used for exact-match lookups by flowhash.
 */
static inline int
infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2)
{
	return memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash),
	           sizeof(inp1->inp_flowhash));
}
3447
/*
 * Look up a PCB by flow hash in the flow-control tree.
 *
 * flags may contain:
 *   INPFC_SOLOCKED - caller already holds the socket lock (passed through
 *                    to in_pcb_checkstate()).
 *   INPFC_REMOVE   - remove the entry from the tree instead of returning
 *                    it; always returns NULL in this mode.
 *
 * On a plain lookup, returns the PCB with a WNT_ACQUIRE reference held, or
 * NULL if absent or if the PCB is being torn down (WNT_STOPUSING).
 */
static struct inpcb *
inp_fc_getinp(u_int32_t flowhash, u_int32_t flags)
{
	struct inpcb *inp = NULL;
	int locked = (flags & INPFC_SOLOCKED) ? 1 : 0;

	lck_mtx_lock_spin(&inp_fc_lck);
	key_inp.inp_flowhash = flowhash;
	inp = RB_FIND(inp_fc_tree, &inp_fc_tree, &key_inp);
	if (inp == NULL) {
		/* inp is not present, return */
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (flags & INPFC_REMOVE) {
		ASSERT((inp->inp_flags2 & INP2_IN_FCTREE) != 0);
		/* convert to a full mutex before the heavier removal work */
		lck_mtx_convert_spin(&inp_fc_lck);
		RB_REMOVE(inp_fc_tree, &inp_fc_tree, inp);
		bzero(&(inp->infc_link), sizeof(inp->infc_link));
#if SKYWALK
		/* return the flow ID to the flowid namespace */
		VERIFY(inp->inp_flowhash != 0);
		flowidns_release_flowid(inp->inp_flowhash);
		inp->inp_flowhash = 0;
#endif /* !SKYWALK */
		inp->inp_flags2 &= ~INP2_IN_FCTREE;
		lck_mtx_unlock(&inp_fc_lck);
		return NULL;
	}

	if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) {
		inp = NULL;
	}
	lck_mtx_unlock(&inp_fc_lck);

	return inp;
}
3485
/*
 * Deliver an interface flow-control feedback event to a PCB: release the
 * want reference taken by inp_fc_getinp(), clear any flow-controlled or
 * suspended state, and unthrottle TCP for stream sockets.
 */
static void
inp_fc_feedback(struct inpcb *inp)
{
	struct socket *so = inp->inp_socket;

	/* we already hold a want_cnt on this inp, socket can't be null */
	VERIFY(so != NULL);
	socket_lock(so, 1);

	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		socket_unlock(so, 1);
		return;
	}

	/* a send is in flight; note the feedback so a racing advisory is ignored */
	if (inp->inp_sndinprog_cnt > 0) {
		inp->inp_flags |= INP_FC_FEEDBACK;
	}

	/*
	 * Return if the connection is not in flow-controlled state.
	 * This can happen if the connection experienced
	 * loss while it was in flow controlled state
	 */
	if (!INP_WAIT_FOR_IF_FEEDBACK(inp)) {
		socket_unlock(so, 1);
		return;
	}
	inp_reset_fc_state(inp);

	if (SOCK_TYPE(so) == SOCK_STREAM) {
		inp_fc_unthrottle_tcp(inp);
	}

	socket_unlock(so, 1);
}
3521
3522 void
inp_reset_fc_state(struct inpcb * inp)3523 inp_reset_fc_state(struct inpcb *inp)
3524 {
3525 struct socket *so = inp->inp_socket;
3526 int suspended = (INP_IS_FLOW_SUSPENDED(inp)) ? 1 : 0;
3527 int needwakeup = (INP_WAIT_FOR_IF_FEEDBACK(inp)) ? 1 : 0;
3528
3529 inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
3530
3531 if (suspended) {
3532 so->so_flags &= ~(SOF_SUSPENDED);
3533 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME));
3534 }
3535
3536 /* Give a write wakeup to unblock the socket */
3537 if (needwakeup) {
3538 sowwakeup(so);
3539 }
3540 }
3541
/*
 * Apply a flow advisory (FADV_FLOW_CONTROLLED or FADV_SUSPENDED) from an
 * interface to a PCB.  Returns 1 if the state was set, 0 if the advisory
 * was ignored (feedback race, or the PCB is not in the fc tree or is
 * being torn down).
 */
int
inp_set_fc_state(struct inpcb *inp, int advcode)
{
	boolean_t is_flow_controlled = INP_WAIT_FOR_IF_FEEDBACK(inp);
	struct inpcb *tmp_inp = NULL;
	/*
	 * If there was a feedback from the interface when
	 * send operation was in progress, we should ignore
	 * this flow advisory to avoid a race between setting
	 * flow controlled state and receiving feedback from
	 * the interface
	 */
	if (inp->inp_flags & INP_FC_FEEDBACK) {
		return 0;
	}

	inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED);
	if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash,
	    INPFC_SOLOCKED)) != NULL) {
		if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
			return 0;
		}
		VERIFY(tmp_inp == inp);
		switch (advcode) {
		case FADV_FLOW_CONTROLLED:
			inp->inp_flags |= INP_FLOW_CONTROLLED;
			inp->inp_fadv_flow_ctrl_cnt++;
			break;
		case FADV_SUSPENDED:
			inp->inp_flags |= INP_FLOW_SUSPENDED;
			inp->inp_fadv_suspended_cnt++;
			soevent(inp->inp_socket,
			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_SUSPEND));

			/* Record the fact that suspend event was sent */
			inp->inp_socket->so_flags |= SOF_SUSPENDED;
			break;
		}

		/* only throttle TCP on the transition into flow control */
		if (!is_flow_controlled && SOCK_TYPE(inp->inp_socket) == SOCK_STREAM) {
			inp_fc_throttle_tcp(inp);
		}
		return 1;
	}
	return 0;
}
3588
3589 /*
3590 * Handler for SO_FLUSH socket option.
3591 */
3592 int
inp_flush(struct inpcb * inp,int optval)3593 inp_flush(struct inpcb *inp, int optval)
3594 {
3595 u_int32_t flowhash = inp->inp_flowhash;
3596 struct ifnet *rtifp, *oifp;
3597
3598 /* Either all classes or one of the valid ones */
3599 if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) {
3600 return EINVAL;
3601 }
3602
3603 /* We need a flow hash for identification */
3604 if (flowhash == 0) {
3605 return 0;
3606 }
3607
3608 /* Grab the interfaces from the route and pcb */
3609 rtifp = ((inp->inp_route.ro_rt != NULL) ?
3610 inp->inp_route.ro_rt->rt_ifp : NULL);
3611 oifp = inp->inp_last_outifp;
3612
3613 if (rtifp != NULL) {
3614 if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3615 }
3616 if (oifp != NULL && oifp != rtifp) {
3617 if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0);
3618 }
3619
3620 return 0;
3621 }
3622
3623 /*
3624 * Clear the INP_INADDR_ANY flag (special case for PPP only)
3625 */
3626 void
inp_clear_INP_INADDR_ANY(struct socket * so)3627 inp_clear_INP_INADDR_ANY(struct socket *so)
3628 {
3629 struct inpcb *inp = NULL;
3630
3631 socket_lock(so, 1);
3632 inp = sotoinpcb(so);
3633 if (inp) {
3634 inp->inp_flags &= ~INP_INADDR_ANY;
3635 }
3636 socket_unlock(so, 1);
3637 }
3638
/*
 * Fill in process ownership information (real and effective pid, names,
 * UUIDs) for the socket owning this PCB.
 */
void
inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo)
{
	struct socket *so = inp->inp_socket;

	/* real (last) process that used the socket */
	soprocinfo->spi_pid = so->last_pid;
	strlcpy(&soprocinfo->spi_proc_name[0], &inp->inp_last_proc_name[0],
	    sizeof(soprocinfo->spi_proc_name));
	if (so->last_pid != 0) {
		uuid_copy(soprocinfo->spi_uuid, so->last_uuid);
	}
	/*
	 * When not delegated, the effective pid is the same as the real pid
	 */
	if (so->so_flags & SOF_DELEGATED) {
		soprocinfo->spi_delegated = 1;
		soprocinfo->spi_epid = so->e_pid;
		uuid_copy(soprocinfo->spi_euuid, so->e_uuid);
	} else {
		soprocinfo->spi_delegated = 0;
		soprocinfo->spi_epid = so->last_pid;
	}
	strlcpy(&soprocinfo->spi_e_proc_name[0], &inp->inp_e_proc_name[0],
	    sizeof(soprocinfo->spi_e_proc_name));
}
3664
3665 int
inp_findinpcb_procinfo(struct inpcbinfo * pcbinfo,uint32_t flowhash,struct so_procinfo * soprocinfo)3666 inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash,
3667 struct so_procinfo *soprocinfo)
3668 {
3669 struct inpcb *inp = NULL;
3670 int found = 0;
3671
3672 bzero(soprocinfo, sizeof(struct so_procinfo));
3673
3674 if (!flowhash) {
3675 return -1;
3676 }
3677
3678 lck_rw_lock_shared(&pcbinfo->ipi_lock);
3679 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
3680 if (inp->inp_state != INPCB_STATE_DEAD &&
3681 inp->inp_socket != NULL &&
3682 inp->inp_flowhash == flowhash) {
3683 found = 1;
3684 inp_get_soprocinfo(inp, soprocinfo);
3685 break;
3686 }
3687 }
3688 lck_rw_done(&pcbinfo->ipi_lock);
3689
3690 return found;
3691 }
3692
3693 #if CONFIG_PROC_UUID_POLICY
/*
 * Apply (set) or remove (clear) the PROC_UUID_NO_CELLULAR policy on a PCB.
 * Clearing goes through inp_clear_nocellular(), which honors any stronger
 * SO_RESTRICT_DENY_CELLULAR socket restriction.  Logs access transitions
 * when net_io_policy_log is enabled.
 */
static void
inp_update_cellular_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = INP_NO_CELLULAR(inp);
	if (set) {
		inp_set_nocellular(inp);
	} else {
		inp_clear_nocellular(inp);
	}
	after = INP_NO_CELLULAR(inp);
	/* only log actual transitions */
	if (net_io_policy_log && (before != after)) {
		static const char *ok = "OK";
		static const char *nok = "NOACCESS";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* attribute the change to the effective (delegated) process if any */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		/* allow this socket to generate another notification event */
		so->so_ifdenied_notifies = 0;

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? ok : nok),
		    ((before < after) ? nok : ok));
	}
}
3737
3738 #if NECP
/*
 * Apply (set) or remove (clear) the PROC_UUID_NECP_APP_POLICY marker on a
 * PCB, logging transitions when net_io_policy_log is enabled.
 */
static void
inp_update_necp_want_app_policy(struct inpcb *inp, boolean_t set)
{
	struct socket *so = inp->inp_socket;
	int before, after;

	VERIFY(so != NULL);
	VERIFY(inp->inp_state != INPCB_STATE_DEAD);

	before = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	if (set) {
		inp_set_want_app_policy(inp);
	} else {
		inp_clear_want_app_policy(inp);
	}
	after = (inp->inp_flags2 & INP2_WANT_APP_POLICY);
	/* only log actual transitions */
	if (net_io_policy_log && (before != after)) {
		static const char *wanted = "WANTED";
		static const char *unwanted = "UNWANTED";
		uuid_string_t euuid_buf;
		pid_t epid;

		/* attribute the change to the effective (delegated) process if any */
		if (so->so_flags & SOF_DELEGATED) {
			uuid_unparse(so->e_uuid, euuid_buf);
			epid = so->e_pid;
		} else {
			uuid_unparse(so->last_uuid, euuid_buf);
			epid = so->last_pid;
		}

		log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d "
		    "euuid %s%s %s->%s\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so),
		    SOCK_TYPE(so), epid, euuid_buf,
		    (so->so_flags & SOF_DELEGATED) ?
		    " [delegated]" : "",
		    ((before < after) ? unwanted : wanted),
		    ((before < after) ? wanted : unwanted));
	}
}
3779 #endif /* NECP */
3780 #endif /* !CONFIG_PROC_UUID_POLICY */
3781
3782 #if NECP
/*
 * Re-evaluate the NECP policy match for this socket, optionally overriding
 * the local/remote addresses and bound interface used for matching.  If
 * NECP indicates the socket should be rescoped and the socket is not yet
 * bound to a port or a local address, bind it to the rescope interface and
 * mark it as scoped by NECP.
 */
void
inp_update_necp_policy(struct inpcb *inp, struct sockaddr *override_local_addr, struct sockaddr *override_remote_addr, u_int override_bound_interface)
{
	necp_socket_find_policy_match(inp, override_local_addr, override_remote_addr, override_bound_interface);
	if (necp_socket_should_rescope(inp) &&
	    inp->inp_lport == 0 &&
	    inp->inp_laddr.s_addr == INADDR_ANY &&
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		// If we should rescope, and the socket is not yet bound
		inp_bindif(inp, necp_socket_get_rescope_if_index(inp), NULL);
		inp->inp_flags2 |= INP2_SCOPED_BY_NECP;
	}
}
3796 #endif /* NECP */
3797
/*
 * Re-evaluate proc-UUID-based network policies (cellular denial, NECP app
 * policy) for this PCB.  The policy table's per-socket generation count is
 * used to detect changes; flags are only applied when it moves.  Returns 0
 * on success or when no policy entry exists (ENOENT is not an error);
 * otherwise an error from proc_uuid_policy_lookup().
 */
int
inp_update_policy(struct inpcb *inp)
{
#if CONFIG_PROC_UUID_POLICY
	struct socket *so = inp->inp_socket;
	uint32_t pflags = 0;
	int32_t ogencnt;
	int err = 0;
	uint8_t *lookup_uuid = NULL;

	if (!net_io_policy_uuid ||
	    so == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return 0;
	}

	/*
	 * Kernel-created sockets that aren't delegating other sockets
	 * are currently exempted from UUID policy checks.
	 */
	if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) {
		return 0;
	}

#if defined(XNU_TARGET_OS_OSX)
	/* on macOS, consult the responsible process UUID first */
	if (so->so_rpid > 0) {
		lookup_uuid = so->so_ruuid;
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}
#endif
	/* fall back to the effective (delegated) or last process UUID */
	if (lookup_uuid == NULL || err == ENOENT) {
		lookup_uuid = ((so->so_flags & SOF_DELEGATED) ? so->e_uuid : so->last_uuid);
		ogencnt = so->so_policy_gencnt;
		err = proc_uuid_policy_lookup(lookup_uuid, &pflags, &so->so_policy_gencnt);
	}

	/*
	 * Discard cached generation count if the entry is gone (ENOENT),
	 * so that we go thru the checks below.
	 */
	if (err == ENOENT && ogencnt != 0) {
		so->so_policy_gencnt = 0;
	}

	/*
	 * If the generation count has changed, inspect the policy flags
	 * and act accordingly. If a policy flag was previously set and
	 * the UUID is no longer present in the table (ENOENT), treat it
	 * as if the flag has been cleared.
	 */
	if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) {
		/* update cellular policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NO_CELLULAR)) {
			inp_update_cellular_policy(inp, FALSE);
		}
#if NECP
		/* update necp want app policy for this socket */
		if (err == 0 && (pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, TRUE);
		} else if (!(pflags & PROC_UUID_NECP_APP_POLICY)) {
			inp_update_necp_want_app_policy(inp, FALSE);
		}
#endif /* NECP */
	}

	return (err == ENOENT) ? 0 : err;
#else /* !CONFIG_PROC_UUID_POLICY */
#pragma unused(inp)
	return 0;
#endif /* !CONFIG_PROC_UUID_POLICY */
}
3871
/*
 * net.inet.log_restricted: when non-zero, inp_restricted_recv()/send()
 * log every packet denied by interface policy restrictions.
 */
unsigned int log_restricted;
SYSCTL_DECL(_net_inet);
SYSCTL_INT(_net_inet, OID_AUTO, log_restricted,
    CTLFLAG_RW | CTLFLAG_LOCKED, &log_restricted, 0,
    "Log network restrictions");
3877
3878
/*
 * Called when we need to enforce policy restrictions in the input path.
 *
 * Returns TRUE if we're not allowed to receive data, otherwise FALSE.
 */
static boolean_t
_inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Inbound restrictions.
	 */
	if (!sorestrictrecv) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	/* per-PCB interface-class opt-outs */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* everything below only applies to restricted-receive interfaces */
	if (!(ifp->if_eflags & IFEF_RESTRICTED_RECV)) {
		return FALSE;
	}

	if (inp->inp_flags & INP_RECV_ANYIF) {
		return FALSE;
	}

	/*
	 * An entitled process can use the management interface without being bound
	 * to the interface
	 */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (INP_MANAGEMENT_ALLOWED(inp)) {
			return FALSE;
		}
		if (if_management_verbose > 1) {
			os_log(OS_LOG_DEFAULT, "_inp_restricted_recv %s:%d not allowed on management interface %s",
			    proc_best_name(current_proc()), proc_getpid(current_proc()),
			    ifp->if_xname);
		}
		return TRUE;
	}

	/* PCBs explicitly bound to this restricted interface may receive */
	if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp) {
		return FALSE;
	}

	/*
	 * NOTE(review): this check is redundant — both it and the fallthrough
	 * below return TRUE; kept for symmetry with _inp_restricted_send().
	 */
	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}


	/* restricted-receive interface and no exemption matched: deny */
	return TRUE;
}
3951
3952 boolean_t
inp_restricted_recv(struct inpcb * inp,struct ifnet * ifp)3953 inp_restricted_recv(struct inpcb *inp, struct ifnet *ifp)
3954 {
3955 boolean_t ret;
3956
3957 ret = _inp_restricted_recv(inp, ifp);
3958 if (ret == TRUE && log_restricted) {
3959 printf("pid %d (%s) is unable to receive packets on %s\n",
3960 proc_getpid(current_proc()), proc_best_name(current_proc()),
3961 ifp->if_xname);
3962 }
3963 return ret;
3964 }
3965
/*
 * Called when we need to enforce policy restrictions in the output path.
 *
 * Returns TRUE if we're not allowed to send data out, otherwise FALSE.
 */
static boolean_t
_inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
{
	VERIFY(inp != NULL);

	/*
	 * Outbound restrictions.
	 */
	if (!sorestrictsend) {
		return FALSE;
	}

	if (ifp == NULL) {
		return FALSE;
	}

	/* per-PCB interface-class opt-outs */
	if (IFNET_IS_CELLULAR(ifp) && INP_NO_CELLULAR(inp)) {
		return TRUE;
	}

	if (IFNET_IS_EXPENSIVE(ifp) && INP_NO_EXPENSIVE(inp)) {
		return TRUE;
	}

	if (IFNET_IS_CONSTRAINED(ifp) && INP_NO_CONSTRAINED(inp)) {
		return TRUE;
	}

	if (IFNET_IS_AWDL_RESTRICTED(ifp) && !INP_AWDL_UNRESTRICTED(inp)) {
		return TRUE;
	}

	/* management interfaces require the management entitlement */
	if (IFNET_IS_MANAGEMENT(ifp)) {
		if (!INP_MANAGEMENT_ALLOWED(inp)) {
			if (if_management_verbose > 1) {
				os_log(OS_LOG_DEFAULT, "_inp_restricted_send %s:%d not allowed on management interface %s",
				    proc_best_name(current_proc()), proc_getpid(current_proc()),
				    ifp->if_xname);
			}
			return TRUE;
		}
	}

	/* co-processor interfaces require the intcoproc entitlement */
	if (IFNET_IS_INTCOPROC(ifp) && !INP_INTCOPROC_ALLOWED(inp)) {
		return TRUE;
	}

	return FALSE;
}
4020
4021 boolean_t
inp_restricted_send(struct inpcb * inp,struct ifnet * ifp)4022 inp_restricted_send(struct inpcb *inp, struct ifnet *ifp)
4023 {
4024 boolean_t ret;
4025
4026 ret = _inp_restricted_send(inp, ifp);
4027 if (ret == TRUE && log_restricted) {
4028 printf("pid %d (%s) is unable to transmit packets on %s\n",
4029 proc_getpid(current_proc()), proc_best_name(current_proc()),
4030 ifp->if_xname);
4031 }
4032 return ret;
4033 }
4034
/*
 * Begin per-interface send-byte accounting on the socket's send buffer
 * when the last output interface is cellular or Wi-Fi and the socket is
 * not an MPTCP subflow.  Counts bytes already queued (e.g. TFO data sent
 * before the connection is established) plus all currently-unsent bytes
 * relative to th_ack.
 */
inline void
inp_count_sndbytes(struct inpcb *inp, u_int32_t th_ack)
{
	struct ifnet *ifp = inp->inp_last_outifp;
	struct socket *so = inp->inp_socket;
	if (ifp != NULL && !(so->so_flags & SOF_MP_SUBFLOW) &&
	    (ifp->if_type == IFT_CELLULAR || IFNET_IS_WIFI(ifp))) {
		int32_t unsent;

		/* enable byte-count tracking on this send buffer */
		so->so_snd.sb_flags |= SB_SNDBYTE_CNT;

		/*
		 * There can be data outstanding before the connection
		 * becomes established -- TFO case
		 */
		if (so->so_snd.sb_cc > 0) {
			inp_incr_sndbytes_total(so, so->so_snd.sb_cc);
		}

		unsent = inp_get_sndbytes_allunsent(so, th_ack);
		if (unsent > 0) {
			inp_incr_sndbytes_unsent(so, unsent);
		}
	}
}
4060
4061 inline void
inp_incr_sndbytes_total(struct socket * so,int32_t len)4062 inp_incr_sndbytes_total(struct socket *so, int32_t len)
4063 {
4064 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4065 struct ifnet *ifp = inp->inp_last_outifp;
4066
4067 if (ifp != NULL) {
4068 VERIFY(ifp->if_sndbyte_total >= 0);
4069 OSAddAtomic64(len, &ifp->if_sndbyte_total);
4070 }
4071 }
4072
/*
 * Subtract len from the last output interface's total send-byte counter,
 * clamping at zero.
 */
inline void
inp_decr_sndbytes_total(struct socket *so, int32_t len)
{
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct ifnet *ifp = inp->inp_last_outifp;

	if (ifp != NULL) {
		if (ifp->if_sndbyte_total >= len) {
			OSAddAtomic64(-len, &ifp->if_sndbyte_total);
		} else {
			/*
			 * NOTE(review): the clamp is a plain store, unlike the
			 * atomic decrement above — confirm the race is benign.
			 */
			ifp->if_sndbyte_total = 0;
		}
	}
}
4087
4088 inline void
inp_incr_sndbytes_unsent(struct socket * so,int32_t len)4089 inp_incr_sndbytes_unsent(struct socket *so, int32_t len)
4090 {
4091 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4092 struct ifnet *ifp = inp->inp_last_outifp;
4093
4094 if (ifp != NULL) {
4095 VERIFY(ifp->if_sndbyte_unsent >= 0);
4096 OSAddAtomic64(len, &ifp->if_sndbyte_unsent);
4097 }
4098 }
4099
4100 inline void
inp_decr_sndbytes_unsent(struct socket * so,int32_t len)4101 inp_decr_sndbytes_unsent(struct socket *so, int32_t len)
4102 {
4103 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4104 return;
4105 }
4106
4107 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4108 struct ifnet *ifp = inp->inp_last_outifp;
4109
4110 if (ifp != NULL) {
4111 if (ifp->if_sndbyte_unsent >= len) {
4112 OSAddAtomic64(-len, &ifp->if_sndbyte_unsent);
4113 } else {
4114 ifp->if_sndbyte_unsent = 0;
4115 }
4116 }
4117 }
4118
4119 inline void
inp_decr_sndbytes_allunsent(struct socket * so,u_int32_t th_ack)4120 inp_decr_sndbytes_allunsent(struct socket *so, u_int32_t th_ack)
4121 {
4122 int32_t len;
4123
4124 if (so == NULL || !(so->so_snd.sb_flags & SB_SNDBYTE_CNT)) {
4125 return;
4126 }
4127
4128 len = inp_get_sndbytes_allunsent(so, th_ack);
4129 inp_decr_sndbytes_unsent(so, len);
4130 }
4131
4132 #if SKYWALK
4133 inline void
inp_update_netns_flags(struct socket * so)4134 inp_update_netns_flags(struct socket *so)
4135 {
4136 struct inpcb *inp;
4137 uint32_t set_flags = 0;
4138 uint32_t clear_flags = 0;
4139
4140 if (!(SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
4141 return;
4142 }
4143
4144 inp = sotoinpcb(so);
4145
4146 if (inp == NULL) {
4147 return;
4148 }
4149
4150 if (!NETNS_TOKEN_VALID(&inp->inp_netns_token)) {
4151 return;
4152 }
4153
4154 if (so->so_options & SO_NOWAKEFROMSLEEP) {
4155 set_flags |= NETNS_NOWAKEFROMSLEEP;
4156 } else {
4157 clear_flags |= NETNS_NOWAKEFROMSLEEP;
4158 }
4159
4160 if (inp->inp_flags & INP_RECV_ANYIF) {
4161 set_flags |= NETNS_RECVANYIF;
4162 } else {
4163 clear_flags |= NETNS_RECVANYIF;
4164 }
4165
4166 if (so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) {
4167 set_flags |= NETNS_EXTBGIDLE;
4168 } else {
4169 clear_flags |= NETNS_EXTBGIDLE;
4170 }
4171
4172 netns_change_flags(&inp->inp_netns_token, set_flags, clear_flags);
4173 }
4174 #endif /* SKYWALK */
4175
/* Record the current network uptime in the PCB's activity bitmap. */
inline void
inp_set_activity_bitmap(struct inpcb *inp)
{
	in_stat_set_activity_bitmap(&inp->inp_nw_activity, net_uptime());
}
4181
4182 inline void
inp_get_activity_bitmap(struct inpcb * inp,activity_bitmap_t * ab)4183 inp_get_activity_bitmap(struct inpcb *inp, activity_bitmap_t *ab)
4184 {
4185 bcopy(&inp->inp_nw_activity, ab, sizeof(*ab));
4186 }
4187
4188 void
inp_update_last_owner(struct socket * so,struct proc * p,struct proc * ep)4189 inp_update_last_owner(struct socket *so, struct proc *p, struct proc *ep)
4190 {
4191 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4192
4193 if (inp == NULL) {
4194 return;
4195 }
4196
4197 if (p != NULL) {
4198 strlcpy(&inp->inp_last_proc_name[0], proc_name_address(p), sizeof(inp->inp_last_proc_name));
4199 }
4200 if (so->so_flags & SOF_DELEGATED) {
4201 if (ep != NULL) {
4202 strlcpy(&inp->inp_e_proc_name[0], proc_name_address(ep), sizeof(inp->inp_e_proc_name));
4203 } else {
4204 inp->inp_e_proc_name[0] = 0;
4205 }
4206 } else {
4207 inp->inp_e_proc_name[0] = 0;
4208 }
4209 }
4210
4211 void
inp_copy_last_owner(struct socket * so,struct socket * head)4212 inp_copy_last_owner(struct socket *so, struct socket *head)
4213 {
4214 struct inpcb *inp = (struct inpcb *)so->so_pcb;
4215 struct inpcb *head_inp = (struct inpcb *)head->so_pcb;
4216
4217 if (inp == NULL || head_inp == NULL) {
4218 return;
4219 }
4220
4221 strlcpy(&inp->inp_last_proc_name[0], &head_inp->inp_last_proc_name[0], sizeof(inp->inp_last_proc_name));
4222 strlcpy(&inp->inp_e_proc_name[0], &head_inp->inp_e_proc_name[0], sizeof(inp->inp_e_proc_name));
4223 }
4224
4225 static int
in_check_management_interface_proc_callout(proc_t proc,void * arg __unused)4226 in_check_management_interface_proc_callout(proc_t proc, void *arg __unused)
4227 {
4228 struct fileproc *fp = NULL;
4229 task_t task = proc_task(proc);
4230 bool allowed = false;
4231
4232 if (IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT) == true
4233 || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT) == true
4234 #if DEBUG || DEVELOPMENT
4235 || IOTaskHasEntitlement(task, INTCOPROC_RESTRICTED_ENTITLEMENT_DEVELOPMENT) == true
4236 || IOTaskHasEntitlement(task, MANAGEMENT_DATA_ENTITLEMENT_DEVELOPMENT) == true
4237 #endif /* DEBUG || DEVELOPMENT */
4238 ) {
4239 allowed = true;
4240 }
4241 if (allowed == false && management_data_unrestricted == false) {
4242 return PROC_RETURNED;
4243 }
4244
4245 proc_fdlock(proc);
4246 fdt_foreach(fp, proc) {
4247 struct fileglob *fg = fp->fp_glob;
4248 struct socket *so;
4249 struct inpcb *inp;
4250
4251 if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
4252 continue;
4253 }
4254
4255 so = (struct socket *)fp_get_data(fp);
4256 if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
4257 continue;
4258 }
4259
4260 inp = (struct inpcb *)so->so_pcb;
4261
4262 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
4263 continue;
4264 }
4265
4266 socket_lock(so, 1);
4267
4268 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
4269 socket_unlock(so, 1);
4270 continue;
4271 }
4272 inp->inp_flags2 |= INP2_MANAGEMENT_ALLOWED;
4273 inp->inp_flags2 |= INP2_MANAGEMENT_CHECKED;
4274
4275 socket_unlock(so, 1);
4276 }
4277 proc_fdunlock(proc);
4278
4279 return PROC_RETURNED;
4280 }
4281
4282 static bool in_management_interface_checked = false;
4283
4284 static void
in_management_interface_event_callback(struct nwk_wq_entry * nwk_item)4285 in_management_interface_event_callback(struct nwk_wq_entry *nwk_item)
4286 {
4287 kfree_type(struct nwk_wq_entry, nwk_item);
4288
4289 if (in_management_interface_checked == true) {
4290 return;
4291 }
4292 in_management_interface_checked = true;
4293
4294 proc_iterate(PROC_ALLPROCLIST,
4295 in_check_management_interface_proc_callout,
4296 NULL, NULL, NULL);
4297 }
4298
4299 void
in_management_interface_check(void)4300 in_management_interface_check(void)
4301 {
4302 struct nwk_wq_entry *nwk_item;
4303
4304 if (if_management_interface_check_needed == false ||
4305 in_management_interface_checked == true) {
4306 return;
4307 }
4308
4309 nwk_item = kalloc_type(struct nwk_wq_entry,
4310 Z_WAITOK | Z_ZERO | Z_NOFAIL);
4311
4312 nwk_item->func = in_management_interface_event_callback;
4313
4314 nwk_wq_enqueue(nwk_item);
4315 }
4316