xref: /xnu-8019.80.24/bsd/netinet/in_pcblist.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2010-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 #include <sys/types.h>
62 #include <sys/malloc.h>
63 #include <sys/socket.h>
64 #include <sys/socketvar.h>
65 #include <sys/protosw.h>
66 #include <sys/domain.h>
67 #include <sys/filedesc.h>
68 #include <sys/file_internal.h>
69 #include <sys/kernel.h>
70 #include <sys/sysctl.h>
71 #include <sys/dtrace.h>
72 #include <sys/kauth.h>
73 
74 #include <net/route.h>
75 #include <net/if_var.h>
76 #include <net/if_ports_used.h>
77 #include <net/ntstat.h>
78 
79 #include <netinet/in.h>
80 #include <netinet/in_pcb.h>
81 #include <netinet/in_var.h>
82 #include <netinet/ip_var.h>
83 
84 #include <netinet/udp.h>
85 #include <netinet/udp_var.h>
86 
87 #include <netinet/tcp.h>
88 #include <netinet/tcp_fsm.h>
89 #include <netinet/tcp_seq.h>
90 #include <netinet/tcp_timer.h>
91 #include <netinet/tcp_var.h>
92 #include <netinet6/in6_var.h>
93 
94 #include <os/log.h>
95 
/*
 * Round x up to the next multiple of sizeof(u_int64_t).
 * P2ROUNDUP() is not defined in this file; it must be provided by one of
 * the included headers.
 */
#ifndef ROUNDUP64
#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
#endif

/*
 * Yield a void pointer located ROUNDUP64(n) bytes past p.
 * The whole expansion is parenthesized so that the macro behaves as a
 * single primary expression wherever it is used (e.g. under sizeof or
 * next to a postfix operator), rather than as a bare cast expression.
 */
#ifndef ADVANCE64
#define ADVANCE64(p, n) ((void *)((char *)(p) + ROUNDUP64(n)))
#endif

/* Forward declarations of the file-local record converters defined below. */
static void inpcb_to_xinpcb_n(struct inpcb *, struct xinpcb_n *);
static void tcpcb_to_xtcpcb_n(struct tcpcb *, struct xtcpcb_n *);
/* Externally visible; the definition is near the end of this file. */
void shutdown_sockets_on_interface(struct ifnet *ifp);
107 
108 
109 __private_extern__ void
sotoxsocket_n(struct socket * so,struct xsocket_n * xso)110 sotoxsocket_n(struct socket *so, struct xsocket_n *xso)
111 {
112 	xso->xso_len = sizeof(struct xsocket_n);
113 	xso->xso_kind = XSO_SOCKET;
114 
115 	if (so == NULL) {
116 		return;
117 	}
118 
119 	xso->xso_so = (uint64_t)VM_KERNEL_ADDRPERM(so);
120 	xso->so_type = so->so_type;
121 	xso->so_options = so->so_options;
122 	xso->so_linger = so->so_linger;
123 	xso->so_state = so->so_state;
124 	xso->so_pcb = (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb);
125 	if (so->so_proto) {
126 		xso->xso_protocol = SOCK_PROTO(so);
127 		xso->xso_family = SOCK_DOM(so);
128 	} else {
129 		xso->xso_protocol = xso->xso_family = 0;
130 	}
131 	xso->so_qlen = so->so_qlen;
132 	xso->so_incqlen = so->so_incqlen;
133 	xso->so_qlimit = so->so_qlimit;
134 	xso->so_timeo = so->so_timeo;
135 	xso->so_error = so->so_error;
136 	xso->so_pgid = so->so_pgid;
137 	xso->so_oobmark = so->so_oobmark;
138 	xso->so_uid = kauth_cred_getuid(so->so_cred);
139 	xso->so_last_pid = so->last_pid;
140 	xso->so_e_pid = so->e_pid;
141 }
142 
143 __private_extern__ void
sbtoxsockbuf_n(struct sockbuf * sb,struct xsockbuf_n * xsb)144 sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb)
145 {
146 	xsb->xsb_len = sizeof(struct xsockbuf_n);
147 
148 	if (sb == NULL) {
149 		return;
150 	}
151 
152 	xsb->xsb_kind = (sb->sb_flags & SB_RECV) ? XSO_RCVBUF : XSO_SNDBUF;
153 	xsb->sb_cc = sb->sb_cc;
154 	xsb->sb_hiwat = sb->sb_hiwat;
155 	xsb->sb_mbcnt = sb->sb_mbcnt;
156 	xsb->sb_mbmax = sb->sb_mbmax;
157 	xsb->sb_lowat = sb->sb_lowat;
158 	xsb->sb_flags = (short)sb->sb_flags;
159 	xsb->sb_timeo = (short)((sb->sb_timeo.tv_sec * hz) +
160 	    sb->sb_timeo.tv_usec / tick);
161 	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
162 		xsb->sb_timeo = 1;
163 	}
164 }
165 
166 __private_extern__ void
sbtoxsockstat_n(struct socket * so,struct xsockstat_n * xst)167 sbtoxsockstat_n(struct socket *so, struct xsockstat_n *xst)
168 {
169 	int i;
170 
171 	xst->xst_len = sizeof(struct xsockstat_n);
172 	xst->xst_kind = XSO_STATS;
173 
174 	if (so == NULL) {
175 		return;
176 	}
177 
178 	for (i = 0; i < SO_TC_STATS_MAX; i++) {
179 		xst->xst_tc_stats[i].rxpackets = so->so_tc_stats[i].rxpackets;
180 		xst->xst_tc_stats[i].rxbytes = so->so_tc_stats[i].rxbytes;
181 		xst->xst_tc_stats[i].txpackets = so->so_tc_stats[i].txpackets;
182 		xst->xst_tc_stats[i].txbytes = so->so_tc_stats[i].txbytes;
183 	}
184 }
185 
186 static void
inpcb_to_xinpcb_n(struct inpcb * inp,struct xinpcb_n * xinp)187 inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp)
188 {
189 	xinp->xi_len = sizeof(struct xinpcb_n);
190 	xinp->xi_kind = XSO_INPCB;
191 	xinp->xi_inpp = (uint64_t)VM_KERNEL_ADDRPERM(inp);
192 	xinp->inp_fport = inp->inp_fport;
193 	xinp->inp_lport = inp->inp_lport;
194 	xinp->inp_ppcb = (uint64_t)VM_KERNEL_ADDRPERM(inp->inp_ppcb);
195 	xinp->inp_gencnt = inp->inp_gencnt;
196 	xinp->inp_flags = inp->inp_flags;
197 	xinp->inp_flow = inp->inp_flow;
198 	xinp->inp_vflag = inp->inp_vflag;
199 	xinp->inp_ip_ttl = inp->inp_ip_ttl;
200 	xinp->inp_ip_p = inp->inp_ip_p;
201 	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
202 	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
203 	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
204 	xinp->inp_depend6.inp6_hlim = 0;
205 	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
206 	xinp->inp_depend6.inp6_ifindex = 0;
207 	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
208 	xinp->inp_flowhash = inp->inp_flowhash;
209 	xinp->inp_flags2 = inp->inp_flags2;
210 }
211 
212 __private_extern__ void
tcpcb_to_xtcpcb_n(struct tcpcb * tp,struct xtcpcb_n * xt)213 tcpcb_to_xtcpcb_n(struct tcpcb *tp, struct xtcpcb_n *xt)
214 {
215 	xt->xt_len = sizeof(struct xtcpcb_n);
216 	xt->xt_kind = XSO_TCPCB;
217 
218 	xt->t_segq = (uint32_t)VM_KERNEL_ADDRPERM(tp->t_segq.lh_first);
219 	xt->t_dupacks = tp->t_dupacks;
220 	xt->t_timer[TCPT_REXMT_EXT] = tp->t_timer[TCPT_REXMT];
221 	xt->t_timer[TCPT_PERSIST_EXT] = tp->t_timer[TCPT_PERSIST];
222 	xt->t_timer[TCPT_KEEP_EXT] = tp->t_timer[TCPT_KEEP];
223 	xt->t_timer[TCPT_2MSL_EXT] = tp->t_timer[TCPT_2MSL];
224 	xt->t_state = tp->t_state;
225 	xt->t_flags = tp->t_flags;
226 	xt->t_force = (tp->t_flagsext & TF_FORCE) ? 1 : 0;
227 	xt->snd_una = tp->snd_una;
228 	xt->snd_max = tp->snd_max;
229 	xt->snd_nxt = tp->snd_nxt;
230 	xt->snd_up = tp->snd_up;
231 	xt->snd_wl1 = tp->snd_wl1;
232 	xt->snd_wl2 = tp->snd_wl2;
233 	xt->iss = tp->iss;
234 	xt->irs = tp->irs;
235 	xt->rcv_nxt = tp->rcv_nxt;
236 	xt->rcv_adv = tp->rcv_adv;
237 	xt->rcv_wnd = tp->rcv_wnd;
238 	xt->rcv_up = tp->rcv_up;
239 	xt->snd_wnd = tp->snd_wnd;
240 	xt->snd_cwnd = tp->snd_cwnd;
241 	xt->snd_ssthresh = tp->snd_ssthresh;
242 	xt->t_maxopd = tp->t_maxopd;
243 	xt->t_rcvtime = tp->t_rcvtime;
244 	xt->t_starttime = tp->t_starttime;
245 	xt->t_rtttime = tp->t_rtttime;
246 	xt->t_rtseq = tp->t_rtseq;
247 	xt->t_rxtcur = tp->t_rxtcur;
248 	xt->t_maxseg = tp->t_maxseg;
249 	xt->t_srtt = tp->t_srtt;
250 	xt->t_rttvar = tp->t_rttvar;
251 	xt->t_rxtshift = tp->t_rxtshift;
252 	xt->t_rttmin = tp->t_rttmin;
253 	xt->t_rttupdated = tp->t_rttupdated;
254 	xt->max_sndwnd = tp->max_sndwnd;
255 	xt->t_softerror = tp->t_softerror;
256 	xt->t_oobflags = tp->t_oobflags;
257 	xt->t_iobc = tp->t_iobc;
258 	xt->snd_scale = tp->snd_scale;
259 	xt->rcv_scale = tp->rcv_scale;
260 	xt->request_r_scale = tp->request_r_scale;
261 	xt->requested_s_scale = tp->requested_s_scale;
262 	xt->ts_recent = tp->ts_recent;
263 	xt->ts_recent_age = tp->ts_recent_age;
264 	xt->last_ack_sent = tp->last_ack_sent;
265 	xt->cc_send = 0;
266 	xt->cc_recv = 0;
267 	xt->snd_recover = tp->snd_recover;
268 	xt->snd_cwnd_prev = tp->snd_cwnd_prev;
269 	xt->snd_ssthresh_prev = tp->snd_ssthresh_prev;
270 }
271 
272 __private_extern__ int
get_pcblist_n(short proto,struct sysctl_req * req,struct inpcbinfo * pcbinfo)273 get_pcblist_n(short proto, struct sysctl_req *req, struct inpcbinfo *pcbinfo)
274 {
275 	int error = 0;
276 	int i, n;
277 	struct inpcb *inp, **inp_list = NULL;
278 	inp_gen_t gencnt;
279 	struct xinpgen xig;
280 	void *buf = NULL;
281 	size_t item_size = ROUNDUP64(sizeof(struct xinpcb_n)) +
282 	    ROUNDUP64(sizeof(struct xsocket_n)) +
283 	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
284 	    ROUNDUP64(sizeof(struct xsockstat_n));
285 #if SKYWALK
286 	int nuserland;
287 	void *userlandsnapshot = NULL;
288 #endif /* SKYWALK */
289 
290 	if (proto == IPPROTO_TCP) {
291 		item_size += ROUNDUP64(sizeof(struct xtcpcb_n));
292 	}
293 
294 	if (req->oldptr == USER_ADDR_NULL) {
295 		n = pcbinfo->ipi_count;
296 #if SKYWALK
297 		n += ntstat_userland_count(proto);
298 #endif /* SKYWALK */
299 		req->oldidx = 2 * (sizeof(xig)) + (n + n / 8 + 1) * item_size;
300 		return 0;
301 	}
302 
303 	if (req->newptr != USER_ADDR_NULL) {
304 		return EPERM;
305 	}
306 
307 #if SKYWALK
308 	/*
309 	 * Get a snapshot of the state of the user level flows so we know
310 	 * the exact number of results to give back to the user.
311 	 * This could take a while and use other locks, so do this prior
312 	 * to taking any locks of our own.
313 	 */
314 	error = nstat_userland_get_snapshot(proto, &userlandsnapshot, &nuserland);
315 
316 	if (error) {
317 		return error;
318 	}
319 #endif /* SKYWALK */
320 
321 	/*
322 	 * The process of preparing the PCB list is too time-consuming and
323 	 * resource-intensive to repeat twice on every request.
324 	 */
325 	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
326 	/*
327 	 * OK, now we're committed to doing something.
328 	 */
329 	gencnt = pcbinfo->ipi_gencnt;
330 	n = pcbinfo->ipi_count;
331 
332 	bzero(&xig, sizeof(xig));
333 	xig.xig_len = sizeof(xig);
334 	xig.xig_count = n;
335 #if SKYWALK
336 	xig.xig_count += nuserland;
337 #endif /* SKYWALK */
338 	xig.xig_gen = gencnt;
339 	xig.xig_sogen = so_gencnt;
340 	error = SYSCTL_OUT(req, &xig, sizeof(xig));
341 	if (error) {
342 		goto done;
343 	}
344 	/*
345 	 * We are done if there is no pcb
346 	 */
347 	if (xig.xig_count == 0) {
348 		goto done;
349 	}
350 
351 	buf = kalloc_data(item_size, Z_WAITOK);
352 	if (buf == NULL) {
353 		error = ENOMEM;
354 		goto done;
355 	}
356 
357 	inp_list = _MALLOC(n * sizeof(*inp_list), M_TEMP, M_WAITOK);
358 	if (inp_list == NULL) {
359 		error = ENOMEM;
360 		goto done;
361 	}
362 
363 	/*
364 	 * Special case TCP to include the connections in time wait
365 	 */
366 	if (proto == IPPROTO_TCP) {
367 		n = get_tcp_inp_list(inp_list, n, gencnt);
368 	} else {
369 		for (inp = pcbinfo->ipi_listhead->lh_first, i = 0; inp && i < n;
370 		    inp = inp->inp_list.le_next) {
371 			if (inp->inp_gencnt <= gencnt &&
372 			    inp->inp_state != INPCB_STATE_DEAD) {
373 				inp_list[i++] = inp;
374 			}
375 		}
376 		n = i;
377 	}
378 
379 
380 	error = 0;
381 	for (i = 0; i < n; i++) {
382 		inp = inp_list[i];
383 		if (inp->inp_gencnt <= gencnt &&
384 		    inp->inp_state != INPCB_STATE_DEAD) {
385 			struct xinpcb_n *xi = (struct xinpcb_n *)buf;
386 			struct xsocket_n *xso = (struct xsocket_n *)
387 			    ADVANCE64(xi, sizeof(*xi));
388 			struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
389 			    ADVANCE64(xso, sizeof(*xso));
390 			struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
391 			    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
392 			struct xsockstat_n *xsostats = (struct xsockstat_n *)
393 			    ADVANCE64(xsbsnd, sizeof(*xsbsnd));
394 
395 			bzero(buf, item_size);
396 
397 			inpcb_to_xinpcb_n(inp, xi);
398 			sotoxsocket_n(inp->inp_socket, xso);
399 			sbtoxsockbuf_n(inp->inp_socket ?
400 			    &inp->inp_socket->so_rcv : NULL, xsbrcv);
401 			sbtoxsockbuf_n(inp->inp_socket ?
402 			    &inp->inp_socket->so_snd : NULL, xsbsnd);
403 			sbtoxsockstat_n(inp->inp_socket, xsostats);
404 			if (proto == IPPROTO_TCP) {
405 				struct  xtcpcb_n *xt = (struct xtcpcb_n *)
406 				    ADVANCE64(xsostats, sizeof(*xsostats));
407 
408 				/*
409 				 * inp->inp_ppcb, can only be NULL on
410 				 * an initialization race window.
411 				 * No need to lock.
412 				 */
413 				if (inp->inp_ppcb == NULL) {
414 					continue;
415 				}
416 
417 				tcpcb_to_xtcpcb_n((struct tcpcb *)
418 				    inp->inp_ppcb, xt);
419 			}
420 			error = SYSCTL_OUT(req, buf, item_size);
421 			if (error) {
422 				break;
423 			}
424 		}
425 	}
426 #if SKYWALK
427 	if (!error && nuserland > 0) {
428 		error = nstat_userland_list_snapshot(proto, req, userlandsnapshot, nuserland);
429 	}
430 #endif /* SKYWALK */
431 
432 	if (!error) {
433 		/*
434 		 * Give the user an updated idea of our state.
435 		 * If the generation differs from what we told
436 		 * her before, she knows that something happened
437 		 * while we were processing this request, and it
438 		 * might be necessary to retry.
439 		 */
440 		bzero(&xig, sizeof(xig));
441 		xig.xig_len = sizeof(xig);
442 		xig.xig_gen = pcbinfo->ipi_gencnt;
443 		xig.xig_sogen = so_gencnt;
444 		xig.xig_count = pcbinfo->ipi_count;
445 #if SKYWALK
446 		xig.xig_count +=  nuserland;
447 #endif /* SKYWALK */
448 		error = SYSCTL_OUT(req, &xig, sizeof(xig));
449 	}
450 done:
451 	lck_rw_done(&pcbinfo->ipi_lock);
452 
453 #if SKYWALK
454 	nstat_userland_release_snapshot(userlandsnapshot, nuserland);
455 #endif /* SKYWALK */
456 	if (inp_list != NULL) {
457 		FREE(inp_list, M_TEMP);
458 	}
459 	if (buf != NULL) {
460 		kfree_data(buf, item_size);
461 	}
462 	return error;
463 }
464 
465 static void
inpcb_get_if_ports_used(ifnet_t ifp,int protocol,uint32_t flags,bitstr_t * bitfield,struct inpcbinfo * pcbinfo)466 inpcb_get_if_ports_used(ifnet_t ifp, int protocol, uint32_t flags,
467     bitstr_t *bitfield, struct inpcbinfo *pcbinfo)
468 {
469 	struct inpcb *inp;
470 	struct socket *so;
471 	inp_gen_t gencnt;
472 	bool iswildcard, wildcardok, nowakeok;
473 	bool recvanyifonly, extbgidleok;
474 	bool activeonly;
475 	bool anytcpstateok;
476 
477 	if (ifp == NULL) {
478 		return;
479 	}
480 
481 	wildcardok = ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) != 0);
482 	nowakeok = ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) != 0);
483 	recvanyifonly = ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) != 0);
484 	extbgidleok = ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) != 0);
485 	activeonly = ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) != 0);
486 	anytcpstateok = ((flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) != 0);
487 
488 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
489 	gencnt = pcbinfo->ipi_gencnt;
490 
491 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp;
492 	    inp = LIST_NEXT(inp, inp_list)) {
493 		if (inp->inp_gencnt > gencnt ||
494 		    inp->inp_state == INPCB_STATE_DEAD ||
495 		    inp->inp_wantcnt == WNT_STOPUSING) {
496 			continue;
497 		}
498 
499 		if ((so = inp->inp_socket) == NULL || inp->inp_lport == 0) {
500 			continue;
501 		}
502 
503 		/*
504 		 * ANYTCPSTATEOK means incoming packets cannot be filtered
505 		 * reception so cast a wide net of possibilities
506 		 */
507 		if (!anytcpstateok &&
508 		    ((so->so_state & SS_DEFUNCT) ||
509 		    (so->so_state & SS_ISDISCONNECTED))) {
510 			continue;
511 		}
512 
513 		/*
514 		 * If protocol is specified, filter out inpcbs that
515 		 * are not relevant to the protocol family of interest.
516 		 */
517 		if (protocol != PF_UNSPEC) {
518 			if (protocol == PF_INET) {
519 				/*
520 				 * If protocol of interest is IPv4, skip the inpcb
521 				 * if the family is not IPv4.
522 				 * OR
523 				 * If the family is IPv4, skip if the IPv4 flow is
524 				 * CLAT46 translated.
525 				 */
526 				if ((inp->inp_vflag & INP_IPV4) == 0 ||
527 				    (inp->inp_flags2 & INP2_CLAT46_FLOW) != 0) {
528 					continue;
529 				}
530 			} else if (protocol == PF_INET6) {
531 				/*
532 				 * If protocol of interest is IPv6, skip the inpcb
533 				 * if the family is not IPv6.
534 				 * AND
535 				 * The flow is not a CLAT46'd flow.
536 				 */
537 				if ((inp->inp_vflag & INP_IPV6) == 0 &&
538 				    (inp->inp_flags2 & INP2_CLAT46_FLOW) == 0) {
539 					continue;
540 				}
541 			} else {
542 				/* Protocol family not supported */
543 				continue;
544 			}
545 		}
546 
547 		if (SOCK_PROTO(inp->inp_socket) != IPPROTO_UDP &&
548 		    SOCK_PROTO(inp->inp_socket) != IPPROTO_TCP) {
549 			continue;
550 		}
551 
552 		iswildcard = (((inp->inp_vflag & INP_IPV4) &&
553 		    inp->inp_laddr.s_addr == INADDR_ANY) ||
554 		    ((inp->inp_vflag & INP_IPV6) &&
555 		    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)));
556 
557 		if (!wildcardok && iswildcard) {
558 			continue;
559 		}
560 
561 		if ((so->so_options & SO_NOWAKEFROMSLEEP) &&
562 		    !nowakeok) {
563 			continue;
564 		}
565 
566 		if (!(inp->inp_flags & INP_RECV_ANYIF) &&
567 		    recvanyifonly) {
568 			continue;
569 		}
570 
571 		if (!(so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) &&
572 		    extbgidleok) {
573 			continue;
574 		}
575 
576 		if (!iswildcard &&
577 		    !(inp->inp_last_outifp == NULL || ifp == inp->inp_last_outifp)) {
578 			continue;
579 		}
580 
581 		if (SOCK_PROTO(inp->inp_socket) == IPPROTO_UDP &&
582 		    so->so_state & SS_CANTRCVMORE) {
583 			continue;
584 		}
585 
586 		if (SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP) {
587 			struct  tcpcb *tp = sototcpcb(inp->inp_socket);
588 
589 			/*
590 			 * Workaround race where inp_ppcb is NULL during
591 			 * socket initialization
592 			 */
593 			if (tp == NULL) {
594 				continue;
595 			}
596 
597 			switch (tp->t_state) {
598 			case TCPS_CLOSED:
599 				if (anytcpstateok && inp->inp_fport != 0) {
600 					/*
601 					 * A foreign port means we had a 4 tuple at
602 					 * least a connection attempt so packets
603 					 * may be received for the 4 tuple after the
604 					 * connection is gone
605 					 */
606 					break;
607 				}
608 				continue;
609 			/* NOT REACHED */
610 			case TCPS_LISTEN:
611 			case TCPS_SYN_SENT:
612 			case TCPS_SYN_RECEIVED:
613 			case TCPS_ESTABLISHED:
614 			case TCPS_FIN_WAIT_1:
615 				/*
616 				 * Note: FIN_WAIT_1 is an active state
617 				 * because we need our FIN to be
618 				 * acknowledged
619 				 */
620 				break;
621 			case TCPS_CLOSE_WAIT:
622 			case TCPS_CLOSING:
623 			case TCPS_LAST_ACK:
624 			case TCPS_FIN_WAIT_2:
625 				/*
626 				 * In the closing states, the connection
627 				 * is active when there is outgoing
628 				 * data having to be acknowledged
629 				 */
630 				if (!anytcpstateok &&
631 				    (activeonly && so->so_snd.sb_cc == 0)) {
632 					continue;
633 				}
634 				break;
635 			case TCPS_TIME_WAIT:
636 				if (anytcpstateok) {
637 					/*
638 					 * Packets may still be received for the 4 tuple
639 					 * after the connection is gone
640 					 */
641 					break;
642 				}
643 				continue;
644 				/* NOT REACHED */
645 			}
646 		}
647 
648 		bitstr_set(bitfield, ntohs(inp->inp_lport));
649 
650 		(void) if_ports_used_add_inpcb(ifp->if_index, inp);
651 	}
652 	lck_rw_done(&pcbinfo->ipi_lock);
653 }
654 
655 __private_extern__ void
inpcb_get_ports_used(ifnet_t ifp,int protocol,uint32_t flags,bitstr_t * bitfield,struct inpcbinfo * pcbinfo)656 inpcb_get_ports_used(ifnet_t ifp, int protocol, uint32_t flags,
657     bitstr_t *bitfield, struct inpcbinfo *pcbinfo)
658 {
659 	if (ifp != NULL) {
660 		inpcb_get_if_ports_used(ifp, protocol, flags, bitfield, pcbinfo);
661 	} else {
662 		errno_t error;
663 		ifnet_t *ifp_list;
664 		uint32_t count, i;
665 
666 		error = ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp_list, &count);
667 		if (error != 0) {
668 			os_log_error(OS_LOG_DEFAULT,
669 			    "%s: ifnet_list_get_all() failed %d",
670 			    __func__, error);
671 			return;
672 		}
673 		for (i = 0; i < count; i++) {
674 			if (TAILQ_EMPTY(&ifp_list[i]->if_addrhead)) {
675 				continue;
676 			}
677 			inpcb_get_if_ports_used(ifp_list[i], protocol, flags,
678 			    bitfield, pcbinfo);
679 		}
680 		ifnet_list_free(ifp_list);
681 	}
682 }
683 
684 __private_extern__ uint32_t
inpcb_count_opportunistic(unsigned int ifindex,struct inpcbinfo * pcbinfo,u_int32_t flags)685 inpcb_count_opportunistic(unsigned int ifindex, struct inpcbinfo *pcbinfo,
686     u_int32_t flags)
687 {
688 	uint32_t opportunistic = 0;
689 	struct inpcb *inp;
690 	inp_gen_t gencnt;
691 
692 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
693 	gencnt = pcbinfo->ipi_gencnt;
694 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
695 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
696 		if (inp->inp_gencnt <= gencnt &&
697 		    inp->inp_state != INPCB_STATE_DEAD &&
698 		    inp->inp_socket != NULL &&
699 		    so_get_opportunistic(inp->inp_socket) &&
700 		    inp->inp_last_outifp != NULL &&
701 		    ifindex == inp->inp_last_outifp->if_index) {
702 			opportunistic++;
703 			struct socket *so = inp->inp_socket;
704 			if ((flags & INPCB_OPPORTUNISTIC_SETCMD) &&
705 			    (so->so_state & SS_ISCONNECTED)) {
706 				socket_lock(so, 1);
707 				if (flags & INPCB_OPPORTUNISTIC_THROTTLEON) {
708 					so->so_flags |= SOF_SUSPENDED;
709 					soevent(so,
710 					    (SO_FILT_HINT_LOCKED |
711 					    SO_FILT_HINT_SUSPEND));
712 				} else {
713 					so->so_flags &= ~(SOF_SUSPENDED);
714 					soevent(so,
715 					    (SO_FILT_HINT_LOCKED |
716 					    SO_FILT_HINT_RESUME));
717 				}
718 				SOTHROTTLELOG("throttle[%d]: so 0x%llx "
719 				    "[%d,%d] %s\n", so->last_pid,
720 				    (uint64_t)VM_KERNEL_ADDRPERM(so),
721 				    SOCK_DOM(so), SOCK_TYPE(so),
722 				    (so->so_flags & SOF_SUSPENDED) ?
723 				    "SUSPENDED" : "RESUMED");
724 				socket_unlock(so, 1);
725 			}
726 		}
727 	}
728 
729 	lck_rw_done(&pcbinfo->ipi_lock);
730 
731 	return opportunistic;
732 }
733 
734 __private_extern__ uint32_t
inpcb_find_anypcb_byaddr(struct ifaddr * ifa,struct inpcbinfo * pcbinfo)735 inpcb_find_anypcb_byaddr(struct ifaddr *ifa, struct inpcbinfo *pcbinfo)
736 {
737 	struct inpcb *inp;
738 	inp_gen_t gencnt = pcbinfo->ipi_gencnt;
739 	struct socket *so = NULL;
740 	int af;
741 
742 	if ((ifa->ifa_addr->sa_family != AF_INET) &&
743 	    (ifa->ifa_addr->sa_family != AF_INET6)) {
744 		return 0;
745 	}
746 
747 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
748 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
749 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
750 		if (inp->inp_gencnt <= gencnt &&
751 		    inp->inp_state != INPCB_STATE_DEAD &&
752 		    inp->inp_socket != NULL) {
753 			so = inp->inp_socket;
754 			af = SOCK_DOM(so);
755 			if (af != ifa->ifa_addr->sa_family) {
756 				continue;
757 			}
758 			if (inp->inp_last_outifp != ifa->ifa_ifp) {
759 				continue;
760 			}
761 
762 			if (af == AF_INET) {
763 				if (inp->inp_laddr.s_addr ==
764 				    (satosin(ifa->ifa_addr))->sin_addr.s_addr) {
765 					lck_rw_done(&pcbinfo->ipi_lock);
766 					return 1;
767 				}
768 			}
769 			if (af == AF_INET6) {
770 				if (in6_are_addr_equal_scoped(IFA_IN6(ifa), &inp->in6p_laddr, ((struct sockaddr_in6 *)(void *)(ifa->ifa_addr))->sin6_scope_id, inp->inp_lifscope)) {
771 					lck_rw_done(&pcbinfo->ipi_lock);
772 					return 1;
773 				}
774 			}
775 		}
776 	}
777 	lck_rw_done(&pcbinfo->ipi_lock);
778 	return 0;
779 }
780 
781 static int
shutdown_sockets_on_interface_proc_callout(proc_t p,void * arg)782 shutdown_sockets_on_interface_proc_callout(proc_t p, void *arg)
783 {
784 	struct fileproc *fp;
785 	struct ifnet *ifp = (struct ifnet *)arg;
786 
787 	if (ifp == NULL) {
788 		return PROC_RETURNED;
789 	}
790 
791 	fdt_foreach(fp, p) {
792 		struct fileglob *fg = fp->fp_glob;
793 		struct socket *so;
794 		struct inpcb *inp;
795 		struct ifnet *inp_ifp;
796 		int error;
797 
798 		if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
799 			continue;
800 		}
801 
802 		so = (struct socket *)fp_get_data(fp);
803 		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
804 			continue;
805 		}
806 
807 		inp = (struct inpcb *)so->so_pcb;
808 
809 		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
810 			continue;
811 		}
812 
813 		socket_lock(so, 1);
814 
815 		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
816 			socket_unlock(so, 1);
817 			continue;
818 		}
819 
820 		if (inp->inp_boundifp != NULL) {
821 			inp_ifp = inp->inp_boundifp;
822 		} else if (inp->inp_last_outifp != NULL) {
823 			inp_ifp = inp->inp_last_outifp;
824 		} else {
825 			socket_unlock(so, 1);
826 			continue;
827 		}
828 
829 		if (inp_ifp != ifp && inp_ifp->if_delegated.ifp != ifp) {
830 			socket_unlock(so, 1);
831 			continue;
832 		}
833 		error = sosetdefunct(p, so, 0, TRUE);
834 		if (error != 0) {
835 			log(LOG_ERR, "%s: sosetdefunct() error %d",
836 			    __func__, error);
837 		} else {
838 			error = sodefunct(p, so, 0);
839 			if (error != 0) {
840 				log(LOG_ERR, "%s: sodefunct() error %d",
841 				    __func__, error);
842 			}
843 		}
844 
845 		socket_unlock(so, 1);
846 	}
847 	proc_fdunlock(p);
848 
849 	return PROC_RETURNED;
850 }
851 
852 void
shutdown_sockets_on_interface(struct ifnet * ifp)853 shutdown_sockets_on_interface(struct ifnet *ifp)
854 {
855 	proc_iterate(PROC_ALLPROCLIST,
856 	    shutdown_sockets_on_interface_proc_callout,
857 	    ifp, NULL, NULL);
858 }
859 
860 __private_extern__ int
inp_limit_companion_link(struct inpcbinfo * pcbinfo,u_int32_t limit)861 inp_limit_companion_link(struct inpcbinfo *pcbinfo, u_int32_t limit)
862 {
863 	struct inpcb *inp;
864 	struct socket *so = NULL;
865 
866 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
867 	inp_gen_t gencnt = pcbinfo->ipi_gencnt;
868 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
869 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
870 		if (inp->inp_gencnt <= gencnt &&
871 		    inp->inp_state != INPCB_STATE_DEAD &&
872 		    inp->inp_socket != NULL) {
873 			so = inp->inp_socket;
874 
875 			if ((so->so_state & SS_DEFUNCT) || so->so_state & SS_ISDISCONNECTED ||
876 			    SOCK_PROTO(so) != IPPROTO_TCP || inp->inp_last_outifp == NULL ||
877 			    !IFNET_IS_COMPANION_LINK(inp->inp_last_outifp)) {
878 				continue;
879 			}
880 			so->so_snd.sb_flags &= ~SB_LIMITED;
881 			u_int32_t new_size = MAX(MIN(limit, so->so_snd.sb_lowat), so->so_snd.sb_cc);
882 			sbreserve(&so->so_snd, new_size);
883 			so->so_snd.sb_flags |= SB_LIMITED;
884 		}
885 	}
886 	lck_rw_done(&pcbinfo->ipi_lock);
887 	return 0;
888 }
889 
890 __private_extern__ int
inp_recover_companion_link(struct inpcbinfo * pcbinfo)891 inp_recover_companion_link(struct inpcbinfo *pcbinfo)
892 {
893 	struct inpcb *inp;
894 	inp_gen_t gencnt = pcbinfo->ipi_gencnt;
895 	struct socket *so = NULL;
896 
897 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
898 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
899 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
900 		if (inp->inp_gencnt <= gencnt &&
901 		    inp->inp_state != INPCB_STATE_DEAD &&
902 		    inp->inp_socket != NULL) {
903 			so = inp->inp_socket;
904 
905 			if (SOCK_PROTO(so) != IPPROTO_TCP || inp->inp_last_outifp == NULL ||
906 			    !(so->so_snd.sb_flags & SB_LIMITED)) {
907 				continue;
908 			}
909 
910 			so->so_snd.sb_flags &= ~SB_LIMITED;
911 		}
912 	}
913 	lck_rw_done(&pcbinfo->ipi_lock);
914 	return 0;
915 }
916