xref: /xnu-10063.121.3/bsd/netinet/in_pcblist.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2010-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1990, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 #include <sys/types.h>
62 #include <sys/malloc.h>
63 #include <sys/socket.h>
64 #include <sys/socketvar.h>
65 #include <sys/protosw.h>
66 #include <sys/domain.h>
67 #include <sys/filedesc.h>
68 #include <sys/file_internal.h>
69 #include <sys/kernel.h>
70 #include <sys/sysctl.h>
71 #include <sys/dtrace.h>
72 #include <sys/kauth.h>
73 
74 #include <net/route.h>
75 #include <net/if_var.h>
76 #include <net/if_ports_used.h>
77 #include <net/ntstat.h>
78 
79 #include <netinet/in.h>
80 #include <netinet/in_pcb.h>
81 #include <netinet/in_var.h>
82 #include <netinet/ip_var.h>
83 
84 #include <netinet/udp.h>
85 #include <netinet/udp_var.h>
86 
87 #include <netinet/tcp.h>
88 #include <netinet/tcp_fsm.h>
89 #include <netinet/tcp_seq.h>
90 #include <netinet/tcp_timer.h>
91 #include <netinet/tcp_var.h>
92 #include <netinet6/in6_var.h>
93 
94 #include <net/sockaddr_utils.h>
95 
96 #include <os/log.h>
97 
/*
 * Round x up to the next multiple of sizeof(u_int64_t), so that records
 * packed back to back in one export buffer each start on a 64-bit boundary.
 */
#ifndef ROUNDUP64
#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
#endif

/*
 * Address of the record that follows an n-byte record located at p, with n
 * rounded up by ROUNDUP64().  The expansion is fully parenthesized so that
 * the result stays a single operand next to postfix operators or sizeof.
 */
#ifndef ADVANCE64
#define ADVANCE64(p, n) ((void *)((char *)(p) + ROUNDUP64(n)))
#endif
105 
106 static void inpcb_to_xinpcb_n(struct inpcb *, struct xinpcb_n *);
107 static void tcpcb_to_xtcpcb_n(struct tcpcb *, struct xtcpcb_n *);
108 void shutdown_sockets_on_interface(struct ifnet *ifp);
109 
110 
111 __private_extern__ void
sotoxsocket_n(struct socket * so,struct xsocket_n * xso)112 sotoxsocket_n(struct socket *so, struct xsocket_n *xso)
113 {
114 	xso->xso_len = sizeof(struct xsocket_n);
115 	xso->xso_kind = XSO_SOCKET;
116 
117 	if (so == NULL) {
118 		return;
119 	}
120 
121 	xso->xso_so = (uint64_t)VM_KERNEL_ADDRHASH(so);
122 	xso->so_type = so->so_type;
123 	xso->so_options = so->so_options;
124 	xso->so_linger = so->so_linger;
125 	xso->so_state = so->so_state;
126 	xso->so_pcb = (uint64_t)VM_KERNEL_ADDRHASH(so->so_pcb);
127 	if (so->so_proto) {
128 		xso->xso_protocol = SOCK_PROTO(so);
129 		xso->xso_family = SOCK_DOM(so);
130 	} else {
131 		xso->xso_protocol = xso->xso_family = 0;
132 	}
133 	xso->so_qlen = so->so_qlen;
134 	xso->so_incqlen = so->so_incqlen;
135 	xso->so_qlimit = so->so_qlimit;
136 	xso->so_timeo = so->so_timeo;
137 	xso->so_error = so->so_error;
138 	xso->so_pgid = so->so_pgid;
139 	xso->so_oobmark = so->so_oobmark;
140 	xso->so_uid = kauth_cred_getuid(so->so_cred);
141 	xso->so_last_pid = so->last_pid;
142 	xso->so_e_pid = so->e_pid;
143 	xso->so_gencnt = so->so_gencnt;
144 	xso->so_flags = so->so_flags;
145 	xso->so_flags1 = so->so_flags1;
146 	xso->so_usecount = so->so_usecount;
147 	xso->so_retaincnt = so->so_retaincnt;
148 	if (so->so_filt != NULL) {
149 		xso->xso_filter_flags |= XSOFF_SO_FILT;
150 	}
151 	if (so->so_flow_db != NULL) {
152 		xso->xso_filter_flags |= XSOFF_FLOW_DB;
153 	}
154 	if (so->so_cfil != NULL) {
155 		xso->xso_filter_flags |= XSOFF_CFIL;
156 	}
157 	if (so->so_fd_pcb != NULL) {
158 		xso->xso_filter_flags |= XSOFF_FLOW_DIV;
159 	}
160 }
161 
162 __private_extern__ void
sbtoxsockbuf_n(struct sockbuf * sb,struct xsockbuf_n * xsb)163 sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb)
164 {
165 	xsb->xsb_len = sizeof(struct xsockbuf_n);
166 
167 	if (sb == NULL) {
168 		return;
169 	}
170 
171 	xsb->xsb_kind = (sb->sb_flags & SB_RECV) ? XSO_RCVBUF : XSO_SNDBUF;
172 	xsb->sb_cc = sb->sb_cc;
173 	xsb->sb_hiwat = sb->sb_hiwat;
174 	xsb->sb_mbcnt = sb->sb_mbcnt;
175 	xsb->sb_mbmax = sb->sb_mbmax;
176 	xsb->sb_lowat = sb->sb_lowat;
177 	xsb->sb_flags = (short)sb->sb_flags;
178 	xsb->sb_timeo = (short)((sb->sb_timeo.tv_sec * hz) +
179 	    sb->sb_timeo.tv_usec / tick);
180 	if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) {
181 		xsb->sb_timeo = 1;
182 	}
183 }
184 
185 __private_extern__ void
sbtoxsockstat_n(struct socket * so,struct xsockstat_n * xst)186 sbtoxsockstat_n(struct socket *so, struct xsockstat_n *xst)
187 {
188 	int i;
189 
190 	xst->xst_len = sizeof(struct xsockstat_n);
191 	xst->xst_kind = XSO_STATS;
192 
193 	if (so == NULL) {
194 		return;
195 	}
196 
197 	for (i = 0; i < SO_TC_STATS_MAX; i++) {
198 		xst->xst_tc_stats[i].rxpackets = so->so_tc_stats[i].rxpackets;
199 		xst->xst_tc_stats[i].rxbytes = so->so_tc_stats[i].rxbytes;
200 		xst->xst_tc_stats[i].txpackets = so->so_tc_stats[i].txpackets;
201 		xst->xst_tc_stats[i].txbytes = so->so_tc_stats[i].txbytes;
202 	}
203 }
204 
205 static void
inpcb_to_xinpcb_n(struct inpcb * inp,struct xinpcb_n * xinp)206 inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp)
207 {
208 	xinp->xi_len = sizeof(struct xinpcb_n);
209 	xinp->xi_kind = XSO_INPCB;
210 	xinp->xi_inpp = (uint64_t)VM_KERNEL_ADDRHASH(inp);
211 	xinp->inp_fport = inp->inp_fport;
212 	xinp->inp_lport = inp->inp_lport;
213 	xinp->inp_ppcb = (uint64_t)VM_KERNEL_ADDRHASH(inp->inp_ppcb);
214 	xinp->inp_gencnt = inp->inp_gencnt;
215 	xinp->inp_flags = inp->inp_flags;
216 	xinp->inp_flow = inp->inp_flow;
217 	xinp->inp_vflag = inp->inp_vflag;
218 	xinp->inp_ip_ttl = inp->inp_ip_ttl;
219 	xinp->inp_ip_p = inp->inp_ip_p;
220 	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
221 	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
222 	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
223 	xinp->inp_depend6.inp6_hlim = 0;
224 	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
225 	xinp->inp_depend6.inp6_ifindex = 0;
226 	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
227 	xinp->inp_flowhash = inp->inp_flowhash;
228 	xinp->inp_flags2 = inp->inp_flags2;
229 }
230 
231 __private_extern__ void
tcpcb_to_xtcpcb_n(struct tcpcb * tp,struct xtcpcb_n * xt)232 tcpcb_to_xtcpcb_n(struct tcpcb *tp, struct xtcpcb_n *xt)
233 {
234 	xt->xt_len = sizeof(struct xtcpcb_n);
235 	xt->xt_kind = XSO_TCPCB;
236 
237 	xt->t_segq = (uint32_t)VM_KERNEL_ADDRHASH(tp->t_segq.lh_first);
238 	xt->t_dupacks = tp->t_dupacks;
239 	xt->t_timer[TCPT_REXMT_EXT] = tp->t_timer[TCPT_REXMT];
240 	xt->t_timer[TCPT_PERSIST_EXT] = tp->t_timer[TCPT_PERSIST];
241 	xt->t_timer[TCPT_KEEP_EXT] = tp->t_timer[TCPT_KEEP];
242 	xt->t_timer[TCPT_2MSL_EXT] = tp->t_timer[TCPT_2MSL];
243 	xt->t_state = tp->t_state;
244 	xt->t_flags = tp->t_flags;
245 	xt->t_force = (tp->t_flagsext & TF_FORCE) ? 1 : 0;
246 	xt->snd_una = tp->snd_una;
247 	xt->snd_max = tp->snd_max;
248 	xt->snd_nxt = tp->snd_nxt;
249 	xt->snd_up = tp->snd_up;
250 	xt->snd_wl1 = tp->snd_wl1;
251 	xt->snd_wl2 = tp->snd_wl2;
252 	xt->iss = tp->iss;
253 	xt->irs = tp->irs;
254 	xt->rcv_nxt = tp->rcv_nxt;
255 	xt->rcv_adv = tp->rcv_adv;
256 	xt->rcv_wnd = tp->rcv_wnd;
257 	xt->rcv_up = tp->rcv_up;
258 	xt->snd_wnd = tp->snd_wnd;
259 	xt->snd_cwnd = tp->snd_cwnd;
260 	xt->snd_ssthresh = tp->snd_ssthresh;
261 	xt->t_maxopd = tp->t_maxopd;
262 	xt->t_rcvtime = tp->t_rcvtime;
263 	xt->t_starttime = tp->t_starttime;
264 	xt->t_rtttime = tp->t_rtttime;
265 	xt->t_rtseq = tp->t_rtseq;
266 	xt->t_rxtcur = tp->t_rxtcur;
267 	xt->t_maxseg = tp->t_maxseg;
268 	xt->t_srtt = tp->t_srtt;
269 	xt->t_rttvar = tp->t_rttvar;
270 	xt->t_rxtshift = tp->t_rxtshift;
271 	xt->t_rttmin = tp->t_rttmin;
272 	xt->t_rttupdated = tp->t_rttupdated;
273 	xt->max_sndwnd = tp->max_sndwnd;
274 	xt->t_softerror = tp->t_softerror;
275 	xt->t_oobflags = tp->t_oobflags;
276 	xt->t_iobc = tp->t_iobc;
277 	xt->snd_scale = tp->snd_scale;
278 	xt->rcv_scale = tp->rcv_scale;
279 	xt->request_r_scale = tp->request_r_scale;
280 	xt->requested_s_scale = tp->requested_s_scale;
281 	xt->ts_recent = tp->ts_recent;
282 	xt->ts_recent_age = tp->ts_recent_age;
283 	xt->last_ack_sent = tp->last_ack_sent;
284 	xt->cc_send = 0;
285 	xt->cc_recv = 0;
286 	xt->snd_recover = tp->snd_recover;
287 	xt->snd_cwnd_prev = tp->snd_cwnd_prev;
288 	xt->snd_ssthresh_prev = tp->snd_ssthresh_prev;
289 }
290 
291 __private_extern__ int
get_pcblist_n(short proto,struct sysctl_req * req,struct inpcbinfo * pcbinfo)292 get_pcblist_n(short proto, struct sysctl_req *req, struct inpcbinfo *pcbinfo)
293 {
294 	int error = 0;
295 	int i, n, sz;
296 	struct inpcb *inp, **inp_list = NULL;
297 	inp_gen_t gencnt;
298 	struct xinpgen xig;
299 	void *buf = NULL;
300 	size_t item_size = ROUNDUP64(sizeof(struct xinpcb_n)) +
301 	    ROUNDUP64(sizeof(struct xsocket_n)) +
302 	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
303 	    ROUNDUP64(sizeof(struct xsockstat_n));
304 #if SKYWALK
305 	int nuserland;
306 	void *userlandsnapshot = NULL;
307 #endif /* SKYWALK */
308 
309 	if (proto == IPPROTO_TCP) {
310 		item_size += ROUNDUP64(sizeof(struct xtcpcb_n));
311 	}
312 
313 	if (req->oldptr == USER_ADDR_NULL) {
314 		n = pcbinfo->ipi_count;
315 #if SKYWALK
316 		n += ntstat_userland_count(proto);
317 #endif /* SKYWALK */
318 		req->oldidx = 2 * (sizeof(xig)) + (n + n / 8 + 1) * item_size;
319 		return 0;
320 	}
321 
322 	if (req->newptr != USER_ADDR_NULL) {
323 		return EPERM;
324 	}
325 
326 #if SKYWALK
327 	/*
328 	 * Get a snapshot of the state of the user level flows so we know
329 	 * the exact number of results to give back to the user.
330 	 * This could take a while and use other locks, so do this prior
331 	 * to taking any locks of our own.
332 	 */
333 	error = nstat_userland_get_snapshot(proto, &userlandsnapshot, &nuserland);
334 
335 	if (error) {
336 		return error;
337 	}
338 #endif /* SKYWALK */
339 
340 	/*
341 	 * The process of preparing the PCB list is too time-consuming and
342 	 * resource-intensive to repeat twice on every request.
343 	 */
344 	lck_rw_lock_exclusive(&pcbinfo->ipi_lock);
345 	/*
346 	 * OK, now we're committed to doing something.
347 	 */
348 	gencnt = pcbinfo->ipi_gencnt;
349 	n = sz = pcbinfo->ipi_count;
350 
351 	bzero(&xig, sizeof(xig));
352 	xig.xig_len = sizeof(xig);
353 	xig.xig_count = n;
354 #if SKYWALK
355 	xig.xig_count += nuserland;
356 #endif /* SKYWALK */
357 	xig.xig_gen = gencnt;
358 	xig.xig_sogen = so_gencnt;
359 	error = SYSCTL_OUT(req, &xig, sizeof(xig));
360 	if (error) {
361 		goto done;
362 	}
363 	/*
364 	 * We are done if there is no pcb
365 	 */
366 	if (xig.xig_count == 0) {
367 		goto done;
368 	}
369 
370 	buf = kalloc_data(item_size, Z_WAITOK);
371 	if (buf == NULL) {
372 		error = ENOMEM;
373 		goto done;
374 	}
375 
376 	inp_list = kalloc_type(struct inpcb *, n, Z_WAITOK);
377 	if (inp_list == NULL) {
378 		error = ENOMEM;
379 		goto done;
380 	}
381 
382 	/*
383 	 * Special case TCP to include the connections in time wait
384 	 */
385 	if (proto == IPPROTO_TCP) {
386 		n = get_tcp_inp_list(inp_list, n, gencnt);
387 	} else {
388 		for (inp = pcbinfo->ipi_listhead->lh_first, i = 0; inp && i < n;
389 		    inp = inp->inp_list.le_next) {
390 			if (inp->inp_gencnt <= gencnt &&
391 			    inp->inp_state != INPCB_STATE_DEAD) {
392 				inp_list[i++] = inp;
393 			}
394 		}
395 		n = i;
396 	}
397 
398 
399 	error = 0;
400 	for (i = 0; i < n; i++) {
401 		inp = inp_list[i];
402 		if (inp->inp_gencnt <= gencnt &&
403 		    inp->inp_state != INPCB_STATE_DEAD) {
404 			struct xinpcb_n *xi = (struct xinpcb_n *)buf;
405 			struct xsocket_n *xso = (struct xsocket_n *)
406 			    ADVANCE64(xi, sizeof(*xi));
407 			struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
408 			    ADVANCE64(xso, sizeof(*xso));
409 			struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
410 			    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
411 			struct xsockstat_n *xsostats = (struct xsockstat_n *)
412 			    ADVANCE64(xsbsnd, sizeof(*xsbsnd));
413 
414 			bzero(buf, item_size);
415 
416 			inpcb_to_xinpcb_n(inp, xi);
417 			sotoxsocket_n(inp->inp_socket, xso);
418 			sbtoxsockbuf_n(inp->inp_socket ?
419 			    &inp->inp_socket->so_rcv : NULL, xsbrcv);
420 			sbtoxsockbuf_n(inp->inp_socket ?
421 			    &inp->inp_socket->so_snd : NULL, xsbsnd);
422 			sbtoxsockstat_n(inp->inp_socket, xsostats);
423 			if (proto == IPPROTO_TCP) {
424 				struct  xtcpcb_n *xt = (struct xtcpcb_n *)
425 				    ADVANCE64(xsostats, sizeof(*xsostats));
426 
427 				/*
428 				 * inp->inp_ppcb, can only be NULL on
429 				 * an initialization race window.
430 				 * No need to lock.
431 				 */
432 				if (inp->inp_ppcb == NULL) {
433 					continue;
434 				}
435 
436 				tcpcb_to_xtcpcb_n((struct tcpcb *)
437 				    inp->inp_ppcb, xt);
438 			}
439 			error = SYSCTL_OUT(req, buf, item_size);
440 			if (error) {
441 				break;
442 			}
443 		}
444 	}
445 #if SKYWALK
446 	if (!error && nuserland > 0) {
447 		error = nstat_userland_list_snapshot(proto, req, userlandsnapshot, nuserland);
448 	}
449 #endif /* SKYWALK */
450 
451 	if (!error) {
452 		/*
453 		 * Give the user an updated idea of our state.
454 		 * If the generation differs from what we told
455 		 * her before, she knows that something happened
456 		 * while we were processing this request, and it
457 		 * might be necessary to retry.
458 		 */
459 		bzero(&xig, sizeof(xig));
460 		xig.xig_len = sizeof(xig);
461 		xig.xig_gen = pcbinfo->ipi_gencnt;
462 		xig.xig_sogen = so_gencnt;
463 		xig.xig_count = pcbinfo->ipi_count;
464 #if SKYWALK
465 		xig.xig_count +=  nuserland;
466 #endif /* SKYWALK */
467 		error = SYSCTL_OUT(req, &xig, sizeof(xig));
468 	}
469 done:
470 	lck_rw_done(&pcbinfo->ipi_lock);
471 
472 #if SKYWALK
473 	nstat_userland_release_snapshot(userlandsnapshot, nuserland);
474 #endif /* SKYWALK */
475 
476 	kfree_type(struct inpcb *, sz, inp_list);
477 	if (buf != NULL) {
478 		kfree_data(buf, item_size);
479 	}
480 	return error;
481 }
482 
483 static void
inpcb_get_if_ports_used(ifnet_t ifp,int protocol,uint32_t flags,bitstr_t * bitfield,struct inpcbinfo * pcbinfo)484 inpcb_get_if_ports_used(ifnet_t ifp, int protocol, uint32_t flags,
485     bitstr_t *bitfield, struct inpcbinfo *pcbinfo)
486 {
487 	struct inpcb *inp;
488 	struct socket *so;
489 	inp_gen_t gencnt;
490 	bool iswildcard, wildcardok, nowakeok;
491 	bool recvanyifonly, extbgidleok;
492 	bool activeonly;
493 	bool anytcpstateok;
494 
495 	if (ifp == NULL) {
496 		return;
497 	}
498 
499 	wildcardok = ((flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) != 0);
500 	nowakeok = ((flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) != 0);
501 	recvanyifonly = ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) != 0);
502 	extbgidleok = ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) != 0);
503 	activeonly = ((flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) != 0);
504 	anytcpstateok = ((flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) != 0);
505 
506 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
507 	gencnt = pcbinfo->ipi_gencnt;
508 
509 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp;
510 	    inp = LIST_NEXT(inp, inp_list)) {
511 		if (inp->inp_gencnt > gencnt ||
512 		    inp->inp_state == INPCB_STATE_DEAD ||
513 		    inp->inp_wantcnt == WNT_STOPUSING) {
514 			continue;
515 		}
516 
517 		if ((so = inp->inp_socket) == NULL || inp->inp_lport == 0) {
518 			continue;
519 		}
520 
521 		/*
522 		 * ANYTCPSTATEOK means incoming packets cannot be filtered
523 		 * reception so cast a wide net of possibilities
524 		 */
525 		if (!anytcpstateok &&
526 		    ((so->so_state & SS_DEFUNCT) ||
527 		    (so->so_state & SS_ISDISCONNECTED))) {
528 			continue;
529 		}
530 
531 		/*
532 		 * If protocol is specified, filter out inpcbs that
533 		 * are not relevant to the protocol family of interest.
534 		 */
535 		if (protocol != PF_UNSPEC) {
536 			if (protocol == PF_INET) {
537 				/*
538 				 * If protocol of interest is IPv4, skip the inpcb
539 				 * if the family is not IPv4.
540 				 * OR
541 				 * If the family is IPv4, skip if the IPv4 flow is
542 				 * CLAT46 translated.
543 				 */
544 				if ((inp->inp_vflag & INP_IPV4) == 0 ||
545 				    (inp->inp_flags2 & INP2_CLAT46_FLOW) != 0) {
546 					continue;
547 				}
548 			} else if (protocol == PF_INET6) {
549 				/*
550 				 * If protocol of interest is IPv6, skip the inpcb
551 				 * if the family is not IPv6.
552 				 * AND
553 				 * The flow is not a CLAT46'd flow.
554 				 */
555 				if ((inp->inp_vflag & INP_IPV6) == 0 &&
556 				    (inp->inp_flags2 & INP2_CLAT46_FLOW) == 0) {
557 					continue;
558 				}
559 			} else {
560 				/* Protocol family not supported */
561 				continue;
562 			}
563 		}
564 
565 		if (SOCK_PROTO(inp->inp_socket) != IPPROTO_UDP &&
566 		    SOCK_PROTO(inp->inp_socket) != IPPROTO_TCP) {
567 			continue;
568 		}
569 
570 		iswildcard = (((inp->inp_vflag & INP_IPV4) &&
571 		    inp->inp_laddr.s_addr == INADDR_ANY) ||
572 		    ((inp->inp_vflag & INP_IPV6) &&
573 		    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)));
574 
575 		if (!wildcardok && iswildcard) {
576 			continue;
577 		}
578 
579 		if (!(inp->inp_flags & INP_RECV_ANYIF) &&
580 		    recvanyifonly) {
581 			continue;
582 		}
583 
584 		if (!(so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) &&
585 		    extbgidleok) {
586 			continue;
587 		}
588 
589 		if (!iswildcard &&
590 		    !(inp->inp_last_outifp == NULL || ifp == inp->inp_last_outifp)) {
591 			continue;
592 		}
593 
594 		if (!iswildcard && (ifp->if_eflags & IFEF_AWDL) != 0) {
595 			if (inp->inp_route.ro_rt == NULL ||
596 			    (inp->inp_route.ro_rt->rt_flags & (RTF_UP | RTF_CONDEMNED)) != RTF_UP) {
597 #if DEBUG || DEVELOPMENT
598 				char lbuf[MAX_IPv6_STR_LEN + 6] = {};
599 				char fbuf[MAX_IPv6_STR_LEN + 6] = {};
600 				char pname[MAXCOMLEN + 1];
601 
602 				proc_name(so->last_pid, pname, sizeof(pname));
603 
604 				if (protocol == PF_INET) {
605 					inet_ntop(PF_INET, &inp->inp_laddr.s_addr,
606 					    lbuf, sizeof(lbuf));
607 					inet_ntop(PF_INET, &inp->inp_faddr.s_addr,
608 					    fbuf, sizeof(fbuf));
609 				} else {
610 					inet_ntop(PF_INET6, &inp->in6p_laddr.s6_addr,
611 					    lbuf, sizeof(lbuf));
612 					inet_ntop(PF_INET6, &inp->in6p_faddr.s6_addr,
613 					    fbuf, sizeof(fbuf));
614 				}
615 
616 				os_log(OS_LOG_DEFAULT,
617 				    "inpcb_get_if_ports_used: route is down %s %s:%u %s:%u ifp %s proc %s:%d",
618 				    SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP ? "tcp" : "udp",
619 				    lbuf, ntohs(inp->inp_lport), fbuf, ntohs(inp->inp_fport),
620 				    ifp->if_xname, pname, so->last_pid);
621 #endif /* DEBUG || DEVELOPMENT */
622 				continue;
623 			}
624 		}
625 
626 		if (SOCK_PROTO(inp->inp_socket) == IPPROTO_UDP &&
627 		    so->so_state & SS_CANTRCVMORE) {
628 			continue;
629 		}
630 
631 		if (SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP) {
632 			struct  tcpcb *tp = sototcpcb(inp->inp_socket);
633 
634 			/*
635 			 * Workaround race where inp_ppcb is NULL during
636 			 * socket initialization
637 			 */
638 			if (tp == NULL) {
639 				continue;
640 			}
641 
642 			switch (tp->t_state) {
643 			case TCPS_CLOSED:
644 				if (anytcpstateok && inp->inp_fport != 0) {
645 					/*
646 					 * A foreign port means we had a 4 tuple at
647 					 * least a connection attempt so packets
648 					 * may be received for the 4 tuple after the
649 					 * connection is gone
650 					 */
651 					break;
652 				}
653 				continue;
654 			/* NOT REACHED */
655 			case TCPS_LISTEN:
656 			case TCPS_SYN_SENT:
657 			case TCPS_SYN_RECEIVED:
658 			case TCPS_ESTABLISHED:
659 			case TCPS_FIN_WAIT_1:
660 				/*
661 				 * Note: FIN_WAIT_1 is an active state
662 				 * because we need our FIN to be
663 				 * acknowledged
664 				 */
665 				break;
666 			case TCPS_CLOSE_WAIT:
667 			case TCPS_CLOSING:
668 			case TCPS_LAST_ACK:
669 			case TCPS_FIN_WAIT_2:
670 				/*
671 				 * In the closing states, the connection
672 				 * is active when there is outgoing
673 				 * data having to be acknowledged
674 				 */
675 				if (!anytcpstateok &&
676 				    (activeonly && so->so_snd.sb_cc == 0)) {
677 					continue;
678 				}
679 				break;
680 			case TCPS_TIME_WAIT:
681 				if (anytcpstateok) {
682 					/*
683 					 * Packets may still be received for the 4 tuple
684 					 * after the connection is gone
685 					 */
686 					break;
687 				}
688 				continue;
689 				/* NOT REACHED */
690 			}
691 		}
692 
693 #if DEBUG || DEVELOPMENT
694 		if ((so->so_options & SO_NOWAKEFROMSLEEP) && !nowakeok) {
695 			char lbuf[MAX_IPv6_STR_LEN + 6] = {};
696 			char fbuf[MAX_IPv6_STR_LEN + 6] = {};
697 			char pname[MAXCOMLEN + 1];
698 
699 			proc_name(so->last_pid, pname, sizeof(pname));
700 
701 			if (protocol == PF_INET) {
702 				inet_ntop(PF_INET, &inp->inp_laddr.s_addr,
703 				    lbuf, sizeof(lbuf));
704 				inet_ntop(PF_INET, &inp->inp_faddr.s_addr,
705 				    fbuf, sizeof(fbuf));
706 			} else {
707 				inet_ntop(PF_INET6, &inp->in6p_laddr.s6_addr,
708 				    lbuf, sizeof(lbuf));
709 				inet_ntop(PF_INET6, &inp->in6p_faddr.s6_addr,
710 				    fbuf, sizeof(fbuf));
711 			}
712 
713 			os_log(OS_LOG_DEFAULT,
714 			    "inpcb_get_if_ports_used: no wake from sleep %s %s:%u %s:%u ifp %s proc %s:%d",
715 			    SOCK_PROTO(inp->inp_socket) == IPPROTO_TCP ? "tcp" : "udp",
716 			    lbuf, ntohs(inp->inp_lport), fbuf, ntohs(inp->inp_fport),
717 			    ifp->if_xname, pname, so->last_pid);
718 		}
719 #endif /* DEBUG || DEVELOPMENT */
720 
721 
722 		/*
723 		 * When the socket has "no wake from sleep" option, do not set the port in the bitmap
724 		 * except if explicetely requested by the driver.
725 		 * We always add the socket to the list of port in order to report spurious wakes
726 		 */
727 		if ((so->so_options & SO_NOWAKEFROMSLEEP) == 0 || nowakeok) {
728 			bitstr_set(bitfield, ntohs(inp->inp_lport));
729 		}
730 
731 		(void) if_ports_used_add_inpcb(ifp->if_index, inp);
732 	}
733 	lck_rw_done(&pcbinfo->ipi_lock);
734 }
735 
736 __private_extern__ void
inpcb_get_ports_used(ifnet_t ifp,int protocol,uint32_t flags,bitstr_t * bitfield,struct inpcbinfo * pcbinfo)737 inpcb_get_ports_used(ifnet_t ifp, int protocol, uint32_t flags,
738     bitstr_t *bitfield, struct inpcbinfo *pcbinfo)
739 {
740 	if (ifp != NULL) {
741 		inpcb_get_if_ports_used(ifp, protocol, flags, bitfield, pcbinfo);
742 	} else {
743 		errno_t error;
744 		ifnet_t *ifp_list;
745 		uint32_t count, i;
746 
747 		error = ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp_list, &count);
748 		if (error != 0) {
749 			os_log_error(OS_LOG_DEFAULT,
750 			    "%s: ifnet_list_get_all() failed %d",
751 			    __func__, error);
752 			return;
753 		}
754 		for (i = 0; i < count; i++) {
755 			if (TAILQ_EMPTY(&ifp_list[i]->if_addrhead)) {
756 				continue;
757 			}
758 			inpcb_get_if_ports_used(ifp_list[i], protocol, flags,
759 			    bitfield, pcbinfo);
760 		}
761 		ifnet_list_free(ifp_list);
762 	}
763 }
764 
765 __private_extern__ uint32_t
inpcb_count_opportunistic(unsigned int ifindex,struct inpcbinfo * pcbinfo,u_int32_t flags)766 inpcb_count_opportunistic(unsigned int ifindex, struct inpcbinfo *pcbinfo,
767     u_int32_t flags)
768 {
769 	uint32_t opportunistic = 0;
770 	struct inpcb *inp;
771 	inp_gen_t gencnt;
772 
773 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
774 	gencnt = pcbinfo->ipi_gencnt;
775 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
776 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
777 		if (inp->inp_gencnt <= gencnt &&
778 		    inp->inp_state != INPCB_STATE_DEAD &&
779 		    inp->inp_socket != NULL &&
780 		    so_get_opportunistic(inp->inp_socket) &&
781 		    inp->inp_last_outifp != NULL &&
782 		    ifindex == inp->inp_last_outifp->if_index) {
783 			opportunistic++;
784 			struct socket *so = inp->inp_socket;
785 			if ((flags & INPCB_OPPORTUNISTIC_SETCMD) &&
786 			    (so->so_state & SS_ISCONNECTED)) {
787 				socket_lock(so, 1);
788 				if (flags & INPCB_OPPORTUNISTIC_THROTTLEON) {
789 					so->so_flags |= SOF_SUSPENDED;
790 					soevent(so,
791 					    (SO_FILT_HINT_LOCKED |
792 					    SO_FILT_HINT_SUSPEND));
793 				} else {
794 					so->so_flags &= ~(SOF_SUSPENDED);
795 					soevent(so,
796 					    (SO_FILT_HINT_LOCKED |
797 					    SO_FILT_HINT_RESUME));
798 				}
799 				SOTHROTTLELOG("throttle[%d]: so 0x%llx "
800 				    "[%d,%d] %s\n", so->last_pid,
801 				    (uint64_t)VM_KERNEL_ADDRHASH(so),
802 				    SOCK_DOM(so), SOCK_TYPE(so),
803 				    (so->so_flags & SOF_SUSPENDED) ?
804 				    "SUSPENDED" : "RESUMED");
805 				socket_unlock(so, 1);
806 			}
807 		}
808 	}
809 
810 	lck_rw_done(&pcbinfo->ipi_lock);
811 
812 	return opportunistic;
813 }
814 
815 __private_extern__ uint32_t
inpcb_find_anypcb_byaddr(struct ifaddr * ifa,struct inpcbinfo * pcbinfo)816 inpcb_find_anypcb_byaddr(struct ifaddr *ifa, struct inpcbinfo *pcbinfo)
817 {
818 	struct inpcb *inp;
819 	inp_gen_t gencnt = pcbinfo->ipi_gencnt;
820 	struct socket *so = NULL;
821 	int af;
822 
823 	if ((ifa->ifa_addr->sa_family != AF_INET) &&
824 	    (ifa->ifa_addr->sa_family != AF_INET6)) {
825 		return 0;
826 	}
827 
828 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
829 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
830 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
831 		if (inp->inp_gencnt <= gencnt &&
832 		    inp->inp_state != INPCB_STATE_DEAD &&
833 		    inp->inp_socket != NULL) {
834 			so = inp->inp_socket;
835 			af = SOCK_DOM(so);
836 			if (af != ifa->ifa_addr->sa_family) {
837 				continue;
838 			}
839 			if (inp->inp_last_outifp != ifa->ifa_ifp) {
840 				continue;
841 			}
842 
843 			if (af == AF_INET) {
844 				if (inp->inp_laddr.s_addr ==
845 				    (satosin(ifa->ifa_addr))->sin_addr.s_addr) {
846 					lck_rw_done(&pcbinfo->ipi_lock);
847 					return 1;
848 				}
849 			}
850 			if (af == AF_INET6) {
851 				if (in6_are_addr_equal_scoped(IFA_IN6(ifa), &inp->in6p_laddr, SIN6(ifa->ifa_addr)->sin6_scope_id, inp->inp_lifscope)) {
852 					lck_rw_done(&pcbinfo->ipi_lock);
853 					return 1;
854 				}
855 			}
856 		}
857 	}
858 	lck_rw_done(&pcbinfo->ipi_lock);
859 	return 0;
860 }
861 
862 static int
shutdown_sockets_on_interface_proc_callout(proc_t p,void * arg)863 shutdown_sockets_on_interface_proc_callout(proc_t p, void *arg)
864 {
865 	struct fileproc *fp;
866 	struct ifnet *ifp = (struct ifnet *)arg;
867 
868 	if (ifp == NULL) {
869 		return PROC_RETURNED;
870 	}
871 
872 	proc_fdlock(p);
873 
874 	fdt_foreach(fp, p) {
875 		struct fileglob *fg = fp->fp_glob;
876 		struct socket *so;
877 		struct inpcb *inp;
878 		struct ifnet *inp_ifp;
879 		int error;
880 
881 		if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) {
882 			continue;
883 		}
884 
885 		so = (struct socket *)fp_get_data(fp);
886 		if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) {
887 			continue;
888 		}
889 
890 		inp = (struct inpcb *)so->so_pcb;
891 
892 		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
893 			continue;
894 		}
895 
896 		socket_lock(so, 1);
897 
898 		if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
899 			socket_unlock(so, 1);
900 			continue;
901 		}
902 
903 		if (inp->inp_boundifp != NULL) {
904 			inp_ifp = inp->inp_boundifp;
905 		} else if (inp->inp_last_outifp != NULL) {
906 			inp_ifp = inp->inp_last_outifp;
907 		} else {
908 			socket_unlock(so, 1);
909 			continue;
910 		}
911 
912 		if (inp_ifp != ifp && inp_ifp->if_delegated.ifp != ifp) {
913 			socket_unlock(so, 1);
914 			continue;
915 		}
916 		error = sosetdefunct(p, so, 0, TRUE);
917 		if (error != 0) {
918 			log(LOG_ERR, "%s: sosetdefunct() error %d",
919 			    __func__, error);
920 		} else {
921 			error = sodefunct(p, so, 0);
922 			if (error != 0) {
923 				log(LOG_ERR, "%s: sodefunct() error %d",
924 				    __func__, error);
925 			}
926 		}
927 
928 		socket_unlock(so, 1);
929 	}
930 	proc_fdunlock(p);
931 
932 	return PROC_RETURNED;
933 }
934 
935 void
shutdown_sockets_on_interface(struct ifnet * ifp)936 shutdown_sockets_on_interface(struct ifnet *ifp)
937 {
938 	proc_iterate(PROC_ALLPROCLIST,
939 	    shutdown_sockets_on_interface_proc_callout,
940 	    ifp, NULL, NULL);
941 }
942 
943 __private_extern__ int
inp_limit_companion_link(struct inpcbinfo * pcbinfo,u_int32_t limit)944 inp_limit_companion_link(struct inpcbinfo *pcbinfo, u_int32_t limit)
945 {
946 	struct inpcb *inp;
947 	struct socket *so = NULL;
948 
949 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
950 	inp_gen_t gencnt = pcbinfo->ipi_gencnt;
951 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
952 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
953 		if (inp->inp_gencnt <= gencnt &&
954 		    inp->inp_state != INPCB_STATE_DEAD &&
955 		    inp->inp_socket != NULL) {
956 			so = inp->inp_socket;
957 
958 			if ((so->so_state & SS_DEFUNCT) || so->so_state & SS_ISDISCONNECTED ||
959 			    SOCK_PROTO(so) != IPPROTO_TCP || inp->inp_last_outifp == NULL ||
960 			    !IFNET_IS_COMPANION_LINK(inp->inp_last_outifp)) {
961 				continue;
962 			}
963 			so->so_snd.sb_flags &= ~SB_LIMITED;
964 			u_int32_t new_size = MAX(MIN(limit, so->so_snd.sb_lowat), so->so_snd.sb_cc);
965 			sbreserve(&so->so_snd, new_size);
966 			so->so_snd.sb_flags |= SB_LIMITED;
967 		}
968 	}
969 	lck_rw_done(&pcbinfo->ipi_lock);
970 	return 0;
971 }
972 
973 __private_extern__ int
inp_recover_companion_link(struct inpcbinfo * pcbinfo)974 inp_recover_companion_link(struct inpcbinfo *pcbinfo)
975 {
976 	struct inpcb *inp;
977 	inp_gen_t gencnt = pcbinfo->ipi_gencnt;
978 	struct socket *so = NULL;
979 
980 	lck_rw_lock_shared(&pcbinfo->ipi_lock);
981 	for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
982 	    inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
983 		if (inp->inp_gencnt <= gencnt &&
984 		    inp->inp_state != INPCB_STATE_DEAD &&
985 		    inp->inp_socket != NULL) {
986 			so = inp->inp_socket;
987 
988 			if (SOCK_PROTO(so) != IPPROTO_TCP || inp->inp_last_outifp == NULL ||
989 			    !(so->so_snd.sb_flags & SB_LIMITED)) {
990 				continue;
991 			}
992 
993 			so->so_snd.sb_flags &= ~SB_LIMITED;
994 		}
995 	}
996 	lck_rw_done(&pcbinfo->ipi_lock);
997 	return 0;
998 }
999