xref: /xnu-8020.101.4/bsd/kern/uipc_usrreq.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
61  */
62 /*
63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64  * support for mandatory and extensible security protections.  This notice
65  * is included in support of clause 2.2 (b) of the Apple Public License,
66  * Version 2.0.
67  */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93 
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97 
98 #if CONFIG_MACF
99 #include <security/mac_framework.h>
100 #endif /* CONFIG_MACF */
101 
102 #include <mach/vm_param.h>
103 
104 /*
105  * Maximum number of FDs that can be passed in an mbuf
106  */
107 #define UIPC_MAX_CMSG_FD        512
108 
109 ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
110 static  unp_gen_t unp_gencnt;
111 static  u_int unp_count;
112 
113 static  LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
114 static  LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
115 static  LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
116 
117 static  LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
118 static  LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
119 static  LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
120 
121 static  u_int                   disconnect_in_progress;
122 
123 static struct unp_head unp_shead, unp_dhead;
124 static int      unp_defer;
125 static thread_call_t unp_gc_tcall;
126 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
127 
128 
129 /*
130  * mDNSResponder tracing.  When enabled, endpoints connected to
131  * /var/run/mDNSResponder will be traced; during each send on
132  * the traced socket, we log the PID and process name of the
133  * sending process.  We also print out a bit of info related
134  * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
135  * of mDNSResponder stays the same.
136  */
137 #define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"
138 
139 static int unpst_tracemdns;     /* enable tracing */
140 
141 #define MDNS_IPC_MSG_HDR_VERSION_1      1
142 
/*
 * Mirror of the mDNSResponder IPC message header (ipc_msg_hdr in
 * dnssd_ipc.h).  Used only by the UNP_TRACE_MDNS path in uipc_send()
 * to peek at traffic; fields are inspected via ntohl() there.
 * Layout must stay in sync with mDNSResponder — hence packed.
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
154 
155 /*
156  * Unix communications domain.
157  *
158  * TODO:
159  *	SEQPACKET, RDM
160  *	rethink name space problems
161  *	need a proper out-of-band
162  *	lock pushdown
163  */
164 static struct   sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
165 static ino_t    unp_ino;                /* prototype for fake inode numbers */
166 
167 static int      unp_attach(struct socket *);
168 static void     unp_detach(struct unpcb *);
169 static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
170 static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
171 static void     unp_disconnect(struct unpcb *);
172 static void     unp_shutdown(struct unpcb *);
173 static void     unp_drop(struct unpcb *, int);
174 static void     unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
175 static void     unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
176 static void     unp_mark(struct fileglob *, __unused void *);
177 static void     unp_discard(struct fileglob *, void *);
178 static int      unp_internalize(struct mbuf *, proc_t);
179 static int      unp_listen(struct unpcb *, proc_t);
180 static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
181 static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
182 
/*
 * One-time startup hook: allocate the thread call used to run the
 * unix-domain garbage collector (unp_gc) at kernel priority.
 */
__startup_func
static void
unp_gc_setup(void)
{
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
192 
/*
 * Acquire the locks of two connected unix-domain sockets in a stable
 * order (lower address first) to avoid deadlock.  `so` is locked on
 * entry and on return; `conn_so` is locked (with a reference) on return.
 *
 * When `so` sorts after `conn_so`, we must drop `so`'s lock before
 * taking `conn_so`'s.  While `so` is unlocked, UNP_DONTDISCONNECT and
 * rw_thrcount are set so that the pcb is not disconnected underneath
 * us (presumably honored by unp_disconnect — the waiter is woken via
 * wakeup(unp) when the last such thread finishes).
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
214 
215 static int
uipc_abort(struct socket * so)216 uipc_abort(struct socket *so)
217 {
218 	struct unpcb *unp = sotounpcb(so);
219 
220 	if (unp == 0) {
221 		return EINVAL;
222 	}
223 	unp_drop(unp, ECONNABORTED);
224 	unp_detach(unp);
225 	sofree(so);
226 	return 0;
227 }
228 
229 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)230 uipc_accept(struct socket *so, struct sockaddr **nam)
231 {
232 	struct unpcb *unp = sotounpcb(so);
233 
234 	if (unp == 0) {
235 		return EINVAL;
236 	}
237 
238 	/*
239 	 * Pass back name of connected socket,
240 	 * if it was bound and we are still connected
241 	 * (our peer may have closed already!).
242 	 */
243 	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
244 		*nam = dup_sockaddr((struct sockaddr *)
245 		    unp->unp_conn->unp_addr, 1);
246 	} else {
247 		os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
248 		    __func__, unp->unp_gencnt);
249 		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
250 	}
251 	return 0;
252 }
253 
254 /*
255  * Returns:	0			Success
256  *		EISCONN
257  *	unp_attach:
258  */
259 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)260 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
261 {
262 	struct unpcb *unp = sotounpcb(so);
263 
264 	if (unp != 0) {
265 		return EISCONN;
266 	}
267 	return unp_attach(so);
268 }
269 
270 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)271 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
272 {
273 	struct unpcb *unp = sotounpcb(so);
274 
275 	if (unp == 0) {
276 		return EINVAL;
277 	}
278 
279 	return unp_bind(unp, nam, p);
280 }
281 
282 /*
283  * Returns:	0			Success
284  *		EINVAL
285  *	unp_connect:???			[See elsewhere in this file]
286  */
287 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)288 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
289 {
290 	struct unpcb *unp = sotounpcb(so);
291 
292 	if (unp == 0) {
293 		return EINVAL;
294 	}
295 	return unp_connect(so, nam, p);
296 }
297 
298 /*
299  * Returns:	0			Success
300  *		EINVAL
301  *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
302  *	unp_connect2:EINVAL		Invalid argument
303  */
304 static int
uipc_connect2(struct socket * so1,struct socket * so2)305 uipc_connect2(struct socket *so1, struct socket *so2)
306 {
307 	struct unpcb *unp = sotounpcb(so1);
308 
309 	if (unp == 0) {
310 		return EINVAL;
311 	}
312 
313 	return unp_connect2(so1, so2);
314 }
315 
316 /* control is EOPNOTSUPP */
317 
318 static int
uipc_detach(struct socket * so)319 uipc_detach(struct socket *so)
320 {
321 	struct unpcb *unp = sotounpcb(so);
322 
323 	if (unp == 0) {
324 		return EINVAL;
325 	}
326 
327 	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
328 	unp_detach(unp);
329 	return 0;
330 }
331 
332 static int
uipc_disconnect(struct socket * so)333 uipc_disconnect(struct socket *so)
334 {
335 	struct unpcb *unp = sotounpcb(so);
336 
337 	if (unp == 0) {
338 		return EINVAL;
339 	}
340 	unp_disconnect(unp);
341 	return 0;
342 }
343 
344 /*
345  * Returns:	0			Success
346  *		EINVAL
347  */
348 static int
uipc_listen(struct socket * so,__unused proc_t p)349 uipc_listen(struct socket *so, __unused proc_t p)
350 {
351 	struct unpcb *unp = sotounpcb(so);
352 
353 	if (unp == 0 || unp->unp_vnode == 0) {
354 		return EINVAL;
355 	}
356 	return unp_listen(unp, p);
357 }
358 
/*
 * pru_peeraddr: return a copy of the peer's bound address, or the
 * unnamed sockaddr if the peer is gone or never bound a name.
 * Locking the peer via unp_get_locks_in_order() may briefly drop our
 * own lock, so unp_conn must be re-checked after the locks are held.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
	so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
	if (so2 != NULL) {
		unp_get_locks_in_order(so, so2);
	}

	/* Re-validate: the peer may have disconnected while we were unlocked */
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr((struct sockaddr *)
		    unp->unp_conn->unp_addr, 1);
	} else {
		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
	}
	if (so2 != NULL) {
		socket_unlock(so2, 1);
	}
	return 0;
}
384 
/*
 * pru_rcvd: called after the receiver has consumed data.  For
 * SOCK_STREAM, credit the space freed in our receive buffer back to
 * the peer's send buffer (releasing backpressure applied in
 * uipc_send) and wake any blocked writer.  SOCK_DGRAM never issues
 * rcvd, hence the panic.
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		return EINVAL;
	}
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
	/*NOTREACHED*/

	case SOCK_STREAM:
	/* Shorthand: rcv = our receive buffer, snd = the peer's send buffer */
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
431 
432 /* pru_rcvoob is EOPNOTSUPP */
433 
434 /*
435  * Returns:	0			Success
436  *		EINVAL
437  *		EOPNOTSUPP
438  *		EPIPE
439  *		ENOTCONN
440  *		EISCONN
441  *	unp_internalize:EINVAL
442  *	unp_internalize:EBADF
443  *	unp_connect:EAFNOSUPPORT	Address family not supported
444  *	unp_connect:EINVAL		Invalid argument
445  *	unp_connect:ENOTSOCK		Not a socket
446  *	unp_connect:ECONNREFUSED	Connection refused
447  *	unp_connect:EISCONN		Socket is connected
448  *	unp_connect:EPROTOTYPE		Protocol wrong type for socket
449  *	unp_connect:???
450  *	sbappendaddr:ENOBUFS		[5th argument, contents modified]
451  *	sbappendaddr:???		[whatever a filter author chooses]
452  */
/*
 * pru_send: deliver `m` (and optionally `control`) directly into the
 * peer's receive buffer.  File descriptors in `control` are
 * internalized first.  For SOCK_STREAM, the sender's hiwat/mbmax are
 * reduced to mirror what the receiver now holds (flow control, undone
 * in uipc_rcvd).  `m` and `control` are consumed on all paths.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		/* An explicit destination implies a temporary connect. */
		if (nam) {
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/* so == so2 when a datagram socket is sending to itself */
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = (struct sockaddr *)unp->unp_addr;
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/* mbuf chain ownership passed to sbappendaddr() either way */
		m = NULL;
		if (nam) {
			/* Undo the implicit connect performed above */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
	/* Shorthand: rcv = the peer's receive buffer, snd = our send buffer */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break;  /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		/* Optional mDNSResponder tracing; see MDNSRESPONDER_PATH above */
		if (unp->unp_flags & UNP_TRACE_MDNS) {
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full.  It is
		 * possible, however, that append might fail.  In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		/* Clamp hiwat at zero instead of letting it wrap */
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	/* On error, strip internalized FDs before the control mbuf is freed */
	if (control && error != 0) {
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
663 
/*
 * pru_sense: fill in fstat(2) information for a unix-domain socket.
 * `ub` points at a struct stat or struct stat64 depending on
 * `isstat64`.  The reported block size is the send hiwat plus, for a
 * connected stream, the bytes queued at the peer; the inode number is
 * a fake one handed out lazily from the global unp_ino counter.
 */
static int
uipc_sense(struct socket *so, void *ub, int isstat64)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	blksize_t blksize;

	if (unp == 0) {
		return EINVAL;
	}

	blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
		so2 = unp->unp_conn->unp_socket;
		blksize += so2->so_rcv.sb_cc;
	}
	/* Allocate a fake inode number on first stat */
	if (unp->unp_ino == 0) {
		unp->unp_ino = unp_ino++;
	}

	if (isstat64 != 0) {
		struct stat64  *sb64;

		sb64 = (struct stat64 *)ub;
		sb64->st_blksize = blksize;
		sb64->st_dev = NODEV;
		sb64->st_ino = (ino64_t)unp->unp_ino;
	} else {
		struct stat *sb;

		sb = (struct stat *)ub;
		sb->st_blksize = blksize;
		sb->st_dev = NODEV;
		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
	}

	return 0;
}
702 
703 /*
704  * Returns:	0		Success
705  *		EINVAL
706  *
707  * Notes:	This is not strictly correct, as unp_shutdown() also calls
708  *		socantrcvmore().  These should maybe both be conditionalized
709  *		on the 'how' argument in soshutdown() as called from the
710  *		shutdown() system call.
711  */
712 static int
uipc_shutdown(struct socket * so)713 uipc_shutdown(struct socket *so)
714 {
715 	struct unpcb *unp = sotounpcb(so);
716 
717 	if (unp == 0) {
718 		return EINVAL;
719 	}
720 	socantsendmore(so);
721 	unp_shutdown(unp);
722 	return 0;
723 }
724 
725 /*
726  * Returns:	0			Success
727  *		EINVAL			Invalid argument
728  */
729 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)730 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
731 {
732 	struct unpcb *unp = sotounpcb(so);
733 
734 	if (unp == NULL) {
735 		return EINVAL;
736 	}
737 	if (unp->unp_addr != NULL) {
738 		*nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
739 	} else {
740 		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
741 	}
742 	return 0;
743 }
744 
/*
 * User-request dispatch table for the unix (local) protocol domain.
 * pru_control and pru_rcvoob are intentionally absent (EOPNOTSUPP;
 * see the comments near the respective handlers above).
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort =            uipc_abort,
	.pru_accept =           uipc_accept,
	.pru_attach =           uipc_attach,
	.pru_bind =             uipc_bind,
	.pru_connect =          uipc_connect,
	.pru_connect2 =         uipc_connect2,
	.pru_detach =           uipc_detach,
	.pru_disconnect =       uipc_disconnect,
	.pru_listen =           uipc_listen,
	.pru_peeraddr =         uipc_peeraddr,
	.pru_rcvd =             uipc_rcvd,
	.pru_send =             uipc_send,
	.pru_sense =            uipc_sense,
	.pru_shutdown =         uipc_shutdown,
	.pru_sockaddr =         uipc_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
764 
765 int
uipc_ctloutput(struct socket * so,struct sockopt * sopt)766 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
767 {
768 	struct unpcb *unp = sotounpcb(so);
769 	int error = 0;
770 	pid_t peerpid;
771 	proc_t p;
772 	task_t t;
773 	struct socket *peerso;
774 
775 	switch (sopt->sopt_dir) {
776 	case SOPT_GET:
777 		switch (sopt->sopt_name) {
778 		case LOCAL_PEERCRED:
779 			if (unp->unp_flags & UNP_HAVEPC) {
780 				error = sooptcopyout(sopt, &unp->unp_peercred,
781 				    sizeof(unp->unp_peercred));
782 			} else {
783 				if (so->so_type == SOCK_STREAM) {
784 					error = ENOTCONN;
785 				} else {
786 					error = EINVAL;
787 				}
788 			}
789 			break;
790 		case LOCAL_PEERPID:
791 		case LOCAL_PEEREPID:
792 			if (unp->unp_conn == NULL) {
793 				error = ENOTCONN;
794 				break;
795 			}
796 			peerso = unp->unp_conn->unp_socket;
797 			if (peerso == NULL) {
798 				panic("peer is connected but has no socket?");
799 			}
800 			unp_get_locks_in_order(so, peerso);
801 			if (sopt->sopt_name == LOCAL_PEEREPID &&
802 			    peerso->so_flags & SOF_DELEGATED) {
803 				peerpid = peerso->e_pid;
804 			} else {
805 				peerpid = peerso->last_pid;
806 			}
807 			socket_unlock(peerso, 1);
808 			error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
809 			break;
810 		case LOCAL_PEERUUID:
811 		case LOCAL_PEEREUUID:
812 			if (unp->unp_conn == NULL) {
813 				error = ENOTCONN;
814 				break;
815 			}
816 			peerso = unp->unp_conn->unp_socket;
817 			if (peerso == NULL) {
818 				panic("peer is connected but has no socket?");
819 			}
820 			unp_get_locks_in_order(so, peerso);
821 			if (sopt->sopt_name == LOCAL_PEEREUUID &&
822 			    peerso->so_flags & SOF_DELEGATED) {
823 				error = sooptcopyout(sopt, &peerso->e_uuid,
824 				    sizeof(peerso->e_uuid));
825 			} else {
826 				error = sooptcopyout(sopt, &peerso->last_uuid,
827 				    sizeof(peerso->last_uuid));
828 			}
829 			socket_unlock(peerso, 1);
830 			break;
831 		case LOCAL_PEERTOKEN:
832 			if (unp->unp_conn == NULL) {
833 				error = ENOTCONN;
834 				break;
835 			}
836 			peerso = unp->unp_conn->unp_socket;
837 			if (peerso == NULL) {
838 				panic("peer is connected but has no socket?");
839 			}
840 			unp_get_locks_in_order(so, peerso);
841 			peerpid = peerso->last_pid;
842 			p = proc_find(peerpid);
843 			if (p != PROC_NULL) {
844 				t = proc_task(p);
845 				if (t != TASK_NULL) {
846 					audit_token_t peertoken;
847 					mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
848 					if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
849 						error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
850 					} else {
851 						error = EINVAL;
852 					}
853 				} else {
854 					error = EINVAL;
855 				}
856 				proc_rele(p);
857 			} else {
858 				error = EINVAL;
859 			}
860 			socket_unlock(peerso, 1);
861 			break;
862 		default:
863 			error = EOPNOTSUPP;
864 			break;
865 		}
866 		break;
867 	case SOPT_SET:
868 	default:
869 		error = EOPNOTSUPP;
870 		break;
871 	}
872 
873 	return error;
874 }
875 
876 /*
877  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
878  * for stream sockets, although the total for sender and receiver is
879  * actually only PIPSIZ.
880  * Datagram sockets really use the sendspace as the maximum datagram size,
881  * and don't really want to reserve the sendspace.  Their recvspace should
882  * be large enough for at least one max-size datagram plus address.
883  */
#ifndef PIPSIZ
#define PIPSIZ  8192
#endif
/* Default stream buffer reservations (see comment above re PIPSIZ). */
static u_int32_t        unpst_sendspace = PIPSIZ;
static u_int32_t        unpst_recvspace = PIPSIZ;
static u_int32_t        unpdg_sendspace = 2 * 1024;       /* really max datagram size */
static u_int32_t        unpdg_recvspace = 4 * 1024;

static int      unp_rights;                     /* file descriptors in flight */
static int      unp_disposed;                   /* discarded file descriptors */

/* net.local.{stream,dgram}.* tunables and the read-only inflight counter */
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_recvspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_tracemdns, 0, "");
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_recvspace, 0, "");
SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
909 
910 /*
911  * Returns:	0			Success
912  *		ENOBUFS
913  *	soreserve:ENOBUFS
914  */
/*
 * Allocate and initialize a unix-domain pcb for `so`, reserving
 * default buffer space when the caller has not, and publish it on
 * the global stream or datagram pcb list.
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	/* Reserve default buffer space unless the caller already did */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	/* Z_NOFAIL: allocation cannot return NULL */
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Publish the new pcb on the global list under the list rwlock */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
973 
/*
 * Tear down a unix-domain pcb: unlink it from the global pcb list,
 * break its association with any bound vnode (dropping the vnode
 * usecount), disconnect from a connected peer, and drop every
 * datagram peer that still references us.  Called with the socket
 * locked; the lock is dropped and retaken internally, serialized
 * against concurrent disconnects via disconnect_in_progress.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	/* Remove from the global pcb list first */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: another thread may have cleared it while unlocked */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp);                /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	/* Drop every datagram pcb still connected to us, one at a time */
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1053 
1054 /*
1055  * Returns:	0			Success
1056  *		EAFNOSUPPORT
1057  *		EINVAL
1058  *		EADDRINUSE
1059  *		namei:???		[anything namei can return]
1060  *		vnode_authorize:???	[anything vnode_authorize can return]
1061  *
1062  * Notes:	p at this point is the current process, as this function is
1063  *		only called by sobind().
1064  */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	/*
	 * Check if the socket is already bound to an address
	 */
	if (unp->unp_vnode != NULL) {
		return EINVAL;
	}
	/*
	 * Check if the socket may have been shut down
	 */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		return EINVAL;
	}

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (namelen >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Copy the path into a local buffer and NUL-terminate it */
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

	/* Drop the socket lock across the (blocking) filesystem operations */
	socket_unlock(so, 0);

	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		socket_lock(so, 0);
		return EADDRINUSE;
	}

	/* Create the VSOCK filesystem node with the process's umask applied */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* MAC_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return error;
	}

	socket_lock(so, 0);

	/* Another thread may have bound this socket while the lock was dropped */
	if (unp->unp_vnode != NULL) {
		vnode_put(vp); /* drop the iocount */
		return EINVAL;
	}

	error = vnode_ref(vp);  /* gain a longterm reference */
	if (error) {
		vnode_put(vp); /* drop the iocount */
		return error;
	}

	/* Cross-link the vnode and the socket, and record the bound address */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
	vnode_put(vp);          /* drop the iocount */

	return 0;
}
1190 
1191 
1192 /*
1193  * Returns:	0			Success
1194  *		EAFNOSUPPORT		Address family not supported
1195  *		EINVAL			Invalid argument
1196  *		ENOTSOCK		Not a socket
1197  *		ECONNREFUSED		Connection refused
1198  *		EPROTOTYPE		Protocol wrong type for socket
1199  *		EISCONN			Socket is connected
1200  *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
1201  *	unp_connect2:EINVAL		Invalid argument
1202  *	namei:???			[anything namei can return]
1203  *	vnode_authorize:????		[anything vnode_authorize can return]
1204  *
1205  * Notes:	p at this point is the current process, as this function is
1206  *		only called by sosend(), sendfile(), and soconnectlock().
1207  */
static int
unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3, *list_so = NULL;
	struct unpcb *unp, *unp2, *unp3;
	vfs_context_t ctx = vfs_context_current();
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	unp = sotounpcb(so);
	so2 = so3 = NULL;

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (len >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}

	soisconnecting(so);

	/* Copy the path into a local buffer and NUL-terminate it */
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	/* Drop the socket lock across the (blocking) filesystem lookup */
	socket_unlock(so, 0);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	nameidone(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		socket_lock(so, 0);
		goto out;
	}

#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_connect(ctx, vp, so);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}

	/* unp_connect_lock serializes against unp_detach() clearing v_socket */
	lck_mtx_lock(&unp_connect_lock);

	/* No socket is bound to this vnode: the listener is already gone */
	if (vp->v_socket == 0) {
		lck_mtx_unlock(&unp_connect_lock);
		error = ECONNREFUSED;
		socket_lock(so, 0);
		goto out;
	}

	socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
	so2 = vp->v_socket;
	lck_mtx_unlock(&unp_connect_lock);


	if (so2->so_pcb == NULL) {
		error = ECONNREFUSED;
		if (so != so2) {
			socket_unlock(so2, 1);
			socket_lock(so, 0);
		} else {
			/* Release the reference held for the listen socket */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
		goto out;
	}

	/* Acquire both socket locks in pointer order to avoid deadlock */
	if (so < so2) {
		socket_unlock(so2, 0);
		socket_lock(so, 0);
		socket_lock(so2, 0);
	} else if (so > so2) {
		socket_lock(so, 0);
	}
	/*
	 * Check if socket was connected while we were trying to
	 * get the socket locks in order.
	 * XXX - probably shouldn't return an error for SOCK_DGRAM
	 */
	if ((so->so_state & SS_ISCONNECTED) != 0) {
		error = EISCONN;
		goto decref_out;
	}

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto decref_out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Release the incoming socket but keep a reference */
		socket_unlock(so, 0);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, nam)) == 0) {
			error = ECONNREFUSED;
			if (so != so2) {
				socket_unlock(so2, 1);
				socket_lock(so, 0);
			} else {
				socket_lock(so, 0);
				/* Release the reference held for
				 * listen socket.
				 */
				VERIFY(so2->so_usecount > 0);
				so2->so_usecount--;
			}
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));

		/* Here we need to have both so and so2 locks and so2
		 * is already locked. Lock ordering is required.
		 */
		if (so < so2) {
			socket_unlock(so2, 0);
			socket_lock(so, 0);
			socket_lock(so2, 0);
		} else {
			socket_lock(so, 0);
		}

		/* Check again if the socket state changed when its lock was released */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			socket_unlock(so2, 1);
			socket_lock(so3, 0);
			sofreelastref(so3, 1);
			goto out;
		}
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		/* Hold the reference on listening socket until the end */
		socket_unlock(so2, 0);
		list_so = so2;

		/* Lock ordering doesn't matter because so3 was just created */
		socket_lock(so3, 1);
		so2 = so3;

		/*
		 * Enable tracing for mDNSResponder endpoints.  (The use
		 * of sizeof instead of strlen below takes the null
		 * terminating character into account.)
		 */
		if (unpst_tracemdns &&
		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
		    sizeof(MDNSRESPONDER_PATH))) {
			unp->unp_flags |= UNP_TRACE_MDNS;
			unp2->unp_flags |= UNP_TRACE_MDNS;
		}
	}

	/* Link the two pcbs; so2 is so3 for stream sockets at this point */
	error = unp_connect2(so, so2);

decref_out:
	if (so2 != NULL) {
		if (so != so2) {
			socket_unlock(so2, 1);
		} else {
			/* Release the extra reference held for the listen socket.
			 * This is possible only for SOCK_DGRAM sockets. We refuse
			 * connecting to the same socket for SOCK_STREAM sockets.
			 */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
	}

	if (list_so != NULL) {
		socket_lock(list_so, 0);
		socket_unlock(list_so, 1);
	}

out:
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	vnode_put(vp);
	return error;
}
1439 
1440 /*
1441  * Returns:	0			Success
1442  *		EPROTOTYPE		Protocol wrong type for socket
1443  *		EINVAL			Invalid argument
1444  */
1445 int
unp_connect2(struct socket * so,struct socket * so2)1446 unp_connect2(struct socket *so, struct socket *so2)
1447 {
1448 	struct unpcb *unp = sotounpcb(so);
1449 	struct unpcb *unp2;
1450 
1451 	if (so2->so_type != so->so_type) {
1452 		return EPROTOTYPE;
1453 	}
1454 
1455 	unp2 = sotounpcb(so2);
1456 
1457 	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1458 	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1459 
1460 	/* Verify both sockets are still opened */
1461 	if (unp == 0 || unp2 == 0) {
1462 		return EINVAL;
1463 	}
1464 
1465 	unp->unp_conn = unp2;
1466 	so2->so_usecount++;
1467 
1468 	switch (so->so_type) {
1469 	case SOCK_DGRAM:
1470 		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
1471 
1472 		if (so != so2) {
1473 			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
1474 			/* Keep an extra reference on so2 that will be dropped
1475 			 * soon after getting the locks in order
1476 			 */
1477 			socket_unlock(so2, 0);
1478 			soisconnected(so);
1479 			unp_get_locks_in_order(so, so2);
1480 			VERIFY(so2->so_usecount > 0);
1481 			so2->so_usecount--;
1482 		} else {
1483 			soisconnected(so);
1484 		}
1485 
1486 		break;
1487 
1488 	case SOCK_STREAM:
1489 		/* This takes care of socketpair */
1490 		if (!(unp->unp_flags & UNP_HAVEPC) &&
1491 		    !(unp2->unp_flags & UNP_HAVEPC)) {
1492 			cru2x(kauth_cred_get(), &unp->unp_peercred);
1493 			unp->unp_flags |= UNP_HAVEPC;
1494 
1495 			cru2x(kauth_cred_get(), &unp2->unp_peercred);
1496 			unp2->unp_flags |= UNP_HAVEPC;
1497 		}
1498 		unp2->unp_conn = unp;
1499 		so->so_usecount++;
1500 
1501 		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
1502 		socket_unlock(so, 0);
1503 		soisconnected(so2);
1504 
1505 		/* Keep an extra reference on so2, that will be dropped soon after
1506 		 * getting the locks in order again.
1507 		 */
1508 		socket_unlock(so2, 0);
1509 
1510 		socket_lock(so, 0);
1511 		soisconnected(so);
1512 
1513 		unp_get_locks_in_order(so, so2);
1514 		/* Decrement the extra reference left before */
1515 		VERIFY(so2->so_usecount > 0);
1516 		so2->so_usecount--;
1517 		break;
1518 
1519 	default:
1520 		panic("unknown socket type %d in unp_connect2", so->so_type);
1521 	}
1522 	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
1523 	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
1524 	return 0;
1525 }
1526 
/*
 * unp_disconnect: break the connection between this pcb and its peer.
 *
 * Called and returns with the pcb's socket locked.  Serializes against
 * concurrent disconnects via the global disconnect_in_progress flag,
 * then takes both socket locks in pointer order before unlinking the
 * pcbs and dropping the usecounts taken by unp_connect2().
 */
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = NULL;
	struct socket *so2 = NULL, *so;
	struct socket *waitso;
	int so_locked = 1, strdisconn = 0;

	so = unp->unp_socket;
	if (unp->unp_conn == NULL) {
		return;
	}
	/* Wait for any disconnect already in flight to finish */
	lck_mtx_lock(&unp_disconnect_lock);
	while (disconnect_in_progress != 0) {
		if (so_locked == 1) {
			socket_unlock(so, 0);
			so_locked = 0;
		}
		(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
		    PSOCK, "disconnect", NULL);
	}
	disconnect_in_progress = 1;
	lck_mtx_unlock(&unp_disconnect_lock);

	if (so_locked == 0) {
		socket_lock(so, 0);
		so_locked = 1;
	}

	unp2 = unp->unp_conn;

	if (unp2 == 0 || unp2->unp_socket == NULL) {
		goto out;
	}
	so2 = unp2->unp_socket;

try_again:
	/* Take both socket locks in pointer order (self-connect needs one) */
	if (so == so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		waitso = so;
	} else if (so < so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		socket_lock(so2, 1);
		waitso = so2;
	} else {
		if (so_locked == 1) {
			socket_unlock(so, 0);
		}
		socket_lock(so2, 1);
		socket_lock(so, 0);
		waitso = so;
	}
	so_locked = 1;

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Check for the UNP_DONTDISCONNECT flag, if it
	 * is set, release both sockets and go to sleep
	 */

	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		so_locked = 0;

		/* PDROP releases waitso's lock while we sleep; retry from scratch */
		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
		    PSOCK | PDROP, "unpdisconnect", NULL);
		goto try_again;
	}

	if (unp->unp_conn == NULL) {
		panic("unp_conn became NULL after sleep");
	}

	/* Unlink and drop the usecount our pcb held on the peer socket */
	unp->unp_conn = NULL;
	VERIFY(so2->so_usecount > 0);
	so2->so_usecount--;

	if (unp->unp_flags & UNP_TRACE_MDNS) {
		unp->unp_flags &= ~UNP_TRACE_MDNS;
	}

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		break;

	case SOCK_STREAM:
		/* Stream links are bidirectional: also drop the peer's link to us */
		unp2->unp_conn = NULL;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;

		/*
		 * Set the socket state correctly but do a wakeup later when
		 * we release all locks except the socket lock, this will avoid
		 * a deadlock.
		 */
		unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		if (unp2->unp_flags & UNP_TRACE_MDNS) {
			unp2->unp_flags &= ~UNP_TRACE_MDNS;
		}

		strdisconn = 1;
		break;
	default:
		panic("unknown socket type %d", so->so_type);
	}
out:
	/* Allow other disconnects to proceed */
	lck_mtx_lock(&unp_disconnect_lock);
	disconnect_in_progress = 0;
	wakeup(&disconnect_in_progress);
	lck_mtx_unlock(&unp_disconnect_lock);

	if (strdisconn) {
		socket_unlock(so, 0);
		soisdisconnected(so2);
		socket_unlock(so2, 1);

		socket_lock(so, 0);
		soisdisconnected(so);
	}
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return;
}
1666 
1667 /*
1668  * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1669  * The unpcb_compat data structure is passed to user space and must not change.
1670  */
1671 static void
unpcb_to_compat(struct unpcb * up,struct unpcb_compat * cp)1672 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1673 {
1674 #if defined(__LP64__)
1675 	cp->unp_link.le_next = (u_int32_t)
1676 	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1677 	cp->unp_link.le_prev = (u_int32_t)
1678 	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1679 #else
1680 	cp->unp_link.le_next = (struct unpcb_compat *)
1681 	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1682 	cp->unp_link.le_prev = (struct unpcb_compat **)
1683 	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1684 #endif
1685 	cp->unp_socket = (_UNPCB_PTR(struct socket *))
1686 	    VM_KERNEL_ADDRPERM(up->unp_socket);
1687 	cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1688 	    VM_KERNEL_ADDRPERM(up->unp_vnode);
1689 	cp->unp_ino = up->unp_ino;
1690 	cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1691 	    VM_KERNEL_ADDRPERM(up->unp_conn);
1692 	cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1693 #if defined(__LP64__)
1694 	cp->unp_reflink.le_next =
1695 	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1696 	cp->unp_reflink.le_prev =
1697 	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1698 #else
1699 	cp->unp_reflink.le_next =
1700 	    (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1701 	cp->unp_reflink.le_prev =
1702 	    (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1703 #endif
1704 	cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1705 	    VM_KERNEL_ADDRPERM(up->unp_addr);
1706 	cp->unp_cc = up->unp_cc;
1707 	cp->unp_mbcnt = up->unp_mbcnt;
1708 	cp->unp_gencnt = up->unp_gencnt;
1709 }
1710 
/*
 * Sysctl handler exporting the list of unix-domain pcbs (datagram or
 * stream, selected by arg1) as struct xunpcb records bracketed by two
 * struct xunpgen generation records; a generation mismatch between the
 * two tells user space the list changed mid-copy and should be retried.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size-probe only: report an estimate padded for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot pcbs no newer than the generation recorded above */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRPERM(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1828 
/* sysctl nodes net.local.{dgram,stream}.pcblist, served by unp_pcblist */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1837 
1838 #if XNU_TARGET_OS_OSX
1839 
/*
 * 64-bit variant of unp_pcblist: exports the unix-domain pcb list as
 * struct xunpcb64 records (full-width scrambled pointers) bracketed by
 * two struct xunpgen generation records.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size-probe only: report an estimate padded for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot pcbs no newer than the generation recorded above */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t          xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1981 
/* sysctl nodes net.local.{dgram,stream}.pcblist64, served by unp_pcblist64 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
1990 
1991 #endif /* XNU_TARGET_OS_OSX */
1992 
1993 static void
unp_shutdown(struct unpcb * unp)1994 unp_shutdown(struct unpcb *unp)
1995 {
1996 	struct socket *so = unp->unp_socket;
1997 	struct socket *so2;
1998 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1999 		so2 = unp->unp_conn->unp_socket;
2000 		unp_get_locks_in_order(so, so2);
2001 		socantrcvmore(so2);
2002 		socket_unlock(so2, 1);
2003 	}
2004 }
2005 
2006 static void
unp_drop(struct unpcb * unp,int errno)2007 unp_drop(struct unpcb *unp, int errno)
2008 {
2009 	struct socket *so = unp->unp_socket;
2010 
2011 	so->so_error = (u_short)errno;
2012 	unp_disconnect(unp);
2013 }
2014 
2015 /*
2016  * fg_insertuipc_mark
2017  *
2018  * Description:	Mark fileglob for insertion onto message queue if needed
2019  *		Also takes fileglob reference
2020  *
2021  * Parameters:	fg	Fileglob pointer to insert
2022  *
2023  * Returns:	true, if the fileglob needs to be inserted onto msg queue
2024  *
2025  * Locks:	Takes and drops fg_lock, potentially many times
2026  */
static boolean_t
fg_insertuipc_mark(struct fileglob * fg)
{
	boolean_t insert = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If another thread is removing this fileglob from the message
	 * queue, wait for that removal to complete before re-marking.
	 */
	while (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WRMMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
	}

	/* Take the message-queue reference on the fileglob */
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
	fg->fg_msgcount++;
	/* First in-flight message: caller must queue the fileglob */
	if (fg->fg_msgcount == 1) {
		fg->fg_lflags |= FG_INSMSGQ;
		insert = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return insert;
}
2049 
2050 /*
2051  * fg_insertuipc
2052  *
2053  * Description:	Insert marked fileglob onto message queue
2054  *
2055  * Parameters:	fg	Fileglob pointer to insert
2056  *
2057  * Returns:	void
2058  *
2059  * Locks:	Takes and drops fg_lock & uipc_lock
2060  *		DO NOT call this function with proc_fdlock held as unp_gc()
2061  *		can potentially try to acquire proc_fdlock, which can result
2062  *		in a deadlock.
2063  */
static void
fg_insertuipc(struct fileglob * fg)
{
	/* Only act if fg_insertuipc_mark() flagged this fileglob */
	if (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_INSMSGQ;
		/* Wake any thread waiting on the insert in fg_removeuipc_mark() */
		if (fg->fg_lflags & FG_WINSMSGQ) {
			fg->fg_lflags &= ~FG_WINSMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2080 
2081 /*
2082  * fg_removeuipc_mark
2083  *
2084  * Description:	Mark the fileglob for removal from message queue if needed
2085  *		Also releases fileglob message queue reference
2086  *
2087  * Parameters:	fg	Fileglob pointer to remove
2088  *
2089  * Returns:	true, if the fileglob needs to be removed from msg queue
2090  *
2091  * Locks:	Takes and drops fg_lock, potentially many times
2092  */
static boolean_t
fg_removeuipc_mark(struct fileglob * fg)
{
	boolean_t remove = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If another thread is still inserting this fileglob onto the
	 * message queue, wait for the insert to complete first.
	 */
	while (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WINSMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
	}
	fg->fg_msgcount--;
	/* Last in-flight message gone: caller must dequeue the fileglob */
	if (fg->fg_msgcount == 0) {
		fg->fg_lflags |= FG_RMMSGQ;
		remove = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return remove;
}
2113 
2114 /*
2115  * fg_removeuipc
2116  *
2117  * Description:	Remove marked fileglob from message queue
2118  *
2119  * Parameters:	fg	Fileglob pointer to remove
2120  *
2121  * Returns:	void
2122  *
2123  * Locks:	Takes and drops fg_lock & uipc_lock
2124  *		DO NOT call this function with proc_fdlock held as unp_gc()
2125  *		can potentially try to acquire proc_fdlock, which can result
2126  *		in a deadlock.
2127  */
static void
fg_removeuipc(struct fileglob * fg)
{
	/* Only act if fg_removeuipc_mark() claimed the removal for this fg. */
	if (fg->fg_lflags & FG_RMMSGQ) {
		/* The global in-transit list unp_msghead is protected by uipc_lock. */
		lck_mtx_lock(&uipc_lock);
		LIST_REMOVE(fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_RMMSGQ;
		/* Wake any thread in fg_insertuipc_mark() waiting on this removal. */
		if (fg->fg_lflags & FG_WRMMSGQ) {
			fg->fg_lflags &= ~FG_WRMMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2144 
2145 /*
2146  * Returns:	0			Success
2147  *		EMSGSIZE		The new fd's will not fit
2148  *		ENOBUFS			Cannot alloc struct fileproc
2149  */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	/* Number of fileglob pointers packed into this SCM_RIGHTS message. */
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds;
	int error = 0;

	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 *	Allocate all the fds, and if it doesn't fit,
	 *	then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Roll back the fds allocated so far. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 *	At this point we are committed, and can't fail anymore.
	 *	Allocate all the fileprocs, and remove the files
	 *	from the queue.
	 *
	 *	Until we call procfdtbl_releasefd(), fds are in flux
	 *	and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/*
		 * Drop the in-transit message-queue reference; done without
		 * proc_fdlock held (see fg_removeuipc() deadlock note above).
		 */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 *	Return the fds into `cm`.
	 *	Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Clear the tail left over from the wider fileglob pointers. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* On failure, drop every in-transit fileglob reference. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2240 
2241 void
unp_init(void)2242 unp_init(void)
2243 {
2244 	_CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2245 	LIST_INIT(&unp_dhead);
2246 	LIST_INIT(&unp_shead);
2247 }
2248 
#ifndef MIN
/* Local fallback; <sys/param.h> normally supplies MIN. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2252 
2253 /*
2254  * Returns:	0			Success
2255  *		EINVAL
2256  *		EBADF
2257  */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* One bit per fd: set when fg_insertuipc_mark() claimed the insert. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/* Pass 1: validate every fd before mutating anything. */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/*
	 * Pass 3: finish the message-queue inserts without proc_fdlock held
	 * (see the deadlock note on fg_insertuipc() above).
	 */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2319 
/*
 * unp_gc
 *
 * Description:	Mark-and-sweep garbage collector for file descriptors that
 *		are in transit in SCM_RIGHTS messages.  Marks every fileglob
 *		on unp_msghead that is still externally reachable, then drops
 *		the rights held by the unreachable ones so reference cycles
 *		of in-flight sockets get reclaimed.
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT deferred and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer'  in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not deferred, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was deferred, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was deferred). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			/* Drop the socket lock while scanning; sblock keeps sb_mb stable. */
			socket_unlock(so, 0);
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	nunref = 0;
	fpp = extra_ref;
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				socket_lock(so, 0);
				/* Flush the receive buffer, discarding queued rights. */
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Drop the extra references taken above; may be the last close. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(PROC_NULL, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2518 
2519 void
unp_dispose(struct mbuf * m)2520 unp_dispose(struct mbuf *m)
2521 {
2522 	if (m) {
2523 		unp_scan(m, unp_discard, NULL);
2524 	}
2525 }
2526 
2527 /*
2528  * Returns:	0			Success
2529  */
2530 static int
unp_listen(struct unpcb * unp,proc_t p)2531 unp_listen(struct unpcb *unp, proc_t p)
2532 {
2533 	kauth_cred_t safecred = kauth_cred_proc_ref(p);
2534 	cru2x(safecred, &unp->unp_peercred);
2535 	kauth_cred_unref(&safecred);
2536 	unp->unp_flags |= UNP_HAVEPCCACHED;
2537 	return 0;
2538 }
2539 
2540 static void
unp_scan(struct mbuf * m0,void (* op)(struct fileglob *,void * arg),void * arg)2541 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2542 {
2543 	struct mbuf *m;
2544 	struct fileglob **rp;
2545 	struct cmsghdr *cm;
2546 	int i;
2547 	int qfds;
2548 
2549 	while (m0) {
2550 		for (m = m0; m; m = m->m_next) {
2551 			if (m->m_type == MT_CONTROL &&
2552 			    (size_t)m->m_len >= sizeof(*cm)) {
2553 				cm = mtod(m, struct cmsghdr *);
2554 				if (cm->cmsg_level != SOL_SOCKET ||
2555 				    cm->cmsg_type != SCM_RIGHTS) {
2556 					continue;
2557 				}
2558 				qfds = (cm->cmsg_len - sizeof(*cm)) /
2559 				    sizeof(int);
2560 				rp = (struct fileglob **)(cm + 1);
2561 				for (i = 0; i < qfds; i++) {
2562 					(*op)(*rp++, arg);
2563 				}
2564 				break;          /* XXX, but saves time */
2565 			}
2566 		}
2567 		m0 = m0->m_act;
2568 	}
2569 }
2570 
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	/* Atomically set FMARK|FDEFER; give up if FMARK is already set. */
	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
		        os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	/* Caller (unp_gc via unp_scan) holds uipc_lock, serializing unp_defer. */
	unp_defer++;
}
2585 
static void
unp_discard(struct fileglob *fg, void *p)
{
	/* No process supplied: attribute the drop to the current one. */
	if (p == NULL) {
		p = current_proc();             /* XXX */
	}
	/* Count of discarded in-transit descriptors. */
	(void) OSAddAtomic(1, &unp_disposed);
	/* Drop the message-queue reference; unlink if it was the last one. */
	if (fg_removeuipc_mark(fg)) {
		VERIFY(fg->fg_lflags & FG_RMMSGQ);
		fg_removeuipc(fg);
	}
	(void) OSAddAtomic(-1, &unp_rights);

	/* Release the fg_count reference held for the in-flight message. */
	(void) fg_drop(p, fg);
}
2601 
2602 int
unp_lock(struct socket * so,int refcount,void * lr)2603 unp_lock(struct socket *so, int refcount, void * lr)
2604 {
2605 	void * lr_saved;
2606 	if (lr == 0) {
2607 		lr_saved = (void *)  __builtin_return_address(0);
2608 	} else {
2609 		lr_saved = lr;
2610 	}
2611 
2612 	if (so->so_pcb) {
2613 		lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2614 	} else {
2615 		panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
2616 		    so, lr_saved, so->so_usecount);
2617 	}
2618 
2619 	if (so->so_usecount < 0) {
2620 		panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
2621 		    so, so->so_pcb, lr_saved, so->so_usecount);
2622 	}
2623 
2624 	if (refcount) {
2625 		VERIFY(so->so_usecount > 0);
2626 		so->so_usecount++;
2627 	}
2628 	so->lock_lr[so->next_lock_lr] = lr_saved;
2629 	so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2630 	return 0;
2631 }
2632 
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp = sotounpcb(so);

	/* Record where the unlock came from, for lock debugging. */
	if (lr == 0) {
		lr_saved = (void *) __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	/* Last reference gone and PCB marked for clearing: tear it all down. */
	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		/* Must release the mutex before destroying it. */
		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		/* Schedule the GC to reclaim any in-flight descriptors. */
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2680 
2681 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2682 unp_getlock(struct socket *so, __unused int flags)
2683 {
2684 	struct unpcb *unp = (struct unpcb *)so->so_pcb;
2685 
2686 
2687 	if (so->so_pcb) {
2688 		if (so->so_usecount < 0) {
2689 			panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2690 		}
2691 		return &unp->unp_mtx;
2692 	} else {
2693 		panic("unp_getlock: so=%p NULL so_pcb", so);
2694 		return so->so_proto->pr_domain->dom_mtx;
2695 	}
2696 }
2697