xref: /xnu-8019.80.24/bsd/kern/uipc_usrreq.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
61  */
62 /*
63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64  * support for mandatory and extensible security protections.  This notice
65  * is included in support of clause 2.2 (b) of the Apple Public License,
66  * Version 2.0.
67  */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93 
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97 
98 #if CONFIG_MACF
99 #include <security/mac_framework.h>
100 #endif /* CONFIG_MACF */
101 
102 #include <mach/vm_param.h>
103 
104 /*
105  * Maximum number of FDs that can be passed in an mbuf
106  */
107 #define UIPC_MAX_CMSG_FD        512
108 
109 ZONE_DECLARE(unp_zone, "unpzone", sizeof(struct unpcb), ZC_NONE);
110 static  unp_gen_t unp_gencnt;
111 static  u_int unp_count;
112 
113 static  LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
114 static  LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
115 static  LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
116 
117 static  LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
118 static  LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
119 static  LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
120 
121 static  u_int                   disconnect_in_progress;
122 
123 static struct unp_head unp_shead, unp_dhead;
124 static int      unp_defer;
125 static thread_call_t unp_gc_tcall;
126 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
127 
128 
129 /*
130  * mDNSResponder tracing.  When enabled, endpoints connected to
131  * /var/run/mDNSResponder will be traced; during each send on
132  * the traced socket, we log the PID and process name of the
133  * sending process.  We also print out a bit of info related
134  * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
135  * of mDNSResponder stays the same.
136  */
137 #define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"
138 
139 static int unpst_tracemdns;     /* enable tracing */
140 
141 #define MDNS_IPC_MSG_HDR_VERSION_1      1
142 
/*
 * Minimal mirror of the mDNSResponder IPC request header (ipc_msg_hdr in
 * mDNSResponder's dnssd_ipc.h); only used to pretty-print traced sends
 * when mDNSResponder tracing (unpst_tracemdns) is enabled.  Must stay
 * layout-compatible with the userland definition -- hence the packing.
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;          /* opaque userland pointer; never dereferenced here */
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
154 
155 /*
156  * Unix communications domain.
157  *
158  * TODO:
159  *	SEQPACKET, RDM
160  *	rethink name space problems
161  *	need a proper out-of-band
162  *	lock pushdown
163  */
164 static struct   sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
165 static ino_t    unp_ino;                /* prototype for fake inode numbers */
166 
167 static int      unp_attach(struct socket *);
168 static void     unp_detach(struct unpcb *);
169 static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
170 static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
171 static void     unp_disconnect(struct unpcb *);
172 static void     unp_shutdown(struct unpcb *);
173 static void     unp_drop(struct unpcb *, int);
174 static void     unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
175 static void     unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
176 static void     unp_mark(struct fileglob *, __unused void *);
177 static void     unp_discard(struct fileglob *, void *);
178 static int      unp_internalize(struct mbuf *, proc_t);
179 static int      unp_listen(struct unpcb *, proc_t);
180 static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
181 static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
182 
/*
 * Allocate the one-shot thread call used to run unp_gc(), the garbage
 * collector for file descriptors stranded in in-flight SCM_RIGHTS
 * messages.  Registered below to run once during boot.
 */
__startup_func
static void
unp_gc_setup(void)
{
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
192 
/*
 * Acquire the locks of two connected sockets in the canonical order
 * (ascending pointer value) to avoid deadlock against a thread locking
 * the same pair from the other side.  The caller holds the lock on 'so';
 * on return both 'so' and 'conn_so' are locked.
 *
 * When 'so' sorts after 'conn_so' we must drop 'so' and re-acquire in
 * order.  While 'so' is unlocked, UNP_DONTDISCONNECT plus rw_thrcount
 * keep a concurrent disconnect from tearing the pcb down underneath us;
 * the last such thread clears the flag and wakes any waiter.
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
214 
215 static int
uipc_abort(struct socket * so)216 uipc_abort(struct socket *so)
217 {
218 	struct unpcb *unp = sotounpcb(so);
219 
220 	if (unp == 0) {
221 		return EINVAL;
222 	}
223 	unp_drop(unp, ECONNABORTED);
224 	unp_detach(unp);
225 	sofree(so);
226 	return 0;
227 }
228 
229 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)230 uipc_accept(struct socket *so, struct sockaddr **nam)
231 {
232 	struct unpcb *unp = sotounpcb(so);
233 
234 	if (unp == 0) {
235 		return EINVAL;
236 	}
237 
238 	/*
239 	 * Pass back name of connected socket,
240 	 * if it was bound and we are still connected
241 	 * (our peer may have closed already!).
242 	 */
243 	if (unp->unp_conn && unp->unp_conn->unp_addr) {
244 		*nam = dup_sockaddr((struct sockaddr *)
245 		    unp->unp_conn->unp_addr, 1);
246 	} else {
247 		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
248 	}
249 	return 0;
250 }
251 
252 /*
253  * Returns:	0			Success
254  *		EISCONN
255  *	unp_attach:
256  */
257 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)258 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
259 {
260 	struct unpcb *unp = sotounpcb(so);
261 
262 	if (unp != 0) {
263 		return EISCONN;
264 	}
265 	return unp_attach(so);
266 }
267 
268 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)269 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
270 {
271 	struct unpcb *unp = sotounpcb(so);
272 
273 	if (unp == 0) {
274 		return EINVAL;
275 	}
276 
277 	return unp_bind(unp, nam, p);
278 }
279 
280 /*
281  * Returns:	0			Success
282  *		EINVAL
283  *	unp_connect:???			[See elsewhere in this file]
284  */
285 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)286 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
287 {
288 	struct unpcb *unp = sotounpcb(so);
289 
290 	if (unp == 0) {
291 		return EINVAL;
292 	}
293 	return unp_connect(so, nam, p);
294 }
295 
296 /*
297  * Returns:	0			Success
298  *		EINVAL
299  *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
300  *	unp_connect2:EINVAL		Invalid argument
301  */
302 static int
uipc_connect2(struct socket * so1,struct socket * so2)303 uipc_connect2(struct socket *so1, struct socket *so2)
304 {
305 	struct unpcb *unp = sotounpcb(so1);
306 
307 	if (unp == 0) {
308 		return EINVAL;
309 	}
310 
311 	return unp_connect2(so1, so2);
312 }
313 
314 /* control is EOPNOTSUPP */
315 
316 static int
uipc_detach(struct socket * so)317 uipc_detach(struct socket *so)
318 {
319 	struct unpcb *unp = sotounpcb(so);
320 
321 	if (unp == 0) {
322 		return EINVAL;
323 	}
324 
325 	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
326 	unp_detach(unp);
327 	return 0;
328 }
329 
330 static int
uipc_disconnect(struct socket * so)331 uipc_disconnect(struct socket *so)
332 {
333 	struct unpcb *unp = sotounpcb(so);
334 
335 	if (unp == 0) {
336 		return EINVAL;
337 	}
338 	unp_disconnect(unp);
339 	return 0;
340 }
341 
342 /*
343  * Returns:	0			Success
344  *		EINVAL
345  */
346 static int
uipc_listen(struct socket * so,__unused proc_t p)347 uipc_listen(struct socket *so, __unused proc_t p)
348 {
349 	struct unpcb *unp = sotounpcb(so);
350 
351 	if (unp == 0 || unp->unp_vnode == 0) {
352 		return EINVAL;
353 	}
354 	return unp_listen(unp, p);
355 }
356 
357 static int
uipc_peeraddr(struct socket * so,struct sockaddr ** nam)358 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
359 {
360 	struct unpcb *unp = sotounpcb(so);
361 
362 	if (unp == NULL) {
363 		return EINVAL;
364 	}
365 	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
366 		*nam = dup_sockaddr((struct sockaddr *)
367 		    unp->unp_conn->unp_addr, 1);
368 	} else {
369 		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
370 	}
371 	return 0;
372 }
373 
/*
 * pru_rcvd for PF_LOCAL: the receiver consumed data, so recompute the
 * peer sender's buffer limits to release backpressure and wake any
 * writer blocked on it.  Only meaningful for SOCK_STREAM; datagram
 * sockets never set the flag that triggers this callback.
 *
 * Returns:	0	Success
 *		EINVAL	No pcb attached
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		return EINVAL;
	}
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
	/*NOTREACHED*/

	case SOCK_STREAM:
	/* Alias the two buffers involved: our receive side, peer's send side */
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		/* Lock both sockets in canonical order before touching snd */
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
420 
421 /* pru_rcvoob is EOPNOTSUPP */
422 
423 /*
424  * Returns:	0			Success
425  *		EINVAL
426  *		EOPNOTSUPP
427  *		EPIPE
428  *		ENOTCONN
429  *		EISCONN
430  *	unp_internalize:EINVAL
431  *	unp_internalize:EBADF
432  *	unp_connect:EAFNOSUPPORT	Address family not supported
433  *	unp_connect:EINVAL		Invalid argument
434  *	unp_connect:ENOTSOCK		Not a socket
435  *	unp_connect:ECONNREFUSED	Connection refused
436  *	unp_connect:EISCONN		Socket is connected
437  *	unp_connect:EPROTOTYPE		Protocol wrong type for socket
438  *	unp_connect:???
439  *	sbappendaddr:ENOBUFS		[5th argument, contents modified]
440  *	sbappendaddr:???		[whatever a filter author chooses]
441  */
/*
 * pru_send for PF_LOCAL: deliver data (and any SCM_RIGHTS control
 * message) directly into the peer's receive buffer.
 *
 * Ownership rules: on success the mbuf chains 'm' and 'control' belong
 * to the receiver (cleared to NULL here); on failure they are freed at
 * 'release'.  Control messages that were internalized must be undone
 * via unp_dispose() before freeing.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		/* Out-of-band data is not supported on AF_UNIX */
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		/* Convert caller fds in the control message to fileglob refs */
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* Implicit connect for sendto() on an unconnected socket */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		/* so == so2 when a socket sends to itself */
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = (struct sockaddr *)unp->unp_addr;
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/* sbappendaddr() consumed m even on failure */
		m = NULL;
		if (nam) {
			/* Undo the implicit connect done above */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
	/* Alias the buffers involved: peer's receive side, our send side */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break;  /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		if (unp->unp_flags & UNP_TRACE_MDNS) {
			struct mdns_ipc_msg_hdr hdr;

			/* Log sender pid/name and opcode of mDNSResponder requests */
			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full.  It is
		 * possible, however, that append might fail.  In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		/* Clamp at zero rather than letting the unsigned hiwat wrap */
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0) {
		/* Release internalized fileglob refs before freeing control */
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
652 
653 static int
uipc_sense(struct socket * so,void * ub,int isstat64)654 uipc_sense(struct socket *so, void *ub, int isstat64)
655 {
656 	struct unpcb *unp = sotounpcb(so);
657 	struct socket *so2;
658 	blksize_t blksize;
659 
660 	if (unp == 0) {
661 		return EINVAL;
662 	}
663 
664 	blksize = so->so_snd.sb_hiwat;
665 	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
666 		so2 = unp->unp_conn->unp_socket;
667 		blksize += so2->so_rcv.sb_cc;
668 	}
669 	if (unp->unp_ino == 0) {
670 		unp->unp_ino = unp_ino++;
671 	}
672 
673 	if (isstat64 != 0) {
674 		struct stat64  *sb64;
675 
676 		sb64 = (struct stat64 *)ub;
677 		sb64->st_blksize = blksize;
678 		sb64->st_dev = NODEV;
679 		sb64->st_ino = (ino64_t)unp->unp_ino;
680 	} else {
681 		struct stat *sb;
682 
683 		sb = (struct stat *)ub;
684 		sb->st_blksize = blksize;
685 		sb->st_dev = NODEV;
686 		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
687 	}
688 
689 	return 0;
690 }
691 
692 /*
693  * Returns:	0		Success
694  *		EINVAL
695  *
696  * Notes:	This is not strictly correct, as unp_shutdown() also calls
697  *		socantrcvmore().  These should maybe both be conditionalized
698  *		on the 'how' argument in soshutdown() as called from the
699  *		shutdown() system call.
700  */
701 static int
uipc_shutdown(struct socket * so)702 uipc_shutdown(struct socket *so)
703 {
704 	struct unpcb *unp = sotounpcb(so);
705 
706 	if (unp == 0) {
707 		return EINVAL;
708 	}
709 	socantsendmore(so);
710 	unp_shutdown(unp);
711 	return 0;
712 }
713 
714 /*
715  * Returns:	0			Success
716  *		EINVAL			Invalid argument
717  */
718 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)719 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
720 {
721 	struct unpcb *unp = sotounpcb(so);
722 
723 	if (unp == NULL) {
724 		return EINVAL;
725 	}
726 	if (unp->unp_addr != NULL) {
727 		*nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
728 	} else {
729 		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
730 	}
731 	return 0;
732 }
733 
/*
 * Protocol user-request switch for PF_LOCAL sockets.  Entries not listed
 * here (pru_control, pru_rcvoob, ...) fall back to the default
 * EOPNOTSUPP handlers, as noted above for control and rcvoob.
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort =            uipc_abort,
	.pru_accept =           uipc_accept,
	.pru_attach =           uipc_attach,
	.pru_bind =             uipc_bind,
	.pru_connect =          uipc_connect,
	.pru_connect2 =         uipc_connect2,
	.pru_detach =           uipc_detach,
	.pru_disconnect =       uipc_disconnect,
	.pru_listen =           uipc_listen,
	.pru_peeraddr =         uipc_peeraddr,
	.pru_rcvd =             uipc_rcvd,
	.pru_send =             uipc_send,
	.pru_sense =            uipc_sense,
	.pru_shutdown =         uipc_shutdown,
	.pru_sockaddr =         uipc_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
753 
754 int
uipc_ctloutput(struct socket * so,struct sockopt * sopt)755 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
756 {
757 	struct unpcb *unp = sotounpcb(so);
758 	int error = 0;
759 	pid_t peerpid;
760 	proc_t p;
761 	task_t t;
762 	struct socket *peerso;
763 
764 	switch (sopt->sopt_dir) {
765 	case SOPT_GET:
766 		switch (sopt->sopt_name) {
767 		case LOCAL_PEERCRED:
768 			if (unp->unp_flags & UNP_HAVEPC) {
769 				error = sooptcopyout(sopt, &unp->unp_peercred,
770 				    sizeof(unp->unp_peercred));
771 			} else {
772 				if (so->so_type == SOCK_STREAM) {
773 					error = ENOTCONN;
774 				} else {
775 					error = EINVAL;
776 				}
777 			}
778 			break;
779 		case LOCAL_PEERPID:
780 		case LOCAL_PEEREPID:
781 			if (unp->unp_conn == NULL) {
782 				error = ENOTCONN;
783 				break;
784 			}
785 			peerso = unp->unp_conn->unp_socket;
786 			if (peerso == NULL) {
787 				panic("peer is connected but has no socket?");
788 			}
789 			unp_get_locks_in_order(so, peerso);
790 			if (sopt->sopt_name == LOCAL_PEEREPID &&
791 			    peerso->so_flags & SOF_DELEGATED) {
792 				peerpid = peerso->e_pid;
793 			} else {
794 				peerpid = peerso->last_pid;
795 			}
796 			socket_unlock(peerso, 1);
797 			error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
798 			break;
799 		case LOCAL_PEERUUID:
800 		case LOCAL_PEEREUUID:
801 			if (unp->unp_conn == NULL) {
802 				error = ENOTCONN;
803 				break;
804 			}
805 			peerso = unp->unp_conn->unp_socket;
806 			if (peerso == NULL) {
807 				panic("peer is connected but has no socket?");
808 			}
809 			unp_get_locks_in_order(so, peerso);
810 			if (sopt->sopt_name == LOCAL_PEEREUUID &&
811 			    peerso->so_flags & SOF_DELEGATED) {
812 				error = sooptcopyout(sopt, &peerso->e_uuid,
813 				    sizeof(peerso->e_uuid));
814 			} else {
815 				error = sooptcopyout(sopt, &peerso->last_uuid,
816 				    sizeof(peerso->last_uuid));
817 			}
818 			socket_unlock(peerso, 1);
819 			break;
820 		case LOCAL_PEERTOKEN:
821 			if (unp->unp_conn == NULL) {
822 				error = ENOTCONN;
823 				break;
824 			}
825 			peerso = unp->unp_conn->unp_socket;
826 			if (peerso == NULL) {
827 				panic("peer is connected but has no socket?");
828 			}
829 			unp_get_locks_in_order(so, peerso);
830 			peerpid = peerso->last_pid;
831 			p = proc_find(peerpid);
832 			if (p != PROC_NULL) {
833 				t = proc_task(p);
834 				if (t != TASK_NULL) {
835 					audit_token_t peertoken;
836 					mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
837 					if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
838 						error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
839 					} else {
840 						error = EINVAL;
841 					}
842 				} else {
843 					error = EINVAL;
844 				}
845 				proc_rele(p);
846 			} else {
847 				error = EINVAL;
848 			}
849 			socket_unlock(peerso, 1);
850 			break;
851 		default:
852 			error = EOPNOTSUPP;
853 			break;
854 		}
855 		break;
856 	case SOPT_SET:
857 	default:
858 		error = EOPNOTSUPP;
859 		break;
860 	}
861 
862 	return error;
863 }
864 
865 /*
866  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
867  * for stream sockets, although the total for sender and receiver is
868  * actually only PIPSIZ.
869  * Datagram sockets really use the sendspace as the maximum datagram size,
870  * and don't really want to reserve the sendspace.  Their recvspace should
871  * be large enough for at least one max-size datagram plus address.
872  */
873 #ifndef PIPSIZ
874 #define PIPSIZ  8192
875 #endif
876 static u_int32_t        unpst_sendspace = PIPSIZ;
877 static u_int32_t        unpst_recvspace = PIPSIZ;
878 static u_int32_t        unpdg_sendspace = 2 * 1024;       /* really max datagram size */
879 static u_int32_t        unpdg_recvspace = 4 * 1024;
880 
881 static int      unp_rights;                     /* file descriptors in flight */
882 static int      unp_disposed;                   /* discarded file descriptors */
883 
884 SYSCTL_DECL(_net_local_stream);
885 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
886     &unpst_sendspace, 0, "");
887 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
888     &unpst_recvspace, 0, "");
889 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
890     &unpst_tracemdns, 0, "");
891 SYSCTL_DECL(_net_local_dgram);
892 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
893     &unpdg_sendspace, 0, "");
894 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
895     &unpdg_recvspace, 0, "");
896 SYSCTL_DECL(_net_local);
897 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
898 
899 /*
900  * Returns:	0			Success
901  *		ENOBUFS
902  *	soreserve:ENOBUFS
903  */
/*
 * Allocate and wire up a fresh unpcb for 'so': reserve default buffer
 * space by socket type, link the pcb onto the global stream/datagram
 * list under the list lock, and mark both socket buffers SB_UNIX (see
 * the comment below for why).
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	/* Reserve buffer space only if the caller hasn't already done so */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Generation count and global pcb lists are guarded by unp_list_mtx */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
962 
/*
 * Tear a pcb out of the global lists and sever all of its connections:
 * unlink from the filesystem vnode (if bound), disconnect from a stream
 * peer, and reset every datagram socket still connected to us, then
 * mark the socket disconnected and ready for deallocation.
 *
 * Called with the socket locked; the lock is dropped and re-taken at
 * several points, which is what the disconnect_in_progress handshake
 * below guards against.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the vnode may have been dropped while unlocked */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp);                /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	/* Drain every datagram socket still connected to us, one at a time */
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1042 
1043 /*
1044  * Returns:	0			Success
1045  *		EAFNOSUPPORT
1046  *		EINVAL
1047  *		EADDRINUSE
1048  *		namei:???		[anything namei can return]
1049  *		vnode_authorize:???	[anything vnode_authorize can return]
1050  *
1051  * Notes:	p at this point is the current process, as this function is
1052  *		only called by sobind().
1053  */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	/* Only the AF_UNIX family (or the wildcard 0) is acceptable here. */
	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	/*
	 * Check if the socket is already bound to an address
	 */
	if (unp->unp_vnode != NULL) {
		return EINVAL;
	}
	/*
	 * Check if the socket may have been shut down
	 */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		return EINVAL;
	}

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (namelen >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Make a NUL-terminated local copy of the path for namei(). */
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

	/*
	 * Drop the socket lock across the blocking VFS operations below;
	 * any state checked so far must be re-validated after relocking.
	 */
	socket_unlock(so, 0);

	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	if (vp != NULL) {
		/*
		 * A node already exists at that path: the address is in use.
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		socket_lock(so, 0);
		return EADDRINUSE;
	}

	/* Create the filesystem node as a VSOCK honoring the process umask. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));

	/*
	 * NOTE: the unbraced "if (error == 0)" statements below deliberately
	 * chain across the #if blocks: each subsequent check (and finally
	 * vnode_authorize) runs only when the previous configured check
	 * succeeded.
	 */
#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* MAC_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return error;
	}

	socket_lock(so, 0);

	/*
	 * Another thread may have bound this socket while the lock was
	 * dropped for the VFS calls above; if so, fail rather than leak.
	 */
	if (unp->unp_vnode != NULL) {
		vnode_put(vp); /* drop the iocount */
		return EINVAL;
	}

	error = vnode_ref(vp);  /* gain a longterm reference */
	if (error) {
		vnode_put(vp); /* drop the iocount */
		return error;
	}

	/* Cross-link vnode and socket; keep a copy of the bound address. */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
	vnode_put(vp);          /* drop the iocount */

	return 0;
}
1179 
1180 
1181 /*
1182  * Returns:	0			Success
1183  *		EAFNOSUPPORT		Address family not supported
1184  *		EINVAL			Invalid argument
1185  *		ENOTSOCK		Not a socket
1186  *		ECONNREFUSED		Connection refused
1187  *		EPROTOTYPE		Protocol wrong type for socket
1188  *		EISCONN			Socket is connected
1189  *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
1190  *	unp_connect2:EINVAL		Invalid argument
1191  *	namei:???			[anything namei can return]
1192  *	vnode_authorize:????		[anything vnode_authorize can return]
1193  *
1194  * Notes:	p at this point is the current process, as this function is
1195  *		only called by sosend(), sendfile(), and soconnectlock().
1196  */
static int
unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3, *list_so = NULL;
	struct unpcb *unp, *unp2, *unp3;
	vfs_context_t ctx = vfs_context_current();
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	unp = sotounpcb(so);
	so2 = so3 = NULL;

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (len >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}

	soisconnecting(so);

	/* NUL-terminated local copy of the target path for namei(). */
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	/* Drop the socket lock across the blocking VFS lookup/checks. */
	socket_unlock(so, 0);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	nameidone(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		socket_lock(so, 0);
		goto out;
	}

#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_connect(ctx, vp, so);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	/* Connecting requires write access on the filesystem node. */
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}

	/*
	 * unp_connect_lock serializes against a concurrent close of the
	 * listening socket tearing down vp->v_socket (see unp_detach).
	 */
	lck_mtx_lock(&unp_connect_lock);

	if (vp->v_socket == 0) {
		lck_mtx_unlock(&unp_connect_lock);
		error = ECONNREFUSED;
		socket_lock(so, 0);
		goto out;
	}

	socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
	so2 = vp->v_socket;
	lck_mtx_unlock(&unp_connect_lock);


	if (so2->so_pcb == NULL) {
		error = ECONNREFUSED;
		if (so != so2) {
			socket_unlock(so2, 1);
			socket_lock(so, 0);
		} else {
			/* Release the reference held for the listen socket */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
		goto out;
	}

	/*
	 * Take both socket locks in address order to avoid deadlock.
	 * (When so == so2, so is effectively locked already via so2.)
	 */
	if (so < so2) {
		socket_unlock(so2, 0);
		socket_lock(so, 0);
		socket_lock(so2, 0);
	} else if (so > so2) {
		socket_lock(so, 0);
	}
	/*
	 * Check if socket was connected while we were trying to
	 * get the socket locks in order.
	 * XXX - probably shouldn't return an error for SOCK_DGRAM
	 */
	if ((so->so_state & SS_ISCONNECTED) != 0) {
		error = EISCONN;
		goto decref_out;
	}

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto decref_out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Stream case: spawn a fresh server-side socket via sonewconn. */
		/* Release the incoming socket but keep a reference */
		socket_unlock(so, 0);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, nam)) == 0) {
			error = ECONNREFUSED;
			if (so != so2) {
				socket_unlock(so2, 1);
				socket_lock(so, 0);
			} else {
				socket_lock(so, 0);
				/* Release the reference held for
				 * listen socket.
				 */
				VERIFY(so2->so_usecount > 0);
				so2->so_usecount--;
			}
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));

		/* Here we need to have both so and so2 locks and so2
		 * is already locked. Lock ordering is required.
		 */
		if (so < so2) {
			socket_unlock(so2, 0);
			socket_lock(so, 0);
			socket_lock(so2, 0);
		} else {
			socket_lock(so, 0);
		}

		/* Check again if the socket state changed when its lock was released */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			socket_unlock(so2, 1);
			/* so3 was never exposed; free it directly. */
			socket_lock(so3, 0);
			sofreelastref(so3, 1);
			goto out;
		}
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		/* Hold the reference on listening socket until the end */
		socket_unlock(so2, 0);
		list_so = so2;

		/* Lock ordering doesn't matter because so3 was just created */
		socket_lock(so3, 1);
		/* From here on "so2" is the peer actually connected to. */
		so2 = so3;

		/*
		 * Enable tracing for mDNSResponder endpoints.  (The use
		 * of sizeof instead of strlen below takes the null
		 * terminating character into account.)
		 */
		if (unpst_tracemdns &&
		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
		    sizeof(MDNSRESPONDER_PATH))) {
			unp->unp_flags |= UNP_TRACE_MDNS;
			unp2->unp_flags |= UNP_TRACE_MDNS;
		}
	}

	error = unp_connect2(so, so2);

decref_out:
	if (so2 != NULL) {
		if (so != so2) {
			socket_unlock(so2, 1);
		} else {
			/* Release the extra reference held for the listen socket.
			 * This is possible only for SOCK_DGRAM sockets. We refuse
			 * connecting to the same socket for SOCK_STREAM sockets.
			 */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
	}

	if (list_so != NULL) {
		socket_lock(list_so, 0);
		socket_unlock(list_so, 1);
	}

out:
	/* Callers rely on returning with the caller's socket lock held. */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	vnode_put(vp);
	return error;
}
1428 
1429 /*
1430  * Returns:	0			Success
1431  *		EPROTOTYPE		Protocol wrong type for socket
1432  *		EINVAL			Invalid argument
1433  */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	if (so2->so_type != so->so_type) {
		return EPROTOTYPE;
	}

	unp2 = sotounpcb(so2);

	/* Caller must enter with BOTH socket locks held (in order). */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Verify both sockets are still opened */
	if (unp == 0 || unp2 == 0) {
		return EINVAL;
	}

	/* Record the forward link; the peer holds a usecount on so2. */
	unp->unp_conn = unp2;
	so2->so_usecount++;

	switch (so->so_type) {
	case SOCK_DGRAM:
		/* Datagram peers track connectors on unp2's refs list. */
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);

		if (so != so2) {
			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
			/* Keep an extra reference on so2 that will be dropped
			 * soon after getting the locks in order
			 */
			socket_unlock(so2, 0);
			soisconnected(so);
			unp_get_locks_in_order(so, so2);
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		} else {
			soisconnected(so);
		}

		break;

	case SOCK_STREAM:
		/* This takes care of socketpair */
		if (!(unp->unp_flags & UNP_HAVEPC) &&
		    !(unp2->unp_flags & UNP_HAVEPC)) {
			cru2x(kauth_cred_get(), &unp->unp_peercred);
			unp->unp_flags |= UNP_HAVEPC;

			cru2x(kauth_cred_get(), &unp2->unp_peercred);
			unp2->unp_flags |= UNP_HAVEPC;
		}
		/* Stream connections are symmetric: back-link and count so. */
		unp2->unp_conn = unp;
		so->so_usecount++;

		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
		socket_unlock(so, 0);
		soisconnected(so2);

		/* Keep an extra reference on so2, that will be dropped soon after
		 * getting the locks in order again.
		 */
		socket_unlock(so2, 0);

		socket_lock(so, 0);
		soisconnected(so);

		unp_get_locks_in_order(so, so2);
		/* Decrement the extra reference left before */
		VERIFY(so2->so_usecount > 0);
		so2->so_usecount--;
		break;

	default:
		panic("unknown socket type %d in unp_connect2", so->so_type);
	}
	/* Exit as entered: both socket locks held. */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return 0;
}
1515 
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = NULL;
	struct socket *so2 = NULL, *so;
	struct socket *waitso;
	int so_locked = 1, strdisconn = 0;

	so = unp->unp_socket;
	if (unp->unp_conn == NULL) {
		return;
	}
	/*
	 * Become the single in-flight disconnect: wait until no other
	 * thread holds disconnect_in_progress, then claim it ourselves.
	 */
	lck_mtx_lock(&unp_disconnect_lock);
	while (disconnect_in_progress != 0) {
		if (so_locked == 1) {
			socket_unlock(so, 0);
			so_locked = 0;
		}
		(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
		    PSOCK, "disconnect", NULL);
	}
	disconnect_in_progress = 1;
	lck_mtx_unlock(&unp_disconnect_lock);

	if (so_locked == 0) {
		socket_lock(so, 0);
		so_locked = 1;
	}

	unp2 = unp->unp_conn;

	if (unp2 == 0 || unp2->unp_socket == NULL) {
		goto out;
	}
	so2 = unp2->unp_socket;

try_again:
	/* Acquire both socket locks in address order to avoid deadlock. */
	if (so == so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		waitso = so;
	} else if (so < so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		socket_lock(so2, 1);
		waitso = so2;
	} else {
		if (so_locked == 1) {
			socket_unlock(so, 0);
		}
		socket_lock(so2, 1);
		socket_lock(so, 0);
		waitso = so;
	}
	so_locked = 1;

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Check for the UNP_DONTDISCONNECT flag, if it
	 * is set, release both sockets and go to sleep
	 */

	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		so_locked = 0;

		/* PDROP releases unp->unp_mtx while we sleep. */
		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
		    PSOCK | PDROP, "unpdisconnect", NULL);
		goto try_again;
	}

	/*
	 * disconnect_in_progress keeps other disconnects out, so the
	 * connection must still be intact after the sleep/retry above.
	 */
	if (unp->unp_conn == NULL) {
		panic("unp_conn became NULL after sleep");
	}

	unp->unp_conn = NULL;
	VERIFY(so2->so_usecount > 0);
	so2->so_usecount--;

	if (unp->unp_flags & UNP_TRACE_MDNS) {
		unp->unp_flags &= ~UNP_TRACE_MDNS;
	}

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		/* Datagram: drop ourselves from the peer's connector list. */
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		break;

	case SOCK_STREAM:
		/* Stream: sever both directions and drop the back-reference. */
		unp2->unp_conn = NULL;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;

		/*
		 * Set the socket state correctly but do a wakeup later when
		 * we release all locks except the socket lock, this will avoid
		 * a deadlock.
		 */
		unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		if (unp2->unp_flags & UNP_TRACE_MDNS) {
			unp2->unp_flags &= ~UNP_TRACE_MDNS;
		}

		/* Defer soisdisconnected() wakeups until after lock release. */
		strdisconn = 1;
		break;
	default:
		panic("unknown socket type %d", so->so_type);
	}
out:
	/* Release the disconnect claim and wake any waiting threads. */
	lck_mtx_lock(&unp_disconnect_lock);
	disconnect_in_progress = 0;
	wakeup(&disconnect_in_progress);
	lck_mtx_unlock(&unp_disconnect_lock);

	if (strdisconn) {
		socket_unlock(so, 0);
		soisdisconnected(so2);
		socket_unlock(so2, 1);

		socket_lock(so, 0);
		soisdisconnected(so);
	}
	/* Always return with the caller's socket lock held. */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return;
}
1655 
1656 /*
1657  * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1658  * The unpcb_compat data structure is passed to user space and must not change.
1659  */
1660 static void
unpcb_to_compat(struct unpcb * up,struct unpcb_compat * cp)1661 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1662 {
1663 #if defined(__LP64__)
1664 	cp->unp_link.le_next = (u_int32_t)
1665 	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1666 	cp->unp_link.le_prev = (u_int32_t)
1667 	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1668 #else
1669 	cp->unp_link.le_next = (struct unpcb_compat *)
1670 	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1671 	cp->unp_link.le_prev = (struct unpcb_compat **)
1672 	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1673 #endif
1674 	cp->unp_socket = (_UNPCB_PTR(struct socket *))
1675 	    VM_KERNEL_ADDRPERM(up->unp_socket);
1676 	cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1677 	    VM_KERNEL_ADDRPERM(up->unp_vnode);
1678 	cp->unp_ino = up->unp_ino;
1679 	cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1680 	    VM_KERNEL_ADDRPERM(up->unp_conn);
1681 	cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1682 #if defined(__LP64__)
1683 	cp->unp_reflink.le_next =
1684 	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1685 	cp->unp_reflink.le_prev =
1686 	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1687 #else
1688 	cp->unp_reflink.le_next =
1689 	    (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1690 	cp->unp_reflink.le_prev =
1691 	    (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1692 #endif
1693 	cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1694 	    VM_KERNEL_ADDRPERM(up->unp_addr);
1695 	cp->unp_cc = up->unp_cc;
1696 	cp->unp_mbcnt = up->unp_mbcnt;
1697 	cp->unp_gencnt = up->unp_gencnt;
1698 }
1699 
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	/* Hold the PCB list lock shared for the whole snapshot/export. */
	lck_rw_lock_shared(&unp_list_mtx);
	/* arg1 carries the socket type selecting the dgram or stream list. */
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report a generous estimate (12.5% slack). */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only. */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot PCBs no newer than the generation recorded above. */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRPERM(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1817 
/*
 * net.local.{dgram,stream}.pcblist: export the active UNIX-domain PCB
 * lists in the 32-bit-compat xunpcb layout via unp_pcblist(); the arg1
 * value selects which list (dgram vs stream) the handler walks.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1826 
1827 #if XNU_TARGET_OS_OSX
1828 
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	/* 64-bit variant of unp_pcblist(): same flow, xunpcb64 records. */
	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report a generous estimate (12.5% slack). */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only. */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot PCBs no newer than the generation recorded above. */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t          xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			/* Kernel pointers are permuted before export. */
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1970 
/*
 * net.local.{dgram,stream}.pcblist64: 64-bit (xunpcb64) counterparts of
 * the pcblist nodes above, handled by unp_pcblist64(); macOS only.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
1979 
1980 #endif /* XNU_TARGET_OS_OSX */
1981 
1982 static void
unp_shutdown(struct unpcb * unp)1983 unp_shutdown(struct unpcb *unp)
1984 {
1985 	struct socket *so = unp->unp_socket;
1986 	struct socket *so2;
1987 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
1988 		so2 = unp->unp_conn->unp_socket;
1989 		unp_get_locks_in_order(so, so2);
1990 		socantrcvmore(so2);
1991 		socket_unlock(so2, 1);
1992 	}
1993 }
1994 
1995 static void
unp_drop(struct unpcb * unp,int errno)1996 unp_drop(struct unpcb *unp, int errno)
1997 {
1998 	struct socket *so = unp->unp_socket;
1999 
2000 	so->so_error = (u_short)errno;
2001 	unp_disconnect(unp);
2002 }
2003 
2004 /*
2005  * fg_insertuipc_mark
2006  *
2007  * Description:	Mark fileglob for insertion onto message queue if needed
2008  *		Also takes fileglob reference
2009  *
2010  * Parameters:	fg	Fileglob pointer to insert
2011  *
2012  * Returns:	true, if the fileglob needs to be inserted onto msg queue
2013  *
2014  * Locks:	Takes and drops fg_lock, potentially many times
2015  */
2016 static boolean_t
fg_insertuipc_mark(struct fileglob * fg)2017 fg_insertuipc_mark(struct fileglob * fg)
2018 {
2019 	boolean_t insert = FALSE;
2020 
2021 	lck_mtx_lock_spin(&fg->fg_lock);
2022 	while (fg->fg_lflags & FG_RMMSGQ) {
2023 		lck_mtx_convert_spin(&fg->fg_lock);
2024 
2025 		fg->fg_lflags |= FG_WRMMSGQ;
2026 		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
2027 	}
2028 
2029 	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
2030 	fg->fg_msgcount++;
2031 	if (fg->fg_msgcount == 1) {
2032 		fg->fg_lflags |= FG_INSMSGQ;
2033 		insert = TRUE;
2034 	}
2035 	lck_mtx_unlock(&fg->fg_lock);
2036 	return insert;
2037 }
2038 
2039 /*
2040  * fg_insertuipc
2041  *
2042  * Description:	Insert marked fileglob onto message queue
2043  *
2044  * Parameters:	fg	Fileglob pointer to insert
2045  *
2046  * Returns:	void
2047  *
2048  * Locks:	Takes and drops fg_lock & uipc_lock
2049  *		DO NOT call this function with proc_fdlock held as unp_gc()
2050  *		can potentially try to acquire proc_fdlock, which can result
2051  *		in a deadlock.
2052  */
2053 static void
fg_insertuipc(struct fileglob * fg)2054 fg_insertuipc(struct fileglob * fg)
2055 {
2056 	if (fg->fg_lflags & FG_INSMSGQ) {
2057 		lck_mtx_lock(&uipc_lock);
2058 		LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
2059 		lck_mtx_unlock(&uipc_lock);
2060 		lck_mtx_lock(&fg->fg_lock);
2061 		fg->fg_lflags &= ~FG_INSMSGQ;
2062 		if (fg->fg_lflags & FG_WINSMSGQ) {
2063 			fg->fg_lflags &= ~FG_WINSMSGQ;
2064 			wakeup(&fg->fg_lflags);
2065 		}
2066 		lck_mtx_unlock(&fg->fg_lock);
2067 	}
2068 }
2069 
2070 /*
2071  * fg_removeuipc_mark
2072  *
2073  * Description:	Mark the fileglob for removal from message queue if needed
2074  *		Also releases fileglob message queue reference
2075  *
2076  * Parameters:	fg	Fileglob pointer to remove
2077  *
2078  * Returns:	true, if the fileglob needs to be removed from msg queue
2079  *
2080  * Locks:	Takes and drops fg_lock, potentially many times
2081  */
static boolean_t
fg_removeuipc_mark(struct fileglob * fg)
{
	boolean_t remove = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * Wait for any in-flight fg_insertuipc() to finish putting this
	 * fileglob on the message queue; FG_WINSMSGQ asks the inserter
	 * to wake us when it is done.
	 */
	while (fg->fg_lflags & FG_INSMSGQ) {
		/* msleep() cannot be called with a spin-mode mutex held. */
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WINSMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
	}
	/* Release this message's reference on the fileglob. */
	fg->fg_msgcount--;
	if (fg->fg_msgcount == 0) {
		/*
		 * Last in-transit reference: flag it so the caller dequeues
		 * it via fg_removeuipc().
		 */
		fg->fg_lflags |= FG_RMMSGQ;
		remove = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return remove;
}
2102 
2103 /*
2104  * fg_removeuipc
2105  *
2106  * Description:	Remove marked fileglob from message queue
2107  *
2108  * Parameters:	fg	Fileglob pointer to remove
2109  *
2110  * Returns:	void
2111  *
2112  * Locks:	Takes and drops fg_lock & uipc_lock
2113  *		DO NOT call this function with proc_fdlock held as unp_gc()
2114  *		can potentially try to acquire proc_fdlock, which can result
2115  *		in a deadlock.
2116  */
static void
fg_removeuipc(struct fileglob * fg)
{
	/* Only act if fg_removeuipc_mark() flagged this fileglob for removal. */
	if (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_REMOVE(fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		/* Removal complete: clear the pending flag. */
		fg->fg_lflags &= ~FG_RMMSGQ;
		if (fg->fg_lflags & FG_WRMMSGQ) {
			/* Wake any thread waiting for the removal to finish. */
			fg->fg_lflags &= ~FG_WRMMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2133 
2134 /*
2135  * Returns:	0			Success
2136  *		EMSGSIZE		The new fd's will not fit
2137  *		ENOBUFS			Cannot alloc struct fileproc
2138  */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	/* In transit, rights are stored as fileglob pointers after the header. */
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds;
	int error = 0;

	/* Scratch array holding the newly allocated descriptor numbers. */
	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 *	Allocate all the fds, and if it doesn't fit,
	 *	then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Roll back the fds allocated so far. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 *	At this point we are committed, and can't fail anymore.
	 *	Allocate all the fileprocs, and remove the files
	 *	from the queue.
	 *
	 *	Until we call procfdtbl_releasefd(), fds are in flux
	 *	and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/* The right is no longer in transit; dequeue if last reference. */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 *	Return the fds into `cm`.
	 *	Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Pointers are wider than ints: zero the now-unused tail. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* On failure, drop every in-transit right carried by the message. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2229 
2230 void
unp_init(void)2231 unp_init(void)
2232 {
2233 	_CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2234 	LIST_INIT(&unp_dhead);
2235 	LIST_INIT(&unp_shead);
2236 }
2237 
#ifndef MIN
/* Fallback definition in case the system headers did not provide MIN. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2241 
2242 /*
2243  * Returns:	0			Success
2244  *		EINVAL
2245  *		EBADF
2246  */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* One bit per fd: records which fileglobs we marked for queue insert. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/*
	 * Pass 1: validate every fd before converting anything, so a bad
	 * fd cannot leave the message half-converted.
	 */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			/* Guarded fds may not be passed over socket IPC. */
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/*
	 * Pass 2 (fd lock dropped): finish message-queue insertion for the
	 * fileglobs marked above and account the new in-transit rights.
	 */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2308 
/*
 * unp_gc
 *
 * Description:	Mark-and-sweep garbage collector for file descriptors that
 *		are reachable only through SCM_RIGHTS messages in transit
 *		(e.g. unreachable cycles of sockets passed over themselves).
 *		Invoked as a thread call.
 *
 * Locks:	Takes and drops uipc_lock, per-fileglob fg_lock, socket
 *		locks and receive-buffer locks.  Restarts the whole scan
 *		if a socket (buffer) lock cannot be taken without blocking.
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT deferred and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer'  in a
			 * previous pass, then try to process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not deferred, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was deferred, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was deferred). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			/* Drop the socket lock while scanning; sblock pins sb_mb. */
			socket_unlock(so, 0);
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	nunref = 0;
	fpp = extra_ref;
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				/* Flush pending rights out of the receive buffer. */
				socket_lock(so, 0);
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Now drop the extra references; the last drop tears the socket down. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(PROC_NULL, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2507 
2508 void
unp_dispose(struct mbuf * m)2509 unp_dispose(struct mbuf *m)
2510 {
2511 	if (m) {
2512 		unp_scan(m, unp_discard, NULL);
2513 	}
2514 }
2515 
2516 /*
2517  * Returns:	0			Success
2518  */
2519 static int
unp_listen(struct unpcb * unp,proc_t p)2520 unp_listen(struct unpcb *unp, proc_t p)
2521 {
2522 	kauth_cred_t safecred = kauth_cred_proc_ref(p);
2523 	cru2x(safecred, &unp->unp_peercred);
2524 	kauth_cred_unref(&safecred);
2525 	unp->unp_flags |= UNP_HAVEPCCACHED;
2526 	return 0;
2527 }
2528 
2529 static void
unp_scan(struct mbuf * m0,void (* op)(struct fileglob *,void * arg),void * arg)2530 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2531 {
2532 	struct mbuf *m;
2533 	struct fileglob **rp;
2534 	struct cmsghdr *cm;
2535 	int i;
2536 	int qfds;
2537 
2538 	while (m0) {
2539 		for (m = m0; m; m = m->m_next) {
2540 			if (m->m_type == MT_CONTROL &&
2541 			    (size_t)m->m_len >= sizeof(*cm)) {
2542 				cm = mtod(m, struct cmsghdr *);
2543 				if (cm->cmsg_level != SOL_SOCKET ||
2544 				    cm->cmsg_type != SCM_RIGHTS) {
2545 					continue;
2546 				}
2547 				qfds = (cm->cmsg_len - sizeof(*cm)) /
2548 				    sizeof(int);
2549 				rp = (struct fileglob **)(cm + 1);
2550 				for (i = 0; i < qfds; i++) {
2551 					(*op)(*rp++, arg);
2552 				}
2553 				break;          /* XXX, but saves time */
2554 			}
2555 		}
2556 		m0 = m0->m_act;
2557 	}
2558 }
2559 
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	/*
	 * Atomically set FMARK | FDEFER; if the fileglob is already
	 * marked there is nothing new to defer, so bail out.
	 */
	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
		        os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	/* Tell the unp_gc() scan loop that it must make another pass. */
	unp_defer++;
}
2574 
static void
unp_discard(struct fileglob *fg, void *p)
{
	if (p == NULL) {
		p = current_proc();             /* XXX */
	}
	(void) OSAddAtomic(1, &unp_disposed);
	/* Release this message's reference; dequeue if it was the last. */
	if (fg_removeuipc_mark(fg)) {
		VERIFY(fg->fg_lflags & FG_RMMSGQ);
		fg_removeuipc(fg);
	}
	(void) OSAddAtomic(-1, &unp_rights);

	/* Drop the fileglob reference that the in-transit message held. */
	(void) fg_drop(p, fg);
}
2590 
2591 int
unp_lock(struct socket * so,int refcount,void * lr)2592 unp_lock(struct socket *so, int refcount, void * lr)
2593 {
2594 	void * lr_saved;
2595 	if (lr == 0) {
2596 		lr_saved = (void *)  __builtin_return_address(0);
2597 	} else {
2598 		lr_saved = lr;
2599 	}
2600 
2601 	if (so->so_pcb) {
2602 		lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2603 	} else {
2604 		panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
2605 		    so, lr_saved, so->so_usecount);
2606 	}
2607 
2608 	if (so->so_usecount < 0) {
2609 		panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
2610 		    so, so->so_pcb, lr_saved, so->so_usecount);
2611 	}
2612 
2613 	if (refcount) {
2614 		VERIFY(so->so_usecount > 0);
2615 		so->so_usecount++;
2616 	}
2617 	so->lock_lr[so->next_lock_lr] = lr_saved;
2618 	so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2619 	return 0;
2620 }
2621 
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp = sotounpcb(so);

	/* Capture the caller's address when none was supplied. */
	if (lr == 0) {
		lr_saved = (void *) __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* Record the caller in the unlock-debugging ring buffer. */
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		/*
		 * Last reference and the pcb is being torn down: free the
		 * socket and its address, then destroy the pcb mutex and
		 * the pcb itself, and kick the rights garbage collector.
		 */
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2669 
2670 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2671 unp_getlock(struct socket *so, __unused int flags)
2672 {
2673 	struct unpcb *unp = (struct unpcb *)so->so_pcb;
2674 
2675 
2676 	if (so->so_pcb) {
2677 		if (so->so_usecount < 0) {
2678 			panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2679 		}
2680 		return &unp->unp_mtx;
2681 	} else {
2682 		panic("unp_getlock: so=%p NULL so_pcb", so);
2683 		return so->so_proto->pr_domain->dom_mtx;
2684 	}
2685 }
2686