xref: /xnu-8792.61.2/bsd/kern/uipc_usrreq.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1989, 1991, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
61  */
62 /*
63  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64  * support for mandatory and extensible security protections.  This notice
65  * is included in support of clause 2.2 (b) of the Apple Public License,
66  * Version 2.0.
67  */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93 
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97 
98 #if CONFIG_MACF
99 #include <security/mac_framework.h>
100 #endif /* CONFIG_MACF */
101 
102 #include <mach/vm_param.h>
103 
#ifndef ROUNDUP64
/* Round x up to the next multiple of sizeof(u_int64_t) (8 bytes). */
#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
#endif

#ifndef ADVANCE64
/* Advance pointer p past an n-byte object, preserving 64-bit alignment. */
#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
#endif

/*
 * Maximum number of FDs that can be passed in an mbuf
 */
#define UIPC_MAX_CMSG_FD        512
116 
/* Zone from which all unpcb control blocks are allocated. */
ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
static  unp_gen_t unp_gencnt;   /* generation count, bumped on attach/detach */
static  u_int unp_count;        /* number of unpcbs currently allocated */

static  LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
static  LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
/* Protects unp_shead/unp_dhead as well as unp_gencnt and unp_count. */
static  LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);

/* Serializes datagram-socket disconnect against teardown; see unp_detach(). */
static  LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
/* Closes the race between connect(2) and close of a listening socket. */
static  LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
static  LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);

/* Nonzero while a datagram disconnect is in flight (see unp_detach()). */
static  u_int                   disconnect_in_progress;

/* Global lists of stream (shead) and datagram (dhead) unpcbs. */
static struct unp_head unp_shead, unp_dhead;
static int      unp_defer;
static thread_call_t unp_gc_tcall;      /* deferred unp_gc() invocation */
static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);

SYSCTL_DECL(_net_local);

static int      unp_rights;                     /* file descriptors in flight */
static int      unp_disposed;                   /* discarded file descriptors */

SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");

/* Bit in unp_log_enable_flags: log connection setup/teardown events. */
#define ULEF_CONNECTION 0x01
uint32_t unp_log_enable_flags = 0;

SYSCTL_UINT(_net_local, OID_AUTO, log, CTLFLAG_RD | CTLFLAG_LOCKED,
    &unp_log_enable_flags, 0, "");
148 
149 
150 /*
151  * mDNSResponder tracing.  When enabled, endpoints connected to
152  * /var/run/mDNSResponder will be traced; during each send on
153  * the traced socket, we log the PID and process name of the
154  * sending process.  We also print out a bit of info related
155  * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
156  * of mDNSResponder stays the same.
157  */
#define MDNSRESPONDER_PATH      "/var/run/mDNSResponder"

static int unpst_tracemdns;     /* enable tracing (net.local.stream.tracemdns) */

#define MDNS_IPC_MSG_HDR_VERSION_1      1

/*
 * Wire layout of the mDNSResponder IPC message header.  Must stay in
 * sync with ipc_msg_hdr in mDNSResponder's dnssd_ipc.h; only version
 * and op are inspected here (see uipc_send()).
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
175 
176 /*
177  * Unix communications domain.
178  *
179  * TODO:
180  *	SEQPACKET, RDM
181  *	rethink name space problems
182  *	need a proper out-of-band
183  *	lock pushdown
184  */
/* The unnamed local-domain address, returned when a socket never bound. */
static struct   sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };
static ino_t    unp_ino;                /* prototype for fake inode numbers */

/* Forward declarations for the pcb-level helpers defined below. */
static int      unp_attach(struct socket *);
static void     unp_detach(struct unpcb *);
static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);
static int      unp_connect(struct socket *, struct sockaddr *, proc_t);
static void     unp_disconnect(struct unpcb *);
static void     unp_shutdown(struct unpcb *);
static void     unp_drop(struct unpcb *, int);
static void     unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
static void     unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
static void     unp_mark(struct fileglob *, __unused void *);
static void     unp_discard(struct fileglob *, void *);
static int      unp_internalize(struct mbuf *, proc_t);
static int      unp_listen(struct unpcb *, proc_t);
static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
203 
/*
 * Allocate the thread call used to run unp_gc() (in-flight FD garbage
 * collection); registered to run during kernel startup.
 */
__startup_func
static void
unp_gc_setup(void)
{
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
213 
/*
 * With so's lock held, additionally acquire conn_so's lock while
 * respecting the global lock ordering (ascending socket address).
 *
 * If so sorts below conn_so we may simply take conn_so's lock.
 * Otherwise so's lock must be dropped and both locks re-taken in
 * order.  For the window where so is unlocked, UNP_DONTDISCONNECT is
 * set and rw_thrcount incremented so the pcb is not disconnected
 * underneath us.  NOTE(review): the disconnect path is assumed to
 * honor this flag and sleep on the unpcb address — the wakeup() below
 * pairs with that; confirm in unp_disconnect(), which is outside this
 * chunk.
 *
 * On return both socket locks are held.
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
235 
236 static int
uipc_abort(struct socket * so)237 uipc_abort(struct socket *so)
238 {
239 	struct unpcb *unp = sotounpcb(so);
240 
241 	if (unp == 0) {
242 		return EINVAL;
243 	}
244 	unp_drop(unp, ECONNABORTED);
245 	unp_detach(unp);
246 	sofree(so);
247 	return 0;
248 }
249 
250 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)251 uipc_accept(struct socket *so, struct sockaddr **nam)
252 {
253 	struct unpcb *unp = sotounpcb(so);
254 
255 	if (unp == 0) {
256 		return EINVAL;
257 	}
258 
259 	/*
260 	 * Pass back name of connected socket,
261 	 * if it was bound and we are still connected
262 	 * (our peer may have closed already!).
263 	 */
264 	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
265 		*nam = dup_sockaddr((struct sockaddr *)
266 		    unp->unp_conn->unp_addr, 1);
267 	} else {
268 		if (unp_log_enable_flags & ULEF_CONNECTION) {
269 			os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
270 			    __func__, unp->unp_gencnt);
271 		}
272 		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
273 	}
274 	return 0;
275 }
276 
277 /*
278  * Returns:	0			Success
279  *		EISCONN
280  *	unp_attach:
281  */
282 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)283 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
284 {
285 	struct unpcb *unp = sotounpcb(so);
286 
287 	if (unp != 0) {
288 		return EISCONN;
289 	}
290 	return unp_attach(so);
291 }
292 
293 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)294 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
295 {
296 	struct unpcb *unp = sotounpcb(so);
297 
298 	if (unp == 0) {
299 		return EINVAL;
300 	}
301 
302 	return unp_bind(unp, nam, p);
303 }
304 
305 /*
306  * Returns:	0			Success
307  *		EINVAL
308  *	unp_connect:???			[See elsewhere in this file]
309  */
310 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)311 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
312 {
313 	struct unpcb *unp = sotounpcb(so);
314 
315 	if (unp == 0) {
316 		return EINVAL;
317 	}
318 	return unp_connect(so, nam, p);
319 }
320 
321 /*
322  * Returns:	0			Success
323  *		EINVAL
324  *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
325  *	unp_connect2:EINVAL		Invalid argument
326  */
327 static int
uipc_connect2(struct socket * so1,struct socket * so2)328 uipc_connect2(struct socket *so1, struct socket *so2)
329 {
330 	struct unpcb *unp = sotounpcb(so1);
331 
332 	if (unp == 0) {
333 		return EINVAL;
334 	}
335 
336 	return unp_connect2(so1, so2);
337 }
338 
339 /* control is EOPNOTSUPP */
340 
341 static int
uipc_detach(struct socket * so)342 uipc_detach(struct socket *so)
343 {
344 	struct unpcb *unp = sotounpcb(so);
345 
346 	if (unp == 0) {
347 		return EINVAL;
348 	}
349 
350 	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
351 	unp_detach(unp);
352 	return 0;
353 }
354 
355 static int
uipc_disconnect(struct socket * so)356 uipc_disconnect(struct socket *so)
357 {
358 	struct unpcb *unp = sotounpcb(so);
359 
360 	if (unp == 0) {
361 		return EINVAL;
362 	}
363 	unp_disconnect(unp);
364 	return 0;
365 }
366 
367 /*
368  * Returns:	0			Success
369  *		EINVAL
370  */
371 static int
uipc_listen(struct socket * so,__unused proc_t p)372 uipc_listen(struct socket *so, __unused proc_t p)
373 {
374 	struct unpcb *unp = sotounpcb(so);
375 
376 	if (unp == 0 || unp->unp_vnode == 0) {
377 		return EINVAL;
378 	}
379 	return unp_listen(unp, p);
380 }
381 
/*
 * pru_peeraddr: return a copy of the connected peer's bound address,
 * or the unnamed address if there is no peer or the peer never bound.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
	so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
	if (so2 != NULL) {
		/*
		 * Lock the peer so its address cannot be torn down while
		 * we duplicate it.  This can briefly drop so's lock (see
		 * unp_get_locks_in_order()), which is why unp_conn is
		 * re-checked below.
		 */
		unp_get_locks_in_order(so, so2);
	}

	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr((struct sockaddr *)
		    unp->unp_conn->unp_addr, 1);
	} else {
		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
	}
	if (so2 != NULL) {
		socket_unlock(so2, 1);
	}
	return 0;
}
407 
/*
 * pru_rcvd for AF_UNIX stream sockets: the receiver has drained data,
 * so give the reclaimed space back to the connected sender as
 * send-buffer headroom (the flow-control counterpart of uipc_send())
 * and wake any writers blocked on it.  Datagram sockets never call
 * this, hence the panic.
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == 0) {
		return EINVAL;
	}
	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
	/*NOTREACHED*/

	case SOCK_STREAM:
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
		if (unp->unp_conn == 0) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		/* May briefly drop so's lock; see unp_get_locks_in_order(). */
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}

		socket_unlock(so2, 1);

#undef snd
#undef rcv
		break;

	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
454 
455 /* pru_rcvoob is EOPNOTSUPP */
456 
457 /*
458  * Returns:	0			Success
459  *		EINVAL
460  *		EOPNOTSUPP
461  *		EPIPE
462  *		ENOTCONN
463  *		EISCONN
464  *	unp_internalize:EINVAL
465  *	unp_internalize:EBADF
466  *	unp_connect:EAFNOSUPPORT	Address family not supported
467  *	unp_connect:EINVAL		Invalid argument
468  *	unp_connect:ENOTSOCK		Not a socket
469  *	unp_connect:ECONNREFUSED	Connection refused
470  *	unp_connect:EISCONN		Socket is connected
471  *	unp_connect:EPROTOTYPE		Protocol wrong type for socket
472  *	unp_connect:???
473  *	sbappendaddr:ENOBUFS		[5th argument, contents modified]
474  *	sbappendaddr:???		[whatever a filter author chooses]
475  */
/*
 * pru_send for AF_UNIX sockets.
 *
 * SOCK_DGRAM: deliver the datagram directly into the peer's receive
 * buffer, connecting temporarily if a destination address was given.
 * SOCK_STREAM: append to the peer's receive buffer and reduce our own
 * send-buffer hiwat by the same amount to keep end-to-end flow control.
 * Control mbufs have their file descriptors internalized first via
 * unp_internalize().  On return the data mbuf and (on error) the
 * control mbuf have been consumed or freed.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int32_t len = m_pktlen(m);

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* sendto(2) on a connected socket is an error. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		/* so == so2 when a socket sends to itself; it is already locked. */
		so2 = unp->unp_conn->unp_socket;
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = (struct sockaddr *)unp->unp_addr;
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/* In every outcome the data mbuf is no longer ours to free. */
		m = NULL;
		if (nam) {
			/* Undo the temporary connect made for sendto(2). */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break;  /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		if (unp->unp_flags & UNP_TRACE_MDNS) {
			struct mdns_ipc_msg_hdr hdr;

			/* Log sender identity for mDNSResponder debugging. */
			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full.  It is
		 * possible, however, that append might fail.  In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		/* Clamp at zero: hiwat is unsigned, avoid wrap-around. */
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
			so2->so_tc_stats[0].rxpackets += 1;
			so2->so_tc_stats[0].rxbytes += len;
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	so->so_tc_stats[0].txpackets += 1;
	so->so_tc_stats[0].txbytes += len;

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0) {
		/* Release internalized FDs; drop the lock as in the open above. */
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
694 
/*
 * pru_sense: fill in fstat(2) information for a local socket.
 * st_blksize is reported as the send-buffer high-water mark, plus the
 * peer's pending receive data for a connected stream socket.  A fake
 * inode number is assigned lazily from the global unp_ino counter.
 */
static int
uipc_sense(struct socket *so, void *ub, int isstat64)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	blksize_t blksize;

	if (unp == 0) {
		return EINVAL;
	}

	blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
		so2 = unp->unp_conn->unp_socket;
		blksize += so2->so_rcv.sb_cc;
	}
	/*
	 * NOTE(review): unp_ino is a plain global incremented without a
	 * lock, and its first post-increment value is 0, so the first
	 * caller's inode fails the check below again on a later call —
	 * presumably benign for a fake inode, but worth confirming.
	 */
	if (unp->unp_ino == 0) {
		unp->unp_ino = unp_ino++;
	}

	if (isstat64 != 0) {
		struct stat64  *sb64;

		sb64 = (struct stat64 *)ub;
		sb64->st_blksize = blksize;
		sb64->st_dev = NODEV;
		sb64->st_ino = (ino64_t)unp->unp_ino;
	} else {
		struct stat *sb;

		sb = (struct stat *)ub;
		sb->st_blksize = blksize;
		sb->st_dev = NODEV;
		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
	}

	return 0;
}
733 
734 /*
735  * Returns:	0		Success
736  *		EINVAL
737  *
738  * Notes:	This is not strictly correct, as unp_shutdown() also calls
739  *		socantrcvmore().  These should maybe both be conditionalized
740  *		on the 'how' argument in soshutdown() as called from the
741  *		shutdown() system call.
742  */
743 static int
uipc_shutdown(struct socket * so)744 uipc_shutdown(struct socket *so)
745 {
746 	struct unpcb *unp = sotounpcb(so);
747 
748 	if (unp == 0) {
749 		return EINVAL;
750 	}
751 	socantsendmore(so);
752 	unp_shutdown(unp);
753 	return 0;
754 }
755 
756 /*
757  * Returns:	0			Success
758  *		EINVAL			Invalid argument
759  */
760 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)761 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
762 {
763 	struct unpcb *unp = sotounpcb(so);
764 
765 	if (unp == NULL) {
766 		return EINVAL;
767 	}
768 	if (unp->unp_addr != NULL) {
769 		*nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
770 	} else {
771 		*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
772 	}
773 	return 0;
774 }
775 
/* User-request switch wiring the PF_LOCAL protocol to the handlers above. */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort =            uipc_abort,
	.pru_accept =           uipc_accept,
	.pru_attach =           uipc_attach,
	.pru_bind =             uipc_bind,
	.pru_connect =          uipc_connect,
	.pru_connect2 =         uipc_connect2,
	.pru_detach =           uipc_detach,
	.pru_disconnect =       uipc_disconnect,
	.pru_listen =           uipc_listen,
	.pru_peeraddr =         uipc_peeraddr,
	.pru_rcvd =             uipc_rcvd,
	.pru_send =             uipc_send,
	.pru_sense =            uipc_sense,
	.pru_shutdown =         uipc_shutdown,
	.pru_sockaddr =         uipc_sockaddr,
	.pru_sosend =           sosend,
	.pru_soreceive =        soreceive,
};
795 
796 int
uipc_ctloutput(struct socket * so,struct sockopt * sopt)797 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
798 {
799 	struct unpcb *unp = sotounpcb(so);
800 	int error = 0;
801 	pid_t peerpid;
802 	proc_t p;
803 	task_t t;
804 	struct socket *peerso;
805 
806 	switch (sopt->sopt_dir) {
807 	case SOPT_GET:
808 		switch (sopt->sopt_name) {
809 		case LOCAL_PEERCRED:
810 			if (unp->unp_flags & UNP_HAVEPC) {
811 				error = sooptcopyout(sopt, &unp->unp_peercred,
812 				    sizeof(unp->unp_peercred));
813 			} else {
814 				if (so->so_type == SOCK_STREAM) {
815 					error = ENOTCONN;
816 				} else {
817 					error = EINVAL;
818 				}
819 			}
820 			break;
821 		case LOCAL_PEERPID:
822 		case LOCAL_PEEREPID:
823 			if (unp->unp_conn == NULL) {
824 				error = ENOTCONN;
825 				break;
826 			}
827 			peerso = unp->unp_conn->unp_socket;
828 			if (peerso == NULL) {
829 				panic("peer is connected but has no socket?");
830 			}
831 			unp_get_locks_in_order(so, peerso);
832 			if (sopt->sopt_name == LOCAL_PEEREPID &&
833 			    peerso->so_flags & SOF_DELEGATED) {
834 				peerpid = peerso->e_pid;
835 			} else {
836 				peerpid = peerso->last_pid;
837 			}
838 			socket_unlock(peerso, 1);
839 			error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
840 			break;
841 		case LOCAL_PEERUUID:
842 		case LOCAL_PEEREUUID:
843 			if (unp->unp_conn == NULL) {
844 				error = ENOTCONN;
845 				break;
846 			}
847 			peerso = unp->unp_conn->unp_socket;
848 			if (peerso == NULL) {
849 				panic("peer is connected but has no socket?");
850 			}
851 			unp_get_locks_in_order(so, peerso);
852 			if (sopt->sopt_name == LOCAL_PEEREUUID &&
853 			    peerso->so_flags & SOF_DELEGATED) {
854 				error = sooptcopyout(sopt, &peerso->e_uuid,
855 				    sizeof(peerso->e_uuid));
856 			} else {
857 				error = sooptcopyout(sopt, &peerso->last_uuid,
858 				    sizeof(peerso->last_uuid));
859 			}
860 			socket_unlock(peerso, 1);
861 			break;
862 		case LOCAL_PEERTOKEN:
863 			if (unp->unp_conn == NULL) {
864 				error = ENOTCONN;
865 				break;
866 			}
867 			peerso = unp->unp_conn->unp_socket;
868 			if (peerso == NULL) {
869 				panic("peer is connected but has no socket?");
870 			}
871 			unp_get_locks_in_order(so, peerso);
872 			peerpid = peerso->last_pid;
873 			p = proc_find(peerpid);
874 			if (p != PROC_NULL) {
875 				t = proc_task(p);
876 				if (t != TASK_NULL) {
877 					audit_token_t peertoken;
878 					mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
879 					if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
880 						error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
881 					} else {
882 						error = EINVAL;
883 					}
884 				} else {
885 					error = EINVAL;
886 				}
887 				proc_rele(p);
888 			} else {
889 				error = EINVAL;
890 			}
891 			socket_unlock(peerso, 1);
892 			break;
893 		default:
894 			error = EOPNOTSUPP;
895 			break;
896 		}
897 		break;
898 	case SOPT_SET:
899 	default:
900 		error = EOPNOTSUPP;
901 		break;
902 	}
903 
904 	return error;
905 }
906 
907 /*
908  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
909  * for stream sockets, although the total for sender and receiver is
910  * actually only PIPSIZ.
911  * Datagram sockets really use the sendspace as the maximum datagram size,
912  * and don't really want to reserve the sendspace.  Their recvspace should
913  * be large enough for at least one max-size datagram plus address.
914  */
#ifndef PIPSIZ
#define PIPSIZ  8192
#endif
/* Default buffer reservations; tunable via the sysctls declared below. */
static u_int32_t        unpst_sendspace = PIPSIZ;
static u_int32_t        unpst_recvspace = PIPSIZ;
static u_int32_t        unpdg_sendspace = 2 * 1024;       /* really max datagram size */
static u_int32_t        unpdg_recvspace = 4 * 1024;

SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_recvspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_tracemdns, 0, "");
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_recvspace, 0, "");
935 
936 /*
937  * Returns:	0			Success
938  *		ENOBUFS
939  *	soreserve:ENOBUFS
940  */
/*
 * Allocate and initialize a unpcb for a newly created AF_UNIX socket
 * and publish it on the global stream/datagram pcb list.
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	/* Reserve default buffer space unless the caller already did. */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			/*
			 * By default soreserve() will set the low water
			 * mark to MCLBYTES which is too high given our
			 * default sendspace.  Override it here to something
			 * sensible.
			 */
			so->so_snd.sb_lowat = 1;
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Insert into the global list under the list lock. */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
1006 
/*
 * Tear down a unpcb: unlink it from the global list, detach any bound
 * vnode, disconnect from the peer, and reset every datagram socket
 * still connected to us.  Called with the socket locked; the lock may
 * be dropped and re-taken during vnode and peer teardown.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the vnode may have been detached while unlocked. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp);                /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		/* Wait for any in-flight disconnect, then claim the slot. */
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1086 
1087 /*
1088  * Returns:	0			Success
1089  *		EAFNOSUPPORT
1090  *		EINVAL
1091  *		EADDRINUSE
1092  *		namei:???		[anything namei can return]
1093  *		vnode_authorize:???	[anything vnode_authorize can return]
1094  *
1095  * Notes:	p at this point is the current process, as this function is
1096  *		only called by sobind().
1097  */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	/*
	 * Check if the socket is already bound to an address
	 */
	if (unp->unp_vnode != NULL) {
		return EINVAL;
	}
	/*
	 * Check if the socket may have been shut down
	 */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		return EINVAL;
	}

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (namelen >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Make a NUL-terminated copy of the path for namei() */
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

	/* Drop the socket lock across the (possibly blocking) path lookup */
	socket_unlock(so, 0);

	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		/* A node already exists at that path: address is in use */
		socket_lock(so, 0);
		return EADDRINUSE;
	}

	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));

	/*
	 * The MAC and authorization checks below are deliberately chained
	 * via braceless "if (error == 0)" across the conditional blocks:
	 * each later check runs only if the previous one passed.
	 */
#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return error;
	}

	socket_lock(so, 0);

	/* Re-check: another thread may have bound this socket while unlocked */
	if (unp->unp_vnode != NULL) {
		vnode_put(vp); /* drop the iocount */
		return EINVAL;
	}

	error = vnode_ref(vp);  /* gain a longterm reference */
	if (error) {
		vnode_put(vp); /* drop the iocount */
		return error;
	}

	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
	vnode_put(vp);          /* drop the iocount */

	return 0;
}
1223 
1224 
1225 /*
1226  * Returns:	0			Success
1227  *		EAFNOSUPPORT		Address family not supported
1228  *		EINVAL			Invalid argument
1229  *		ENOTSOCK		Not a socket
1230  *		ECONNREFUSED		Connection refused
1231  *		EPROTOTYPE		Protocol wrong type for socket
1232  *		EISCONN			Socket is connected
1233  *	unp_connect2:EPROTOTYPE		Protocol wrong type for socket
1234  *	unp_connect2:EINVAL		Invalid argument
1235  *	namei:???			[anything namei can return]
 *	vnode_authorize:???		[anything vnode_authorize can return]
1237  *
1238  * Notes:	p at this point is the current process, as this function is
1239  *		only called by sosend(), sendfile(), and soconnectlock().
1240  */
static int
unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3, *list_so = NULL;
	struct unpcb *unp, *unp2, *unp3;
	vfs_context_t ctx = vfs_context_current();
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	unp = sotounpcb(so);
	so2 = so3 = NULL;

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (len >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}

	soisconnecting(so);

	/* Make a NUL-terminated copy of the path for namei() */
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	/* Drop the socket lock across the (possibly blocking) path lookup */
	socket_unlock(so, 0);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	nameidone(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		socket_lock(so, 0);
		goto out;
	}

#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_connect(ctx, vp, so);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}
#endif /* CONFIG_MACF_SOCKET_SUBSET */

	/* Connecting requires write access to the bound vnode */
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}

	/* unp_connect_lock keeps the listener from closing underneath us */
	lck_mtx_lock(&unp_connect_lock);

	if (vp->v_socket == 0) {
		lck_mtx_unlock(&unp_connect_lock);
		error = ECONNREFUSED;
		socket_lock(so, 0);
		goto out;
	}

	socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
	so2 = vp->v_socket;
	lck_mtx_unlock(&unp_connect_lock);


	if (so2->so_pcb == NULL) {
		error = ECONNREFUSED;
		if (so != so2) {
			socket_unlock(so2, 1);
			socket_lock(so, 0);
		} else {
			/* Release the reference held for the listen socket */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
		goto out;
	}

	/* Take both socket locks in address order to avoid deadlock */
	if (so < so2) {
		socket_unlock(so2, 0);
		socket_lock(so, 0);
		socket_lock(so2, 0);
	} else if (so > so2) {
		socket_lock(so, 0);
	}
	/*
	 * Check if socket was connected while we were trying to
	 * get the socket locks in order.
	 * XXX - probably shouldn't return an error for SOCK_DGRAM
	 */
	if ((so->so_state & SS_ISCONNECTED) != 0) {
		error = EISCONN;
		goto decref_out;
	}

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto decref_out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Release the incoming socket but keep a reference */
		socket_unlock(so, 0);

		/* Spawn the server-side socket off the listener */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, nam)) == 0) {
			error = ECONNREFUSED;
			if (so != so2) {
				socket_unlock(so2, 1);
				socket_lock(so, 0);
			} else {
				socket_lock(so, 0);
				/* Release the reference held for
				 * listen socket.
				 */
				VERIFY(so2->so_usecount > 0);
				so2->so_usecount--;
			}
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));

		/* Here we need to have both so and so2 locks and so2
		 * is already locked. Lock ordering is required.
		 */
		if (so < so2) {
			socket_unlock(so2, 0);
			socket_lock(so, 0);
			socket_lock(so2, 0);
		} else {
			socket_lock(so, 0);
		}

		/* Check again if the socket state changed when its lock was released */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			socket_unlock(so2, 1);
			/* Undo the sonewconn() above: free the unused so3 */
			socket_lock(so3, 0);
			sofreelastref(so3, 1);
			goto out;
		}
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		/* Hold the reference on listening socket until the end */
		socket_unlock(so2, 0);
		list_so = so2;

		/* Lock ordering doesn't matter because so3 was just created */
		socket_lock(so3, 1);
		so2 = so3;      /* from here on, connect to the new server socket */

		/*
		 * Enable tracing for mDNSResponder endpoints.  (The use
		 * of sizeof instead of strlen below takes the null
		 * terminating character into account.)
		 */
		if (unpst_tracemdns &&
		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
		    sizeof(MDNSRESPONDER_PATH))) {
			unp->unp_flags |= UNP_TRACE_MDNS;
			unp2->unp_flags |= UNP_TRACE_MDNS;
		}
	}

	error = unp_connect2(so, so2);

decref_out:
	if (so2 != NULL) {
		if (so != so2) {
			socket_unlock(so2, 1);
		} else {
			/* Release the extra reference held for the listen socket.
			 * This is possible only for SOCK_DGRAM sockets. We refuse
			 * connecting to the same socket for SOCK_STREAM sockets.
			 */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
	}

	if (list_so != NULL) {
		socket_lock(list_so, 0);
		socket_unlock(list_so, 1);
	}

out:
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	vnode_put(vp);  /* release the iocount taken by namei() */
	return error;
}
1472 
1473 /*
1474  * Returns:	0			Success
1475  *		EPROTOTYPE		Protocol wrong type for socket
1476  *		EINVAL			Invalid argument
1477  */
/*
 * Link two AF_UNIX sockets together.  Both socket locks are held on
 * entry and on return; the lock juggling inside exists only to satisfy
 * the lock-ordering requirements of soisconnected().
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	if (so2->so_type != so->so_type) {
		return EPROTOTYPE;
	}

	unp2 = sotounpcb(so2);

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Verify both sockets are still opened */
	if (unp == 0 || unp2 == 0) {
		return EINVAL;
	}

	unp->unp_conn = unp2;
	so2->so_usecount++;     /* reference held by unp->unp_conn */

	switch (so->so_type) {
	case SOCK_DGRAM:
		/* Track this PCB on the peer's list of connected senders */
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);

		if (so != so2) {
			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
			/* Keep an extra reference on so2 that will be dropped
			 * soon after getting the locks in order
			 */
			socket_unlock(so2, 0);
			soisconnected(so);
			unp_get_locks_in_order(so, so2);
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		} else {
			/* Connected to itself: no lock juggling needed */
			soisconnected(so);
		}

		break;

	case SOCK_STREAM:
		/* This takes care of socketpair */
		if (!(unp->unp_flags & UNP_HAVEPC) &&
		    !(unp2->unp_flags & UNP_HAVEPC)) {
			cru2x(kauth_cred_get(), &unp->unp_peercred);
			unp->unp_flags |= UNP_HAVEPC;

			cru2x(kauth_cred_get(), &unp2->unp_peercred);
			unp2->unp_flags |= UNP_HAVEPC;
		}
		/* Streams are connected symmetrically: back-pointer + ref */
		unp2->unp_conn = unp;
		so->so_usecount++;

		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
		socket_unlock(so, 0);
		soisconnected(so2);

		/* Keep an extra reference on so2, that will be dropped soon after
		 * getting the locks in order again.
		 */
		socket_unlock(so2, 0);

		socket_lock(so, 0);
		soisconnected(so);

		unp_get_locks_in_order(so, so2);
		/* Decrement the extra reference left before */
		VERIFY(so2->so_usecount > 0);
		so2->so_usecount--;
		break;

	default:
		panic("unknown socket type %d in unp_connect2", so->so_type);
	}
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return 0;
}
1559 
/*
 * Break the connection between unp and its peer.  Called with unp's
 * socket locked; returns with it locked.  Serializes against concurrent
 * disconnects via the disconnect_in_progress flag and acquires both
 * socket locks in address order before touching shared state.
 */
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = NULL;
	struct socket *so2 = NULL, *so;
	struct socket *waitso;
	int so_locked = 1, strdisconn = 0;

	so = unp->unp_socket;
	if (unp->unp_conn == NULL) {
		return;
	}
	/* Become the single in-progress disconnect */
	lck_mtx_lock(&unp_disconnect_lock);
	while (disconnect_in_progress != 0) {
		if (so_locked == 1) {
			socket_unlock(so, 0);
			so_locked = 0;
		}
		(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
		    PSOCK, "disconnect", NULL);
	}
	disconnect_in_progress = 1;
	lck_mtx_unlock(&unp_disconnect_lock);

	if (so_locked == 0) {
		socket_lock(so, 0);
		so_locked = 1;
	}

	unp2 = unp->unp_conn;

	if (unp2 == 0 || unp2->unp_socket == NULL) {
		goto out;
	}
	so2 = unp2->unp_socket;

try_again:
	/* Take both socket locks in address order; waitso is the socket
	 * whose PCB we may have to sleep on below.
	 */
	if (so == so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		waitso = so;
	} else if (so < so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		socket_lock(so2, 1);
		waitso = so2;
	} else {
		if (so_locked == 1) {
			socket_unlock(so, 0);
		}
		socket_lock(so2, 1);
		socket_lock(so, 0);
		waitso = so;
	}
	so_locked = 1;

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Check for the UNP_DONTDISCONNECT flag, if it
	 * is set, release both sockets and go to sleep
	 */

	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		so_locked = 0;

		/* PDROP: unp->unp_mtx is released while sleeping */
		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
		    PSOCK | PDROP, "unpdisconnect", NULL);
		goto try_again;
	}

	if (unp->unp_conn == NULL) {
		panic("unp_conn became NULL after sleep");
	}

	unp->unp_conn = NULL;
	/* Drop the use count that backed the severed unp_conn pointer */
	VERIFY(so2->so_usecount > 0);
	so2->so_usecount--;

	if (unp->unp_flags & UNP_TRACE_MDNS) {
		unp->unp_flags &= ~UNP_TRACE_MDNS;
	}

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		/* Remove this PCB from the peer's list of connected senders */
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		break;

	case SOCK_STREAM:
		unp2->unp_conn = NULL;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;

		/*
		 * Set the socket state correctly but do a wakeup later when
		 * we release all locks except the socket lock, this will avoid
		 * a deadlock.
		 */
		unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		if (unp2->unp_flags & UNP_TRACE_MDNS) {
			unp2->unp_flags &= ~UNP_TRACE_MDNS;
		}

		strdisconn = 1; /* defer soisdisconnected() until locks drop */
		break;
	default:
		panic("unknown socket type %d", so->so_type);
	}
out:
	/* Allow other disconnects to proceed */
	lck_mtx_lock(&unp_disconnect_lock);
	disconnect_in_progress = 0;
	wakeup(&disconnect_in_progress);
	lck_mtx_unlock(&unp_disconnect_lock);

	if (strdisconn) {
		socket_unlock(so, 0);
		soisdisconnected(so2);
		socket_unlock(so2, 1);

		socket_lock(so, 0);
		soisdisconnected(so);
	}
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return;
}
1699 
1700 /*
1701  * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1702  * The unpcb_compat data structure is passed to user space and must not change.
1703  */
1704 static void
unpcb_to_compat(struct unpcb * up,struct unpcb_compat * cp)1705 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1706 {
1707 #if defined(__LP64__)
1708 	cp->unp_link.le_next = (u_int32_t)
1709 	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1710 	cp->unp_link.le_prev = (u_int32_t)
1711 	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1712 #else
1713 	cp->unp_link.le_next = (struct unpcb_compat *)
1714 	    VM_KERNEL_ADDRPERM(up->unp_link.le_next);
1715 	cp->unp_link.le_prev = (struct unpcb_compat **)
1716 	    VM_KERNEL_ADDRPERM(up->unp_link.le_prev);
1717 #endif
1718 	cp->unp_socket = (_UNPCB_PTR(struct socket *))
1719 	    VM_KERNEL_ADDRPERM(up->unp_socket);
1720 	cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1721 	    VM_KERNEL_ADDRPERM(up->unp_vnode);
1722 	cp->unp_ino = up->unp_ino;
1723 	cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1724 	    VM_KERNEL_ADDRPERM(up->unp_conn);
1725 	cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);
1726 #if defined(__LP64__)
1727 	cp->unp_reflink.le_next =
1728 	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1729 	cp->unp_reflink.le_prev =
1730 	    (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1731 #else
1732 	cp->unp_reflink.le_next =
1733 	    (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);
1734 	cp->unp_reflink.le_prev =
1735 	    (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);
1736 #endif
1737 	cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1738 	    VM_KERNEL_ADDRPERM(up->unp_addr);
1739 	cp->unp_cc = up->unp_cc;
1740 	cp->unp_mbcnt = up->unp_mbcnt;
1741 	cp->unp_gencnt = up->unp_gencnt;
1742 }
1743 
/*
 * sysctl handler that exports the active AF_UNIX PCBs (datagram or
 * stream list, selected by arg1) to user space: a leading xunpgen
 * header, one legacy xunpcb record per PCB, and a trailing xunpgen so
 * the caller can detect concurrent changes via the generation count.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe only: report an estimate with ~12.5% slack */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot PCBs no newer than the generation advertised above */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRPERM(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1861 
/*
 * sysctl nodes exporting the datagram and stream PCB lists in the legacy
 * 32-bit xunpcb layout; arg1 selects which list unp_pcblist walks.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1870 
1871 #if XNU_TARGET_OS_OSX
1872 
/*
 * 64-bit variant of unp_pcblist: exports the same PCB list (datagram or
 * stream, selected by arg1) using the xunpcb64 record layout.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe only: report an estimate with ~12.5% slack */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot PCBs no newer than the generation advertised above */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t          xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			/* All exported kernel pointers are scrambled */
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				bcopy(unp->unp_addr, &xu.xu_au,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				bcopy(unp->unp_conn->unp_addr,
				    &xu.xu_cau,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
2014 
/*
 * sysctl nodes exporting the datagram and stream PCB lists in the
 * xunpcb64 layout; arg1 selects which list unp_pcblist64 walks.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
2023 
2024 #endif /* XNU_TARGET_OS_OSX */
2025 
2026 static int
2027 unp_pcblist_n SYSCTL_HANDLER_ARGS
2028 {
2029 #pragma unused(oidp,arg2)
2030 	int error = 0;
2031 	int i, n;
2032 	struct unpcb *unp;
2033 	unp_gen_t gencnt;
2034 	struct xunpgen xug;
2035 	struct unp_head *head;
2036 	void *buf = NULL;
2037 	size_t item_size = ROUNDUP64(sizeof(struct xunpcb_n)) +
2038 	    ROUNDUP64(sizeof(struct xsocket_n)) +
2039 	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
2040 	    ROUNDUP64(sizeof(struct xsockstat_n));
2041 
2042 	buf = kalloc_data(item_size, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2043 
2044 	lck_rw_lock_shared(&unp_list_mtx);
2045 
2046 	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
2047 
2048 	/*
2049 	 * The process of preparing the PCB list is too time-consuming and
2050 	 * resource-intensive to repeat twice on every request.
2051 	 */
2052 	if (req->oldptr == USER_ADDR_NULL) {
2053 		n = unp_count;
2054 		req->oldidx = 2 * sizeof(xug) + (n + n / 8) * item_size;
2055 		goto done;
2056 	}
2057 
2058 	if (req->newptr != USER_ADDR_NULL) {
2059 		error = EPERM;
2060 		goto done;
2061 	}
2062 
2063 	/*
2064 	 * OK, now we're committed to doing something.
2065 	 */
2066 	gencnt = unp_gencnt;
2067 	n = unp_count;
2068 
2069 	bzero(&xug, sizeof(xug));
2070 	xug.xug_len = sizeof(xug);
2071 	xug.xug_count = n;
2072 	xug.xug_gen = gencnt;
2073 	xug.xug_sogen = so_gencnt;
2074 	error = SYSCTL_OUT(req, &xug, sizeof(xug));
2075 	if (error != 0) {
2076 		goto done;
2077 	}
2078 
2079 	/*
2080 	 * We are done if there is no pcb
2081 	 */
2082 	if (n == 0) {
2083 		goto done;
2084 	}
2085 
2086 	for (i = 0, unp = head->lh_first;
2087 	    i < n && unp != NULL;
2088 	    i++, unp = unp->unp_link.le_next) {
2089 		struct xunpcb_n *xu = (struct xunpcb_n *)buf;
2090 		struct xsocket_n *xso = (struct xsocket_n *)
2091 		    ADVANCE64(xu, sizeof(*xu));
2092 		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
2093 		    ADVANCE64(xso, sizeof(*xso));
2094 		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
2095 		    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
2096 		struct xsockstat_n *xsostats = (struct xsockstat_n *)
2097 		    ADVANCE64(xsbsnd, sizeof(*xsbsnd));
2098 
2099 		if (unp->unp_gencnt > gencnt) {
2100 			continue;
2101 		}
2102 
2103 		bzero(buf, item_size);
2104 
2105 		xu->xunp_len = sizeof(struct xunpcb_n);
2106 		xu->xunp_kind = XSO_UNPCB;
2107 		xu->xunp_unpp = (uint64_t)VM_KERNEL_ADDRPERM(unp);
2108 		xu->xunp_vnode = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_vnode);
2109 		xu->xunp_ino = unp->unp_ino;
2110 		xu->xunp_conn = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_conn);
2111 		xu->xunp_refs = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);
2112 		xu->xunp_reflink = (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);
2113 		xu->xunp_cc = unp->unp_cc;
2114 		xu->xunp_mbcnt = unp->unp_mbcnt;
2115 		xu->xunp_flags = unp->unp_flags;
2116 		xu->xunp_gencnt = unp->unp_gencnt;
2117 
2118 		if (unp->unp_addr) {
2119 			bcopy(unp->unp_addr, &xu->xu_au,
2120 			    unp->unp_addr->sun_len);
2121 		}
2122 		if (unp->unp_conn && unp->unp_conn->unp_addr) {
2123 			bcopy(unp->unp_conn->unp_addr,
2124 			    &xu->xu_cau,
2125 			    unp->unp_conn->unp_addr->sun_len);
2126 		}
2127 		sotoxsocket_n(unp->unp_socket, xso);
2128 		sbtoxsockbuf_n(unp->unp_socket ?
2129 		    &unp->unp_socket->so_rcv : NULL, xsbrcv);
2130 		sbtoxsockbuf_n(unp->unp_socket ?
2131 		    &unp->unp_socket->so_snd : NULL, xsbsnd);
2132 		sbtoxsockstat_n(unp->unp_socket, xsostats);
2133 
2134 		error = SYSCTL_OUT(req, buf, item_size);
2135 		if (error != 0) {
2136 			break;
2137 		}
2138 	}
2139 	if (error == 0) {
2140 		/*
2141 		 * Give the user an updated idea of our state.
2142 		 * If the generation differs from what we told
2143 		 * her before, she knows that something happened
2144 		 * while we were processing this request, and it
2145 		 * might be necessary to retry.
2146 		 */
2147 		bzero(&xug, sizeof(xug));
2148 		xug.xug_len = sizeof(xug);
2149 		xug.xug_gen = unp_gencnt;
2150 		xug.xug_sogen = so_gencnt;
2151 		xug.xug_count = unp_count;
2152 		error = SYSCTL_OUT(req, &xug, sizeof(xug));
2153 	}
2154 done:
2155 	lck_rw_done(&unp_list_mtx);
2156 	kfree_data(buf, item_size);
2157 	return error;
2158 }
2159 
/*
 * sysctl nodes exporting the extended ("_n") pcb list for UNIX-domain
 * sockets: net.local.dgram.pcblist_n and net.local.stream.pcblist_n.
 * Both dispatch to unp_pcblist_n(), with the socket type passed via arg1.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local stream sockets");
2168 
2169 static void
unp_shutdown(struct unpcb * unp)2170 unp_shutdown(struct unpcb *unp)
2171 {
2172 	struct socket *so = unp->unp_socket;
2173 	struct socket *so2;
2174 	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
2175 		so2 = unp->unp_conn->unp_socket;
2176 		unp_get_locks_in_order(so, so2);
2177 		socantrcvmore(so2);
2178 		socket_unlock(so2, 1);
2179 	}
2180 }
2181 
/*
 * unp_drop
 *
 * Record an error on the pcb's socket and sever any connection.
 * Callers use this during teardown so pending and future operations
 * on the socket fail with `errno`.
 */
static void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	/* Post the error before disconnecting so threads woken by the
	 * disconnect observe the failure reason. */
	so->so_error = (u_short)errno;
	unp_disconnect(unp);
}
2190 
2191 /*
2192  * fg_insertuipc_mark
2193  *
2194  * Description:	Mark fileglob for insertion onto message queue if needed
2195  *		Also takes fileglob reference
2196  *
2197  * Parameters:	fg	Fileglob pointer to insert
2198  *
2199  * Returns:	true, if the fileglob needs to be inserted onto msg queue
2200  *
2201  * Locks:	Takes and drops fg_lock, potentially many times
2202  */
static boolean_t
fg_insertuipc_mark(struct fileglob * fg)
{
	boolean_t insert = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/* If a removal from the message queue is still in progress,
	 * wait for it to finish so queue transitions stay serialized. */
	while (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WRMMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
	}

	/* Take a fileglob reference on behalf of the in-flight message. */
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
	fg->fg_msgcount++;
	/* First in-flight reference: flag it so the caller links the
	 * fileglob onto the global msg queue via fg_insertuipc(). */
	if (fg->fg_msgcount == 1) {
		fg->fg_lflags |= FG_INSMSGQ;
		insert = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return insert;
}
2225 
2226 /*
2227  * fg_insertuipc
2228  *
2229  * Description:	Insert marked fileglob onto message queue
2230  *
2231  * Parameters:	fg	Fileglob pointer to insert
2232  *
2233  * Returns:	void
2234  *
2235  * Locks:	Takes and drops fg_lock & uipc_lock
2236  *		DO NOT call this function with proc_fdlock held as unp_gc()
2237  *		can potentially try to acquire proc_fdlock, which can result
2238  *		in a deadlock.
2239  */
static void
fg_insertuipc(struct fileglob * fg)
{
	/* Only act if fg_insertuipc_mark() flagged this fileglob. */
	if (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_INSMSGQ;
		/* Wake any thread in fg_removeuipc_mark() waiting for this
		 * insertion to complete. */
		if (fg->fg_lflags & FG_WINSMSGQ) {
			fg->fg_lflags &= ~FG_WINSMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2256 
2257 /*
2258  * fg_removeuipc_mark
2259  *
2260  * Description:	Mark the fileglob for removal from message queue if needed
2261  *		Also releases fileglob message queue reference
2262  *
2263  * Parameters:	fg	Fileglob pointer to remove
2264  *
2265  * Returns:	true, if the fileglob needs to be removed from msg queue
2266  *
2267  * Locks:	Takes and drops fg_lock, potentially many times
2268  */
static boolean_t
fg_removeuipc_mark(struct fileglob * fg)
{
	boolean_t remove = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/* If an insertion onto the message queue is still pending, wait
	 * for it so the list linkage is valid before we unlink. */
	while (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WINSMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
	}
	fg->fg_msgcount--;
	/* Last in-flight reference dropped: flag it so the caller
	 * unlinks the fileglob from the msg queue via fg_removeuipc(). */
	if (fg->fg_msgcount == 0) {
		fg->fg_lflags |= FG_RMMSGQ;
		remove = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return remove;
}
2289 
2290 /*
2291  * fg_removeuipc
2292  *
2293  * Description:	Remove marked fileglob from message queue
2294  *
2295  * Parameters:	fg	Fileglob pointer to remove
2296  *
2297  * Returns:	void
2298  *
2299  * Locks:	Takes and drops fg_lock & uipc_lock
2300  *		DO NOT call this function with proc_fdlock held as unp_gc()
2301  *		can potentially try to acquire proc_fdlock, which can result
2302  *		in a deadlock.
2303  */
static void
fg_removeuipc(struct fileglob * fg)
{
	/* Only act if fg_removeuipc_mark() flagged this fileglob. */
	if (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_REMOVE(fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_RMMSGQ;
		/* Wake any thread in fg_insertuipc_mark() waiting for this
		 * removal to complete. */
		if (fg->fg_lflags & FG_WRMMSGQ) {
			fg->fg_lflags &= ~FG_WRMMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2320 
2321 /*
2322  * Returns:	0			Success
2323  *		EMSGSIZE		The new fd's will not fit
2324  *		ENOBUFS			Cannot alloc struct fileproc
2325  */
/*
 * unp_externalize
 *
 * Convert a received SCM_RIGHTS control message in-place: the array of
 * in-kernel fileglob pointers is replaced by an array of newly allocated
 * file descriptors in the current process.  On failure every fileglob
 * carried in the message is discarded.
 */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds;
	int error = 0;

	/* Scratch array for the fd numbers while they are in flux. */
	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 *	Allocate all the fds, and if it doesn't fit,
	 *	then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Roll back the fds allocated so far. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 *	At this point we are commited, and can't fail anymore.
	 *	Allocate all the fileprocs, and remove the files
	 *	from the queue.
	 *
	 *	Until we call procfdtbl_releasefd(), fds are in flux
	 *	and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/* The message no longer holds this fileglob; drop its
		 * msg-queue accounting (and unlink if last reference). */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 *	Return the fds into `cm`.
	 *	Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Scrub the tail so no kernel pointers leak to userspace. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* Discard every right carried in the message. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2416 
2417 void
unp_init(void)2418 unp_init(void)
2419 {
2420 	_CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2421 	LIST_INIT(&unp_dhead);
2422 	LIST_INIT(&unp_shead);
2423 }
2424 
#ifndef MIN
/* Classic minimum macro; note it evaluates each argument twice, so do
 * not pass expressions with side effects. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2428 
2429 /*
2430  * Returns:	0			Success
2431  *		EINVAL
2432  *		EBADF
2433  */
/*
 * unp_internalize
 *
 * Convert an outgoing SCM_RIGHTS control message in-place: the array of
 * file descriptors supplied by the sender is replaced by an array of
 * fileglob pointers, each with an extra reference held for the message.
 */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* Bitmap of fds whose fileglob was newly marked for insertion. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/* First pass: validate every fd before committing to anything. */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/* Link newly marked fileglobs onto the global message queue,
	 * outside the fdlock (see fg_insertuipc() locking note). */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2495 
/*
 * unp_gc
 *
 * Garbage-collect fileglobs whose only remaining references are from
 * SCM_RIGHTS messages in transit (unreachable fd cycles).  Classic
 * mark-and-sweep: mark everything externally reachable, then flush and
 * drop whatever remains unmarked.  Runs from a thread call.
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	static struct fileglob **extra_ref;
	struct fileglob **fpp;
	int nunref, i;

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	/* Mark phase: iterate until no fileglob remains deferred. */
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer'  in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			socket_unlock(so, 0);
			/* Mark every right queued in this socket's receive
			 * buffer as reachable (may set FDEFER, forcing
			 * another pass of the outer do/while loop). */
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	nunref = 0;
	fpp = extra_ref;
	/* Sweep phase: collect every still-unmarked fileglob whose only
	 * references come from in-flight messages. */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				socket_lock(so, 0);
				/* Flush the receive buffer, discarding any
				 * rights it holds and breaking the cycle. */
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Drop the extra references taken above; this is the last close
	 * for cycle members, triggering socket teardown. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(PROC_NULL, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2694 
2695 void
unp_dispose(struct mbuf * m)2696 unp_dispose(struct mbuf *m)
2697 {
2698 	if (m) {
2699 		unp_scan(m, unp_discard, NULL);
2700 	}
2701 }
2702 
2703 /*
2704  * Returns:	0			Success
2705  */
2706 static int
unp_listen(struct unpcb * unp,proc_t p)2707 unp_listen(struct unpcb *unp, proc_t p)
2708 {
2709 	kauth_cred_t safecred = kauth_cred_proc_ref(p);
2710 	cru2x(safecred, &unp->unp_peercred);
2711 	kauth_cred_unref(&safecred);
2712 	unp->unp_flags |= UNP_HAVEPCCACHED;
2713 	return 0;
2714 }
2715 
2716 static void
unp_scan(struct mbuf * m0,void (* op)(struct fileglob *,void * arg),void * arg)2717 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2718 {
2719 	struct mbuf *m;
2720 	struct fileglob **rp;
2721 	struct cmsghdr *cm;
2722 	int i;
2723 	int qfds;
2724 
2725 	while (m0) {
2726 		for (m = m0; m; m = m->m_next) {
2727 			if (m->m_type == MT_CONTROL &&
2728 			    (size_t)m->m_len >= sizeof(*cm)) {
2729 				cm = mtod(m, struct cmsghdr *);
2730 				if (cm->cmsg_level != SOL_SOCKET ||
2731 				    cm->cmsg_type != SCM_RIGHTS) {
2732 					continue;
2733 				}
2734 				qfds = (cm->cmsg_len - sizeof(*cm)) /
2735 				    sizeof(int);
2736 				rp = (struct fileglob **)(cm + 1);
2737 				for (i = 0; i < qfds; i++) {
2738 					(*op)(*rp++, arg);
2739 				}
2740 				break;          /* XXX, but saves time */
2741 			}
2742 		}
2743 		m0 = m0->m_act;
2744 	}
2745 }
2746 
/*
 * unp_mark
 *
 * unp_scan() callback for the unp_gc() mark phase: flag a fileglob
 * reachable from an externally accessible socket's receive buffer.
 */
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	/* Atomically set FMARK|FDEFER unless already marked; in that
	 * case bail out without touching unp_defer. */
	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
		        os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	/* Newly deferred: forces another pass of the unp_gc() mark loop. */
	unp_defer++;
}
2761 
/*
 * unp_discard
 *
 * Release one in-flight right on `fg`: undo the message-queue
 * accounting taken in unp_internalize() and drop the fileglob
 * reference held for the message.
 */
static void
unp_discard(struct fileglob *fg, void *p)
{
	if (p == NULL) {
		p = current_proc();             /* XXX */
	}
	(void) OSAddAtomic(1, &unp_disposed);
	/* Drop the msg-queue accounting; unlink if last in-flight ref. */
	if (fg_removeuipc_mark(fg)) {
		VERIFY(fg->fg_lflags & FG_RMMSGQ);
		fg_removeuipc(fg);
	}
	(void) OSAddAtomic(-1, &unp_rights);

	/* Release the reference taken by fg_insertuipc_mark(). */
	(void) fg_drop(p, fg);
}
2777 
/*
 * unp_lock
 *
 * Protocol lock entry point: take the per-pcb mutex, optionally gain a
 * use-count reference, and record the caller for lock debugging.
 * Panics if the socket has no pcb or its use count is negative.
 */
int
unp_lock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved;
	/* Default to our caller's return address for lock debugging. */
	if (lr == 0) {
		lr_saved = (void *)  __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (so->so_pcb) {
		lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
	} else {
		panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
		    so, lr_saved, so->so_usecount);
	}

	if (so->so_usecount < 0) {
		panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
		    so, so->so_pcb, lr_saved, so->so_usecount);
	}

	if (refcount) {
		VERIFY(so->so_usecount > 0);
		so->so_usecount++;
	}
	/* Record the lock site in the circular debug history. */
	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
	return 0;
}
2808 
/*
 * unp_unlock
 *
 * Protocol unlock entry point: optionally drop a use-count reference,
 * record the caller for lock debugging, and release the per-pcb mutex.
 * When the last reference goes away on a clearing pcb, free the socket
 * and the pcb itself, then kick the garbage collector.
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp = sotounpcb(so);

	/* Default to our caller's return address for lock debugging. */
	if (lr == 0) {
		lr_saved = (void *) __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* Record the unlock site in the circular debug history. */
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	/* Last reference on a pcb being torn down: free everything. */
	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		/* Schedule a GC pass to reap any rights cycles. */
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2856 
2857 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2858 unp_getlock(struct socket *so, __unused int flags)
2859 {
2860 	struct unpcb *unp = (struct unpcb *)so->so_pcb;
2861 
2862 
2863 	if (so->so_pcb) {
2864 		if (so->so_usecount < 0) {
2865 			panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2866 		}
2867 		return &unp->unp_mtx;
2868 	} else {
2869 		panic("unp_getlock: so=%p NULL so_pcb", so);
2870 		return so->so_proto->pr_domain->dom_mtx;
2871 	}
2872 }
2873