1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97
98 #include <net/sockaddr_utils.h>
99
100 #if __has_ptrcheck
101 #include <machine/trap.h>
102 #endif /* __has_ptrcheck */
103
104 #if CONFIG_MACF
105 #include <security/mac_framework.h>
106 #endif /* CONFIG_MACF */
107
108 #include <mach/vm_param.h>
109
110 #ifndef ROUNDUP64
111 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
112 #endif
113
114 #ifndef ADVANCE64
115 #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
116 #endif
117
118 /*
119 * Maximum number of FDs that can be passed in an mbuf
120 */
121 #define UIPC_MAX_CMSG_FD 512
122
123 ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
124 static unp_gen_t unp_gencnt;
125 static u_int unp_count;
126
127 static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
128 static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
129 static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
130
131 static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
132 static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
133 static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
134
135 static u_int disconnect_in_progress;
136
137 static struct unp_head unp_shead, unp_dhead;
138 static int unp_defer;
139 static thread_call_t unp_gc_tcall;
140 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
141
142 SYSCTL_DECL(_net_local);
143
144 static int unp_rights; /* file descriptors in flight */
145 static int unp_disposed; /* discarded file descriptors */
146
147 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
148
149 #define ULEF_CONNECTION 0x01
150 uint32_t unp_log_enable_flags = 0;
151
152 SYSCTL_UINT(_net_local, OID_AUTO, log, CTLFLAG_RD | CTLFLAG_LOCKED,
153 &unp_log_enable_flags, 0, "");
154
155 SYSCTL_UINT(_net_local, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
156 &unp_count, 0, "");
157
158 /*
159 * mDNSResponder tracing. When enabled, endpoints connected to
160 * /var/run/mDNSResponder will be traced; during each send on
161 * the traced socket, we log the PID and process name of the
162 * sending process. We also print out a bit of info related
163 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
164 * of mDNSResponder stays the same.
165 */
166 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
167
168 static int unpst_tracemdns; /* enable tracing */
169
170 #define MDNS_IPC_MSG_HDR_VERSION_1 1
171
/*
 * Minimal mirror of the mDNSResponder IPC message header (dnssd_ipc.h);
 * only used by the UNP_TRACE_MDNS logging in uipc_send().  Must stay in
 * sync with mDNSResponder's layout (see comment above MDNSRESPONDER_PATH).
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;       /* compared against ntohl(MDNS_IPC_MSG_HDR_VERSION_1) by the tracer */
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;            /* operation code, logged (byte-swapped) when tracing */
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
183
184 /*
185 * Unix communications domain.
186 *
187 * TODO:
188 * SEQPACKET, RDM
189 * rethink name space problems
190 * need a proper out-of-band
191 * lock pushdown
192 */
/*
 * The unnamed AF_LOCAL address: handed back by uipc_accept(),
 * uipc_peeraddr() and uipc_sockaddr() whenever a socket (or its peer)
 * has no bound pathname.
 */
static struct sockaddr sun_noname = {
	.sa_len = sizeof(struct sockaddr),
	.sa_family = AF_LOCAL,
	.sa_data = {
		0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0
	}
};
201
202 static ino_t unp_ino; /* prototype for fake inode numbers */
203
204 static int unp_attach(struct socket *);
205 static void unp_detach(struct unpcb *);
206 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
207 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
208 static void unp_disconnect(struct unpcb *);
209 static void unp_shutdown(struct unpcb *);
210 static void unp_drop(struct unpcb *, int);
211 static void unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
212 static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
213 static void unp_mark(struct fileglob *, __unused void *);
214 static void unp_discard(struct fileglob *, void *);
215 static int unp_internalize(struct mbuf *, proc_t);
216 static int unp_listen(struct unpcb *, proc_t);
217 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
218 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
219
__startup_func
static void
unp_gc_setup(void)
{
	/*
	 * Allocate the garbage-collector thread call once at boot.
	 * THREAD_CALL_OPTIONS_ONCE coalesces repeated enqueues into a
	 * single pending invocation of unp_gc().
	 */
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
229
/*
 * Acquire the locks of both endpoints of a connection in the canonical
 * order (lower socket address first) to avoid deadlocks.
 *
 * Caller holds so's lock on entry; on return both so and conn_so are
 * locked.  When so sorts after conn_so, so's lock must be dropped
 * briefly to re-acquire in order.  For that window UNP_DONTDISCONNECT
 * is set and rw_thrcount bumped so the PCB isn't torn down underneath
 * us (the disconnect path is expected to honor the flag and sleep on
 * the unp, matching the wakeup() below — confirm in unp_disconnect()).
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	ASSERT(so != conn_so);

	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
253
254 static int
uipc_abort(struct socket * so)255 uipc_abort(struct socket *so)
256 {
257 struct unpcb *unp = sotounpcb(so);
258
259 if (unp == 0) {
260 return EINVAL;
261 }
262 unp_drop(unp, ECONNABORTED);
263 unp_detach(unp);
264 sofree(so);
265 return 0;
266 }
267
268 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)269 uipc_accept(struct socket *so, struct sockaddr **nam)
270 {
271 struct unpcb *unp = sotounpcb(so);
272
273 if (unp == 0) {
274 return EINVAL;
275 }
276
277 /*
278 * Pass back name of connected socket,
279 * if it was bound and we are still connected
280 * (our peer may have closed already!).
281 */
282 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
283 *nam = dup_sockaddr(SA(unp->unp_conn->unp_addr), 1);
284 } else {
285 if (unp_log_enable_flags & ULEF_CONNECTION) {
286 os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
287 __func__, unp->unp_gencnt);
288 }
289 *nam = dup_sockaddr(SA(&sun_noname), 1);
290 }
291 return 0;
292 }
293
294 /*
295 * Returns: 0 Success
296 * EISCONN
297 * unp_attach:
298 */
299 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)300 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
301 {
302 struct unpcb *unp = sotounpcb(so);
303
304 if (unp != 0) {
305 return EISCONN;
306 }
307 return unp_attach(so);
308 }
309
310 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)311 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
312 {
313 struct unpcb *unp = sotounpcb(so);
314
315 if (unp == 0) {
316 return EINVAL;
317 }
318
319 return unp_bind(unp, nam, p);
320 }
321
322 /*
323 * Returns: 0 Success
324 * EINVAL
325 * unp_connect:??? [See elsewhere in this file]
326 */
327 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)328 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
329 {
330 struct unpcb *unp = sotounpcb(so);
331
332 if (unp == 0) {
333 return EINVAL;
334 }
335 return unp_connect(so, nam, p);
336 }
337
338 /*
339 * Returns: 0 Success
340 * EINVAL
341 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
342 * unp_connect2:EINVAL Invalid argument
343 */
344 static int
uipc_connect2(struct socket * so1,struct socket * so2)345 uipc_connect2(struct socket *so1, struct socket *so2)
346 {
347 struct unpcb *unp = sotounpcb(so1);
348
349 if (unp == 0) {
350 return EINVAL;
351 }
352
353 return unp_connect2(so1, so2);
354 }
355
356 /* control is EOPNOTSUPP */
357
358 static int
uipc_detach(struct socket * so)359 uipc_detach(struct socket *so)
360 {
361 struct unpcb *unp = sotounpcb(so);
362
363 if (unp == 0) {
364 return EINVAL;
365 }
366
367 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
368 unp_detach(unp);
369 return 0;
370 }
371
372 static int
uipc_disconnect(struct socket * so)373 uipc_disconnect(struct socket *so)
374 {
375 struct unpcb *unp = sotounpcb(so);
376
377 if (unp == 0) {
378 return EINVAL;
379 }
380 unp_disconnect(unp);
381 return 0;
382 }
383
384 /*
385 * Returns: 0 Success
386 * EINVAL
387 */
388 static int
uipc_listen(struct socket * so,__unused proc_t p)389 uipc_listen(struct socket *so, __unused proc_t p)
390 {
391 struct unpcb *unp = sotounpcb(so);
392
393 if (unp == 0 || unp->unp_vnode == 0) {
394 return EINVAL;
395 }
396 return unp_listen(unp, p);
397 }
398
/*
 * Return the peer's bound address for getpeername(2), or the unnamed
 * address when the peer is gone or never bound.
 *
 * Note that unp_get_locks_in_order() can briefly drop so's lock, so
 * unp->unp_conn is deliberately re-read after both locks are held.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
	so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
	if (so2 != NULL && so != so2) {
		unp_get_locks_in_order(so, so2);
	}

	/* Re-check the connection: it may have gone away while locking. */
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr(SA(unp->unp_conn->unp_addr), 1);
	} else {
		*nam = dup_sockaddr(SA(&sun_noname), 1);
	}
	if (so2 != NULL && so != so2) {
		socket_unlock(so2, 1);
	}
	return 0;
}
423
/*
 * Called after the user has consumed data from so's receive queue.
 * Wakes up the peer's send side and, for SOCK_STREAM, returns the
 * space just freed to the sender's buffer limits (backpressure).
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
/* Local aliases: rcv is OUR receive buffer, snd is the PEER's send buffer. */
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
	switch (so->so_type) {
	case SOCK_DGRAM:
		if (unp->unp_conn == NULL) {
			break;
		}
		so2 = unp->unp_conn->unp_socket;

		/* Datagram case: just wake up a peer blocked on sending. */
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		break;
	case SOCK_STREAM:
		if (unp->unp_conn == NULL) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		/*
		 * unp_mbcnt/unp_cc cache the receive buffer levels as of
		 * the last adjustment, so only the delta consumed since
		 * then is credited back to the peer's send limits.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}
		if (so != so2) {
			socket_unlock(so2, 1);
		}
#undef snd
#undef rcv
		break;
	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
483
484 /* pru_rcvoob is EOPNOTSUPP */
485
486 /*
487 * Returns: 0 Success
488 * EINVAL
489 * EOPNOTSUPP
490 * EPIPE
491 * ENOTCONN
492 * EISCONN
493 * unp_internalize:EINVAL
494 * unp_internalize:EBADF
495 * unp_connect:EAFNOSUPPORT Address family not supported
496 * unp_connect:EINVAL Invalid argument
497 * unp_connect:ENOTSOCK Not a socket
498 * unp_connect:ECONNREFUSED Connection refused
499 * unp_connect:EISCONN Socket is connected
500 * unp_connect:EPROTOTYPE Protocol wrong type for socket
501 * unp_connect:???
502 * sbappendaddr:ENOBUFS [5th argument, contents modified]
503 * sbappendaddr:??? [whatever a filter author chooses]
504 */
/*
 * Deliver data (and optional control/rights) directly into the peer's
 * receive buffer.  See the Returns block above for the error contract.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int32_t len = m_pktlen(m);

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		/* Out-of-band data is not supported on local sockets. */
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		/* Convert user fds in the control message into fileglob refs. */
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* Explicit destination: temporarily connect to it. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = SA(unp->unp_addr);
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		/* mbuf ownership passed to (or freed by) the append path. */
		m = NULL;
		if (nam) {
			/* Undo the temporary sendto() connection. */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
/* Local aliases: rcv is the PEER's receive buffer, snd is OUR send buffer. */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break;  /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			if (so != so2) {
				socket_unlock(so2, 1);
			}
			break;
		}

		if (unp->unp_flags & UNP_TRACE_MDNS) {
			/* mDNSResponder tracing: log sender pid and IPC op. */
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full.  It is
		 * possible, however, that append might fail.  In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		/*
		 * Charge what now sits in the peer's receive buffer against
		 * our own send limits; unp_mbcnt/unp_cc remember the levels
		 * accounted so far (uipc_rcvd() credits them back).
		 */
		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	so_update_tx_data_stats(so, 1, len);

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0) {
		/* Undo unp_internalize() before freeing the control mbuf. */
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
724
/*
 * Fill in stat information for fstat(2) on a local socket.
 *
 * st_blksize reports the send buffer high-water mark, plus whatever is
 * currently queued on the peer's receive buffer for streams.  A fake
 * inode number is lazily assigned from the global unp_ino counter so
 * the socket has a stable identity across calls.
 *
 * 'ub' points at a struct stat64 when isstat64 is non-zero, otherwise
 * at a struct stat.
 */
static int
uipc_sense(struct socket *so, void *ub, int isstat64)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	blksize_t blksize;

	if (unp == 0) {
		return EINVAL;
	}

	blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
		so2 = unp->unp_conn->unp_socket;
		blksize += so2->so_rcv.sb_cc;
	}
	/*
	 * NOTE(review): the unp_ino increment is not obviously serialized
	 * here — concurrent first-stats could hand out duplicate fake
	 * inode numbers; confirm whether a caller-held lock covers this.
	 */
	if (unp->unp_ino == 0) {
		unp->unp_ino = unp_ino++;
	}

	if (isstat64 != 0) {
		struct stat64 *sb64;

		sb64 = (struct stat64 *)ub;
		sb64->st_blksize = blksize;
		sb64->st_dev = NODEV;
		sb64->st_ino = (ino64_t)unp->unp_ino;
	} else {
		struct stat *sb;

		sb = (struct stat *)ub;
		sb->st_blksize = blksize;
		sb->st_dev = NODEV;
		sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
	}

	return 0;
}
763
764 /*
765 * Returns: 0 Success
766 * EINVAL
767 *
768 * Notes: This is not strictly correct, as unp_shutdown() also calls
769 * socantrcvmore(). These should maybe both be conditionalized
770 * on the 'how' argument in soshutdown() as called from the
771 * shutdown() system call.
772 */
773 static int
uipc_shutdown(struct socket * so)774 uipc_shutdown(struct socket *so)
775 {
776 struct unpcb *unp = sotounpcb(so);
777
778 if (unp == 0) {
779 return EINVAL;
780 }
781 socantsendmore(so);
782 unp_shutdown(unp);
783 return 0;
784 }
785
786 /*
787 * Returns: 0 Success
788 * EINVAL Invalid argument
789 */
790 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)791 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
792 {
793 struct unpcb *unp = sotounpcb(so);
794
795 if (unp == NULL) {
796 return EINVAL;
797 }
798 if (unp->unp_addr != NULL) {
799 *nam = dup_sockaddr(SA(unp->unp_addr), 1);
800 } else {
801 *nam = dup_sockaddr(SA(&sun_noname), 1);
802 }
803 return 0;
804 }
805
/*
 * User-request dispatch table for PF_LOCAL.  pru_control and
 * pru_rcvoob are intentionally absent (EOPNOTSUPP; see the comments
 * above), and the generic sosend/soreceive paths are used directly.
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = uipc_abort,
	.pru_accept = uipc_accept,
	.pru_attach = uipc_attach,
	.pru_bind = uipc_bind,
	.pru_connect = uipc_connect,
	.pru_connect2 = uipc_connect2,
	.pru_detach = uipc_detach,
	.pru_disconnect = uipc_disconnect,
	.pru_listen = uipc_listen,
	.pru_peeraddr = uipc_peeraddr,
	.pru_rcvd = uipc_rcvd,
	.pru_send = uipc_send,
	.pru_sense = uipc_sense,
	.pru_shutdown = uipc_shutdown,
	.pru_sockaddr = uipc_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive,
};
825
826 int
uipc_ctloutput(struct socket * so,struct sockopt * sopt)827 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
828 {
829 struct unpcb *unp = sotounpcb(so);
830 int error = 0;
831 pid_t peerpid;
832 proc_t p;
833 task_t t __single;
834 struct socket *peerso;
835
836 switch (sopt->sopt_dir) {
837 case SOPT_GET:
838 switch (sopt->sopt_name) {
839 case LOCAL_PEERCRED:
840 if (unp->unp_flags & UNP_HAVEPC) {
841 error = sooptcopyout(sopt, &unp->unp_peercred,
842 sizeof(unp->unp_peercred));
843 } else {
844 if (so->so_type == SOCK_STREAM) {
845 error = ENOTCONN;
846 } else {
847 error = EINVAL;
848 }
849 }
850 break;
851 case LOCAL_PEERPID:
852 case LOCAL_PEEREPID:
853 if (unp->unp_conn == NULL) {
854 error = ENOTCONN;
855 break;
856 }
857 peerso = unp->unp_conn->unp_socket;
858 if (peerso == NULL) {
859 panic("peer is connected but has no socket?");
860 }
861 if (so != peerso) {
862 unp_get_locks_in_order(so, peerso);
863 }
864 if (sopt->sopt_name == LOCAL_PEEREPID &&
865 peerso->so_flags & SOF_DELEGATED) {
866 peerpid = peerso->e_pid;
867 } else {
868 peerpid = peerso->last_pid;
869 }
870 if (so != peerso) {
871 socket_unlock(peerso, 1);
872 }
873 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
874 break;
875 case LOCAL_PEERUUID:
876 case LOCAL_PEEREUUID:
877 if (unp->unp_conn == NULL) {
878 error = ENOTCONN;
879 break;
880 }
881 peerso = unp->unp_conn->unp_socket;
882 if (peerso == NULL) {
883 panic("peer is connected but has no socket?");
884 }
885 if (so != peerso) {
886 unp_get_locks_in_order(so, peerso);
887 }
888 if (sopt->sopt_name == LOCAL_PEEREUUID &&
889 peerso->so_flags & SOF_DELEGATED) {
890 error = sooptcopyout(sopt, &peerso->e_uuid,
891 sizeof(peerso->e_uuid));
892 } else {
893 error = sooptcopyout(sopt, &peerso->last_uuid,
894 sizeof(peerso->last_uuid));
895 }
896 if (so != peerso) {
897 socket_unlock(peerso, 1);
898 }
899 break;
900 case LOCAL_PEERTOKEN:
901 if (unp->unp_conn == NULL) {
902 error = ENOTCONN;
903 break;
904 }
905 peerso = unp->unp_conn->unp_socket;
906 if (peerso == NULL) {
907 panic("peer is connected but has no socket?");
908 }
909 if (so != peerso) {
910 unp_get_locks_in_order(so, peerso);
911 }
912 peerpid = peerso->last_pid;
913 p = proc_find(peerpid);
914 if (p != PROC_NULL) {
915 t = proc_task(p);
916 if (t != TASK_NULL) {
917 audit_token_t peertoken;
918 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
919 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
920 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
921 } else {
922 error = EINVAL;
923 }
924 } else {
925 error = EINVAL;
926 }
927 proc_rele(p);
928 } else {
929 error = EINVAL;
930 }
931 if (so != peerso) {
932 socket_unlock(peerso, 1);
933 }
934 break;
935 default:
936 error = EOPNOTSUPP;
937 break;
938 }
939 break;
940 case SOPT_SET:
941 default:
942 error = EOPNOTSUPP;
943 break;
944 }
945
946 return error;
947 }
948
949 /*
950 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
951 * for stream sockets, although the total for sender and receiver is
952 * actually only PIPSIZ.
953 * Datagram sockets really use the sendspace as the maximum datagram size,
954 * and don't really want to reserve the sendspace. Their recvspace should
955 * be large enough for at least one max-size datagram plus address.
956 */
957 #ifndef PIPSIZ
958 #define PIPSIZ 8192
959 #endif
960 static u_int32_t unpst_sendspace = PIPSIZ;
961 static u_int32_t unpst_recvspace = PIPSIZ;
962 static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
963 static u_int32_t unpdg_recvspace = 4 * 1024;
964
965 SYSCTL_DECL(_net_local_stream);
966 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
967 &unpst_sendspace, 0, "");
968 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
969 &unpst_recvspace, 0, "");
970 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
971 &unpst_tracemdns, 0, "");
972 SYSCTL_DECL(_net_local_dgram);
973 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
974 &unpdg_sendspace, 0, "");
975 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
976 &unpdg_recvspace, 0, "");
977
978 /*
979 * Returns: 0 Success
980 * ENOBUFS
981 * soreserve:ENOBUFS
982 */
/*
 * Allocate and initialize a new unpcb for a freshly created socket.
 *
 * Reserves default send/receive buffer space (unless the caller has
 * already configured hiwat marks), publishes the PCB on the global
 * stream or datagram list, and flags both socket buffers SB_UNIX.
 *
 * Returns:	0 Success
 *		soreserve:ENOBUFS
 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			/*
			 * By default soreserve() will set the low water
			 * mark to MCLBYTES which is too high given our
			 * default sendspace. Override it here to something
			 * sensible.
			 */
			so->so_snd.sb_lowat = 1;
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Publish the new PCB on the global list under the list lock. */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send().  By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send.  This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
1048
/*
 * Tear down a PCB: unlink it from the global list, detach any bound
 * vnode, disconnect from the peer, and reset every datagram socket
 * still holding a reference to us, then mark the socket for dealloc.
 *
 * Caller holds the socket lock on entry and on exit; it is dropped
 * and re-taken internally to honor the lock ordering with
 * unp_connect_lock and unp_disconnect_lock.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: the vnode may have been detached while unlocked. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp); /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1128
1129 /*
1130 * Returns: 0 Success
1131 * EAFNOSUPPORT
1132 * EINVAL
1133 * EADDRINUSE
1134 * namei:??? [anything namei can return]
1135 * vnode_authorize:??? [anything vnode_authorize can return]
1136 *
1137 * Notes: p at this point is the current process, as this function is
1138 * only called by sobind().
1139 */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = SUN(nam);
	struct vnode *vp __single, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	/* Accept AF_UNIX, plus the historical "unspecified" family 0 */
	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	/*
	 * Check if the socket is already bound to an address
	 */
	if (unp->unp_vnode != NULL) {
		return EINVAL;
	}
	/*
	 * Check if the socket may have been shut down
	 */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		return EINVAL;
	}

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (namelen >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Copy the path into a local buffer and NUL-terminate it for namei() */
	char *path = UNP_FORGE_PATH(soun, namelen);
	bcopy(path, buf, namelen);
	buf[namelen] = 0;

	/*
	 * Drop the socket lock across the blocking VFS operations below;
	 * socket state is re-validated after it is re-taken.
	 */
	socket_unlock(so, 0);

	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		/* A filesystem node already exists at that path */
		socket_lock(so, 0);
		return EADDRINUSE;
	}

	/* Create a VSOCK node, honoring the process's file-creation mask */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return error;
	}

	socket_lock(so, 0);

	/* Re-check: another thread may have bound us while we were unlocked */
	if (unp->unp_vnode != NULL) {
		vnode_put(vp); /* drop the iocount */
		return EINVAL;
	}

	error = vnode_ref(vp); /* gain a longterm reference */
	if (error) {
		vnode_put(vp); /* drop the iocount */
		return error;
	}

	/* Cross-link vnode and socket, and remember the bound address */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = SUN(dup_sockaddr(nam, 1));
	vnode_put(vp); /* drop the iocount */

	return 0;
}
1266
1267
1268 /*
1269 * Returns: 0 Success
1270 * EAFNOSUPPORT Address family not supported
1271 * EINVAL Invalid argument
1272 * ENOTSOCK Not a socket
1273 * ECONNREFUSED Connection refused
1274 * EPROTOTYPE Protocol wrong type for socket
1275 * EISCONN Socket is connected
1276 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1277 * unp_connect2:EINVAL Invalid argument
1278 * namei:??? [anything namei can return]
1279 * vnode_authorize:???? [anything vnode_authorize can return]
1280 *
1281 * Notes: p at this point is the current process, as this function is
1282 * only called by sosend(), sendfile(), and soconnectlock().
1283 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
{
	struct sockaddr_un *soun = SUN(nam);
	struct vnode *vp;
	struct socket *so2, *so3, *list_so = NULL;
	struct unpcb *unp, *unp2, *unp3;
	vfs_context_t ctx = vfs_context_current();
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];

	/* Accept AF_UNIX, plus the historical "unspecified" family 0 */
	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	unp = sotounpcb(so);
	so2 = so3 = NULL;

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (len >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}

	soisconnecting(so);

	/* NUL-terminate the path in a local buffer for the namei() lookup */
	char *path = UNP_FORGE_PATH(soun, len);
	bcopy(path, buf, len);
	buf[len] = 0;

	/* Drop the socket lock across the blocking VFS lookup */
	socket_unlock(so, 0);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	nameidone(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		socket_lock(so, 0);
		goto out;
	}

#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_connect(ctx, vp, so);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	/* Connecting requires write access to the socket node */
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}

	/*
	 * unp_connect_lock keeps vp->v_socket stable against a concurrent
	 * close of the listening socket (see unp_detach).
	 */
	lck_mtx_lock(&unp_connect_lock);

	if (vp->v_socket == 0) {
		lck_mtx_unlock(&unp_connect_lock);
		error = ECONNREFUSED;
		socket_lock(so, 0);
		goto out;
	}

	socket_lock(vp->v_socket, 1);   /* Get a reference on the listening socket */
	so2 = vp->v_socket;
	lck_mtx_unlock(&unp_connect_lock);


	if (so2->so_pcb == NULL) {
		error = ECONNREFUSED;
		if (so != so2) {
			socket_unlock(so2, 1);
			socket_lock(so, 0);
		} else {
			/* Release the reference held for the listen socket */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
		goto out;
	}

	/*
	 * Take both socket locks in address order to avoid deadlock with
	 * another thread doing the same in the opposite direction.
	 */
	if (so < so2) {
		socket_unlock(so2, 0);
		socket_lock(so, 0);
		socket_lock(so2, 0);
	} else if (so > so2) {
		socket_lock(so, 0);
	}
	/*
	 * Check if socket was connected while we were trying to
	 * get the socket locks in order.
	 * XXX - probably shouldn't return an error for SOCK_DGRAM
	 */
	if ((so->so_state & SS_ISCONNECTED) != 0) {
		error = EISCONN;
		goto decref_out;
	}

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto decref_out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Release the incoming socket but keep a reference */
		socket_unlock(so, 0);

		/* Stream connect: the listener must spawn a new server socket */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, nam)) == 0) {
			error = ECONNREFUSED;
			if (so != so2) {
				socket_unlock(so2, 1);
				socket_lock(so, 0);
			} else {
				socket_lock(so, 0);
				/* Release the reference held for
				 * listen socket.
				 */
				VERIFY(so2->so_usecount > 0);
				so2->so_usecount--;
			}
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr) {
			unp3->unp_addr = SUN(dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1));
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));

		/* Here we need to have both so and so2 locks and so2
		 * is already locked. Lock ordering is required.
		 */
		if (so < so2) {
			socket_unlock(so2, 0);
			socket_lock(so, 0);
			socket_lock(so2, 0);
		} else {
			socket_lock(so, 0);
		}

		/* Check again if the socket state changed when its lock was released */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			socket_unlock(so2, 1);
			/* Dispose of the never-used server socket so3 */
			socket_lock(so3, 0);
			sofreelastref(so3, 1);
			goto out;
		}
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		/* Hold the reference on listening socket until the end */
		socket_unlock(so2, 0);
		list_so = so2;

		/* Lock ordering doesn't matter because so3 was just created */
		socket_lock(so3, 1);
		/* From here on, so2 is the server-side socket of the pair */
		so2 = so3;

		/*
		 * Enable tracing for mDNSResponder endpoints.
		 */
		if (unpst_tracemdns &&
		    !strbufcmp(soun->sun_path, MDNSRESPONDER_PATH)) {
			unp->unp_flags |= UNP_TRACE_MDNS;
			unp2->unp_flags |= UNP_TRACE_MDNS;
		}
	}

	error = unp_connect2(so, so2);

decref_out:
	if (so2 != NULL) {
		if (so != so2) {
			socket_unlock(so2, 1);
		} else {
			/* Release the extra reference held for the listen socket.
			 * This is possible only for SOCK_DGRAM sockets. We refuse
			 * connecting to the same socket for SOCK_STREAM sockets.
			 */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
	}

	if (list_so != NULL) {
		/* Drop the reference held on the listening socket */
		socket_lock(list_so, 0);
		socket_unlock(list_so, 1);
	}

out:
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	vnode_put(vp);
	return error;
}
1514
1515 /*
1516 * Returns: 0 Success
1517 * EPROTOTYPE Protocol wrong type for socket
1518 * EINVAL Invalid argument
1519 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	if (so2->so_type != so->so_type) {
		return EPROTOTYPE;
	}

	unp2 = sotounpcb(so2);

	/* Caller must hold both socket locks; they stay held on return */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Verify both sockets are still opened */
	if (unp == 0 || unp2 == 0) {
		return EINVAL;
	}

	/* Link the pcbs; the connection holds a usecount on so2 */
	unp->unp_conn = unp2;
	so2->so_usecount++;

	switch (so->so_type) {
	case SOCK_DGRAM:
		/* Datagram peers are tracked on the target's reference list */
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);

		if (so != so2) {
			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
			/* Keep an extra reference on so2 that will be dropped
			 * soon after getting the locks in order
			 */
			socket_unlock(so2, 0);
			soisconnected(so);
			unp_get_locks_in_order(so, so2);
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		} else {
			soisconnected(so);
		}

		break;

	case SOCK_STREAM:
		/* This takes care of socketpair */
		if (!(unp->unp_flags & UNP_HAVEPC) &&
		    !(unp2->unp_flags & UNP_HAVEPC)) {
			cru2x(kauth_cred_get(), &unp->unp_peercred);
			unp->unp_flags |= UNP_HAVEPC;

			cru2x(kauth_cred_get(), &unp2->unp_peercred);
			unp2->unp_flags |= UNP_HAVEPC;
		}
		/* Stream connections are symmetric: link back and count us */
		unp2->unp_conn = unp;
		so->so_usecount++;

		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
		socket_unlock(so, 0);
		soisconnected(so2);

		/* Keep an extra reference on so2, that will be dropped soon after
		 * getting the locks in order again.
		 */
		socket_unlock(so2, 0);

		socket_lock(so, 0);
		soisconnected(so);

		unp_get_locks_in_order(so, so2);
		/* Decrement the extra reference left before */
		VERIFY(so2->so_usecount > 0);
		so2->so_usecount--;
		break;

	default:
		panic("unknown socket type %d in unp_connect2", so->so_type);
	}
	/* Both locks are held again on exit, as the caller expects */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return 0;
}
1601
/*
 * Sever the connection between unp and its peer (if any).
 *
 * Serialized globally through the disconnect_in_progress flag so only
 * one disconnect runs at a time; acquires both peer socket locks in
 * address order.  Called and returns with unp's socket lock held.
 */
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = NULL;
	struct socket *so2 = NULL, *so;
	struct socket *waitso;
	int so_locked = 1, strdisconn = 0;

	so = unp->unp_socket;
	if (unp->unp_conn == NULL) {
		return;
	}
	/* Wait for any other in-flight disconnect, then claim the token */
	lck_mtx_lock(&unp_disconnect_lock);
	while (disconnect_in_progress != 0) {
		if (so_locked == 1) {
			socket_unlock(so, 0);
			so_locked = 0;
		}
		(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
		    PSOCK, "disconnect", NULL);
	}
	disconnect_in_progress = 1;
	lck_mtx_unlock(&unp_disconnect_lock);

	if (so_locked == 0) {
		socket_lock(so, 0);
		so_locked = 1;
	}

	unp2 = unp->unp_conn;

	/* Peer may have disappeared while we slept waiting for the token */
	if (unp2 == 0 || unp2->unp_socket == NULL) {
		goto out;
	}
	so2 = unp2->unp_socket;

try_again:
	/* Take both socket locks in address order; waitso is the one we
	 * may have to sleep on below */
	if (so == so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		waitso = so;
	} else if (so < so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		socket_lock(so2, 1);
		waitso = so2;
	} else {
		if (so_locked == 1) {
			socket_unlock(so, 0);
		}
		socket_lock(so2, 1);
		socket_lock(so, 0);
		waitso = so;
	}
	so_locked = 1;

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Check for the UNP_DONTDISCONNECT flag, if it
	 * is set, release both sockets and go to sleep
	 */

	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		so_locked = 0;

		/* PDROP: msleep releases unp->unp_mtx and does not retake it */
		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
		    PSOCK | PDROP, "unpdisconnect", NULL);
		goto try_again;
	}

	if (unp->unp_conn == NULL) {
		panic("unp_conn became NULL after sleep");
	}

	/* Break the forward link; drop the usecount it held on so2 */
	unp->unp_conn = NULL;
	VERIFY(so2->so_usecount > 0);
	so2->so_usecount--;

	if (unp->unp_flags & UNP_TRACE_MDNS) {
		unp->unp_flags &= ~UNP_TRACE_MDNS;
	}

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		/* Datagram: just remove us from the peer's reference list */
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		break;

	case SOCK_STREAM:
		/* Stream: break the back link and drop its usecount on so */
		unp2->unp_conn = NULL;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;

		/*
		 * Set the socket state correctly but do a wakeup later when
		 * we release all locks except the socket lock, this will avoid
		 * a deadlock.
		 */
		unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		if (unp2->unp_flags & UNP_TRACE_MDNS) {
			unp2->unp_flags &= ~UNP_TRACE_MDNS;
		}

		strdisconn = 1;
		break;
	default:
		panic("unknown socket type %d", so->so_type);
	}
out:
	/* Release the disconnect token and wake any waiters */
	lck_mtx_lock(&unp_disconnect_lock);
	disconnect_in_progress = 0;
	wakeup(&disconnect_in_progress);
	lck_mtx_unlock(&unp_disconnect_lock);

	if (strdisconn) {
		/* Deliver the deferred disconnect wakeups, one socket at a time */
		socket_unlock(so, 0);
		soisdisconnected(so2);
		socket_unlock(so2, 1);

		socket_lock(so, 0);
		soisdisconnected(so);
	}
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return;
}
1741
1742 /*
1743 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1744 * The unpcb_compat data structure is passed to user space and must not change.
1745 */
1746 static void
unpcb_to_compat(struct unpcb * up,struct unpcb_compat * cp)1747 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1748 {
1749 #if defined(__LP64__)
1750 cp->unp_link.le_next = (u_int32_t)
1751 VM_KERNEL_ADDRHASH(up->unp_link.le_next);
1752 cp->unp_link.le_prev = (u_int32_t)
1753 VM_KERNEL_ADDRHASH(up->unp_link.le_prev);
1754 #else
1755 cp->unp_link.le_next = (struct unpcb_compat *)
1756 VM_KERNEL_ADDRHASH(up->unp_link.le_next);
1757 cp->unp_link.le_prev = (struct unpcb_compat **)
1758 VM_KERNEL_ADDRHASH(up->unp_link.le_prev);
1759 #endif
1760 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1761 VM_KERNEL_ADDRHASH(up->unp_socket);
1762 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1763 VM_KERNEL_ADDRHASH(up->unp_vnode);
1764 cp->unp_ino = up->unp_ino;
1765 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1766 VM_KERNEL_ADDRHASH(up->unp_conn);
1767 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_refs.lh_first);
1768 #if defined(__LP64__)
1769 cp->unp_reflink.le_next =
1770 (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_reflink.le_next);
1771 cp->unp_reflink.le_prev =
1772 (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_reflink.le_prev);
1773 #else
1774 cp->unp_reflink.le_next =
1775 (struct unpcb_compat *)VM_KERNEL_ADDRHASH(up->unp_reflink.le_next);
1776 cp->unp_reflink.le_prev =
1777 (struct unpcb_compat **)VM_KERNEL_ADDRHASH(up->unp_reflink.le_prev);
1778 #endif
1779 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1780 VM_KERNEL_ADDRHASH(up->unp_addr);
1781 cp->unp_cc = up->unp_cc;
1782 cp->unp_mbcnt = up->unp_mbcnt;
1783 cp->unp_gencnt = up->unp_gencnt;
1784 }
1785
/*
 * sysctl handler exporting the list of UNIX-domain pcbs (32-bit compat
 * xunpcb records).  arg1 selects the SOCK_DGRAM or SOCK_STREAM list.
 * Runs with unp_list_mtx held shared for the whole traversal.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list __bidi_indexable;
	size_t unp_list_len;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report an estimate with slack for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot pcbs that existed at or before the captured generation */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRHASH(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_au.xuu_addr;
				SOCKADDR_COPY(unp->unp_addr, dst,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_cau.xuu_caddr;
				SOCKADDR_COPY(unp->unp_conn->unp_addr, dst,
				    unp->unp_conn->unp_addr->sun_len);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1905
/*
 * sysctl arg1 cookies: the socket type is smuggled through the opaque
 * caddr_t argument.  __unsafe_forge_single silences bounds-safety
 * diagnostics because these are integer tags, not real pointers.
 */
const caddr_t SYSCTL_SOCK_DGRAM_ARG = __unsafe_forge_single(caddr_t, SOCK_DGRAM);
const caddr_t SYSCTL_SOCK_STREAM_ARG = __unsafe_forge_single(caddr_t, SOCK_STREAM);

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1917
1918 #if XNU_TARGET_OS_OSX
1919
/*
 * sysctl handler exporting the pcb list in the 64-bit xunpcb64 layout
 * (macOS only).  Same structure and locking as unp_pcblist above;
 * arg1 selects the SOCK_DGRAM or SOCK_STREAM list.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report an estimate with slack for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Snapshot pcbs that existed at or before the captured generation */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;                  /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			/* All kernel pointers are hashed before export */
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRHASH(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_au.xuu_addr;
				SOCKADDR_COPY(unp->unp_addr, dst,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_cau.xuu_caddr;
				SOCKADDR_COPY(unp->unp_conn->unp_addr,
				    dst,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
2063
/* 64-bit pcblist variants, registered on macOS targets only */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
2072
2073 #endif /* XNU_TARGET_OS_OSX */
2074
/*
 * sysctl handler exporting the pcb list in the extensible "_n" layout:
 * each record is a packed sequence of xunpcb_n, xsocket_n, two
 * xsockbuf_n (rcv, snd) and xsockstat_n, each 64-bit aligned.
 * arg1 selects the SOCK_DGRAM or SOCK_STREAM list.
 */
static int
unp_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error = 0;
	int i, n;
	struct unpcb *unp;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;
	size_t item_size = 0;
	uint8_t *__sized_by(item_size) buf = NULL;

	/* One scratch buffer holds a full per-pcb record, reused each pass */
	const size_t size = ROUNDUP64(sizeof(struct xunpcb_n)) +
	    ROUNDUP64(sizeof(struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
	    ROUNDUP64(sizeof(struct xsockstat_n));
	buf = kalloc_data(size, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	item_size = size;

	lck_rw_lock_shared(&unp_list_mtx);

	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report an estimate with slack for growth */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) * item_size;
		goto done;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error != 0) {
		goto done;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	/* Unlike pcblist/pcblist64 this walks the list directly, emitting
	 * records as it goes, instead of snapshotting into an array */
	for (i = 0, unp = head->lh_first;
	    i < n && unp != NULL;
	    i++, unp = unp->unp_link.le_next) {
		/* Lay the sub-records out at their aligned offsets in buf */
		struct xunpcb_n *xu = (struct xunpcb_n *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xu, sizeof(*xu));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof(*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof(*xsbsnd));

		/* Skip pcbs created after we captured the generation */
		if (unp->unp_gencnt > gencnt) {
			continue;
		}

		bzero(buf, item_size);

		xu->xunp_len = sizeof(struct xunpcb_n);
		xu->xunp_kind = XSO_UNPCB;
		/* All kernel pointers are hashed before export */
		xu->xunp_unpp = (uint64_t)VM_KERNEL_ADDRHASH(unp);
		xu->xunp_vnode = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_vnode);
		xu->xunp_ino = unp->unp_ino;
		xu->xunp_conn = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_conn);
		xu->xunp_refs = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_refs.lh_first);
		xu->xunp_reflink = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_reflink.le_next);
		xu->xunp_cc = unp->unp_cc;
		xu->xunp_mbcnt = unp->unp_mbcnt;
		xu->xunp_flags = unp->unp_flags;
		xu->xunp_gencnt = unp->unp_gencnt;

		if (unp->unp_addr) {
			struct sockaddr_un *dst __single = &xu->xu_au.xuu_addr;
			SOCKADDR_COPY(unp->unp_addr, dst,
			    unp->unp_addr->sun_len);
		}
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			struct sockaddr_un *dst __single = &xu->xu_cau.xuu_caddr;
			SOCKADDR_COPY(unp->unp_conn->unp_addr, dst,
			    unp->unp_conn->unp_addr->sun_len);
		}
		sotoxsocket_n(unp->unp_socket, xso);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(unp->unp_socket, xsostats);

		error = SYSCTL_OUT(req, buf, item_size);
		if (error != 0) {
			break;
		}
	}
	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
done:
	lck_rw_done(&unp_list_mtx);
	kfree_data_sized_by(buf, item_size);
	return error;
}
2211
/* Extensible "_n" pcblist variants, available on all targets */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local stream sockets");
2220
/*
 * Propagate a send-side shutdown to the stream peer: mark the peer
 * socket unable to receive.  Datagram sockets are unaffected.
 * Called and returns with unp's socket lock held.
 */
static void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct socket *so2;
	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
		so2 = unp->unp_conn->unp_socket;
		if (so != so2) {
			/* Take the peer's lock respecting lock ordering */
			unp_get_locks_in_order(so, so2);
		}
		socantrcvmore(so2);
		if (so != so2) {
			socket_unlock(so2, 1);
		}
	}
}
2237
2238 static void
unp_drop(struct unpcb * unp,int errno)2239 unp_drop(struct unpcb *unp, int errno)
2240 {
2241 struct socket *so = unp->unp_socket;
2242
2243 so->so_error = (u_short)errno;
2244 unp_disconnect(unp);
2245 }
2246
2247 /*
2248 * fg_insertuipc_mark
2249 *
2250 * Description: Mark fileglob for insertion onto message queue if needed
2251 * Also takes fileglob reference
2252 *
2253 * Parameters: fg Fileglob pointer to insert
2254 *
2255 * Returns: true, if the fileglob needs to be inserted onto msg queue
2256 *
2257 * Locks: Takes and drops fg_lock, potentially many times
2258 */
static boolean_t
fg_insertuipc_mark(struct fileglob * fg)
{
	boolean_t insert = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If a removal of this fileglob from the message queue is in
	 * flight (FG_RMMSGQ), wait for it to finish before marking the
	 * fileglob for insertion again; FG_WRMMSGQ asks the remover to
	 * wake us.
	 */
	while (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WRMMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
	}

	/* Take a fileglob reference on behalf of the in-transit message. */
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
	fg->fg_msgcount++;
	/* First in-transit message: it must go onto the msg queue. */
	if (fg->fg_msgcount == 1) {
		fg->fg_lflags |= FG_INSMSGQ;
		insert = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return insert;
}
2281
2282 /*
2283 * fg_insertuipc
2284 *
2285 * Description: Insert marked fileglob onto message queue
2286 *
2287 * Parameters: fg Fileglob pointer to insert
2288 *
2289 * Returns: void
2290 *
2291 * Locks: Takes and drops fg_lock & uipc_lock
2292 * DO NOT call this function with proc_fdlock held as unp_gc()
2293 * can potentially try to acquire proc_fdlock, which can result
2294 * in a deadlock.
2295 */
static void
fg_insertuipc(struct fileglob * fg)
{
	/* Only act if fg_insertuipc_mark() flagged this fileglob. */
	if (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_INSMSGQ;
		/* Wake anyone in fg_removeuipc_mark() waiting on FG_INSMSGQ. */
		if (fg->fg_lflags & FG_WINSMSGQ) {
			fg->fg_lflags &= ~FG_WINSMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2312
2313 /*
2314 * fg_removeuipc_mark
2315 *
2316 * Description: Mark the fileglob for removal from message queue if needed
2317 * Also releases fileglob message queue reference
2318 *
2319 * Parameters: fg Fileglob pointer to remove
2320 *
2321 * Returns: true, if the fileglob needs to be removed from msg queue
2322 *
2323 * Locks: Takes and drops fg_lock, potentially many times
2324 */
static boolean_t
fg_removeuipc_mark(struct fileglob * fg)
{
	boolean_t remove = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If an insertion of this fileglob onto the message queue is in
	 * flight (FG_INSMSGQ), wait for it to finish; FG_WINSMSGQ asks
	 * the inserter to wake us.
	 */
	while (fg->fg_lflags & FG_INSMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WINSMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
	}
	fg->fg_msgcount--;
	/* Last in-transit message gone: it must come off the msg queue. */
	if (fg->fg_msgcount == 0) {
		fg->fg_lflags |= FG_RMMSGQ;
		remove = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return remove;
}
2345
2346 /*
2347 * fg_removeuipc
2348 *
2349 * Description: Remove marked fileglob from message queue
2350 *
2351 * Parameters: fg Fileglob pointer to remove
2352 *
2353 * Returns: void
2354 *
2355 * Locks: Takes and drops fg_lock & uipc_lock
2356 * DO NOT call this function with proc_fdlock held as unp_gc()
2357 * can potentially try to acquire proc_fdlock, which can result
2358 * in a deadlock.
2359 */
static void
fg_removeuipc(struct fileglob * fg)
{
	/* Only act if fg_removeuipc_mark() flagged this fileglob. */
	if (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_lock(&uipc_lock);
		LIST_REMOVE(fg, f_msglist);
		lck_mtx_unlock(&uipc_lock);
		lck_mtx_lock(&fg->fg_lock);
		fg->fg_lflags &= ~FG_RMMSGQ;
		/* Wake anyone in fg_insertuipc_mark() waiting on FG_RMMSGQ. */
		if (fg->fg_lflags & FG_WRMMSGQ) {
			fg->fg_lflags &= ~FG_WRMMSGQ;
			wakeup(&fg->fg_lflags);
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
}
2376
2377 /*
2378 * Returns: 0 Success
2379 * EMSGSIZE The new fd's will not fit
2380 * ENOBUFS Cannot alloc struct fileproc
2381 */
/*
 * unp_externalize
 *
 * Convert a received SCM_RIGHTS control message in place: the array of
 * struct fileglob * stored by unp_internalize() is replaced with an
 * array of file descriptors newly allocated in the receiving process.
 */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds __bidi_indexable;
	int error = 0;

	/* Scratch array of fd numbers, copied into `cm` in step 3. */
	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 * Allocate all the fds, and if it doesn't fit,
	 * then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Roll back the fds allocated so far. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 * At this point we are commited, and can't fail anymore.
	 * Allocate all the fileprocs, and remove the files
	 * from the queue.
	 *
	 * Until we call procfdtbl_releasefd(), fds are in flux
	 * and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/* The right is no longer in transit; take it off the queue. */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 * Return the fds into `cm`.
	 * Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Zero the tail left over from the wider pointer array. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* On failure, drop every right carried by the message. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2472
void
unp_init(void)
{
	/*
	 * unp_internalize() sizes its fg_ins bitmap by UIPC_MAX_CMSG_FD;
	 * make sure that covers every fd that fits in a cluster-sized
	 * control mbuf.
	 */
	static_assert(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);
}
2480
#ifndef MIN
/* NOTE: evaluates each argument more than once; avoid side effects. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2484
2485 /*
2486 * Returns: 0 Success
2487 * EINVAL
2488 * EBADF
2489 */
/*
 * unp_internalize
 *
 * Convert an SCM_RIGHTS control message in place: each file descriptor
 * supplied by the sender is validated and replaced with a reference to
 * its struct fileglob so the rights can travel with the message.
 */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* Bitmap: which fileglobs still need fg_insertuipc() below. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/* First pass: validate every fd before touching anything. */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/*
	 * Second pass, outside the fd lock: queue the fileglobs that were
	 * marked above and account for the in-transit rights.
	 */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2551
/*
 * unp_gc
 *
 * Garbage collect fileglobs that are referenced only by in-transit
 * SCM_RIGHTS messages — i.e. unreachable cycles of sockets passed over
 * sockets.  Invoked via a thread call (unp_gc_tcall is entered from
 * unp_unlock()).  Classic mark-and-sweep over the unp_msghead list.
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	static struct fileglob **__indexable extra_ref;
	struct fileglob **fpp;
	int nunref, i;
	static struct proc *UNP_FG_NOPROC = __unsafe_forge_single(struct proc *, FG_NOPROC);

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	/*
	 * Mark phase: iterate until no file was deferred in the previous
	 * pass (unp_defer is bumped by unp_mark()).
	 */
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer' in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			socket_unlock(so, 0);
			/* Mark every right queued in this socket's rcv buf. */
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor. Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow. After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference. This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	if (extra_ref_size == 0) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	nunref = 0;
	fpp = extra_ref;
	/* Sweep phase: collect the unmarked, message-only fileglobs. */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				socket_lock(so, 0);
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Drop the extra references taken in the sweep; may be last close. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(UNP_FG_NOPROC, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2755
2756 void
unp_dispose(struct mbuf * m)2757 unp_dispose(struct mbuf *m)
2758 {
2759 if (m) {
2760 unp_scan(m, unp_discard, NULL);
2761 }
2762 }
2763
2764 /*
2765 * Returns: 0 Success
2766 */
static int
unp_listen(struct unpcb *unp, proc_t p)
{
	/*
	 * Snapshot the listening process' credentials into unp_peercred
	 * and mark the cache valid (UNP_HAVEPCCACHED); presumably this is
	 * consumed when a peer connects — verify against the connect path.
	 */
	kauth_cred_t safecred __single = kauth_cred_proc_ref(p);
	cru2x(safecred, &unp->unp_peercred);
	kauth_cred_unref(&safecred);
	unp->unp_flags |= UNP_HAVEPCCACHED;
	return 0;
}
2776
2777 static void
unp_scan(struct mbuf * m0,void (* op)(struct fileglob *,void * arg),void * arg)2778 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2779 {
2780 struct mbuf *m;
2781 struct fileglob **rp;
2782 struct cmsghdr *cm;
2783 int i;
2784 int qfds;
2785
2786 while (m0) {
2787 for (m = m0; m; m = m->m_next) {
2788 if (m->m_type == MT_CONTROL &&
2789 (size_t)m->m_len >= sizeof(*cm)) {
2790 cm = mtod(m, struct cmsghdr *);
2791 if (cm->cmsg_level != SOL_SOCKET ||
2792 cm->cmsg_type != SCM_RIGHTS) {
2793 continue;
2794 }
2795 qfds = (cm->cmsg_len - sizeof(*cm)) /
2796 sizeof(int);
2797 rp = (struct fileglob **)(cm + 1);
2798 for (i = 0; i < qfds; i++) {
2799 (*op)(*rp++, arg);
2800 }
2801 break; /* XXX, but saves time */
2802 }
2803 }
2804 m0 = m0->m_act;
2805 }
2806 }
2807
/*
 * unp_mark
 *
 * unp_scan() callback used by unp_gc(): atomically mark a fileglob as
 * reachable (FMARK) and defer it (FDEFER) so the next GC pass scans it.
 * A fileglob that is already marked is left untouched.
 */
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
			os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	/* Tell unp_gc() that another mark pass is required. */
	unp_defer++;
}
2822
/*
 * unp_discard
 *
 * Drop one in-transit right: remove the fileglob from the message queue
 * bookkeeping, adjust the global unp_rights/unp_disposed counters, and
 * release the message's reference via fg_drop().
 */
static void
unp_discard(struct fileglob *fg, void *p)
{
	if (p == NULL) {
		p = current_proc();             /* XXX */
	}
	(void) OSAddAtomic(1, &unp_disposed);
	if (fg_removeuipc_mark(fg)) {
		VERIFY(fg->fg_lflags & FG_RMMSGQ);
		fg_removeuipc(fg);
	}
	(void) OSAddAtomic(-1, &unp_rights);

	(void) fg_drop(p, fg);
}
2838
/*
 * unp_lock
 *
 * Protocol lock hook: take the per-pcb mutex; if `refcount` is set,
 * also bump so_usecount.  Records the caller's return address in the
 * socket's lock debugging history.  Panics on a missing pcb or a
 * negative use count.
 */
int
unp_lock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved __single;
	if (lr == 0) {
		lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
	} else {
		lr_saved = lr;
	}

	if (so->so_pcb) {
		lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
	} else {
		panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
		    so, lr_saved, so->so_usecount);
	}

	if (so->so_usecount < 0) {
		panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
		    so, so->so_pcb, lr_saved, so->so_usecount);
	}

	if (refcount) {
		VERIFY(so->so_usecount > 0);
		so->so_usecount++;
	}
	so->lock_lr[so->next_lock_lr] = lr_saved;
	so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
	return 0;
}
2869
/*
 * unp_unlock
 *
 * Protocol unlock hook: optionally drop a so_usecount reference, then
 * release the per-pcb mutex.  When the last reference goes away and the
 * pcb is being cleared (SOF_PCBCLEARING), free the socket's last ref,
 * the cached address, the mutex and the pcb itself, then kick the
 * garbage collector thread call.
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved __single;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp __single = sotounpcb(so);

	if (lr == 0) {
		lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		/* Last reference: tear the socket and pcb down. */
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		/* Unlock before destroying the mutex we hold. */
		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		/* Schedule GC to reap any rights orphaned by this close. */
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2917
2918 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2919 unp_getlock(struct socket *so, __unused int flags)
2920 {
2921 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2922
2923
2924 if (so->so_pcb) {
2925 if (so->so_usecount < 0) {
2926 panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2927 }
2928 return &unp->unp_mtx;
2929 } else {
2930 panic("unp_getlock: so=%p NULL so_pcb", so);
2931 return so->so_proto->pr_domain->dom_mtx;
2932 }
2933 }
2934