1 /*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1989, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
61 */
62 /*
63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
64 * support for mandatory and extensible security protections. This notice
65 * is included in support of clause 2.2 (b) of the Apple Public License,
66 * Version 2.0.
67 */
68 #include <os/log.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/domain.h>
73 #include <sys/fcntl.h>
74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
75 #include <sys/file_internal.h>
76 #include <sys/guarded.h>
77 #include <sys/filedesc.h>
78 #include <sys/lock.h>
79 #include <sys/mbuf.h>
80 #include <sys/namei.h>
81 #include <sys/proc_internal.h>
82 #include <sys/kauth.h>
83 #include <sys/protosw.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/stat.h>
87 #include <sys/sysctl.h>
88 #include <sys/un.h>
89 #include <sys/unpcb.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/kdebug.h>
92 #include <sys/mcache.h>
93
94 #include <kern/zalloc.h>
95 #include <kern/locks.h>
96 #include <kern/task.h>
97
98 #include <net/sockaddr_utils.h>
99
100 #if __has_ptrcheck
101 #include <machine/trap.h>
102 #endif /* __has_ptrcheck */
103
104 #if CONFIG_MACF
105 #include <security/mac_framework.h>
106 #endif /* CONFIG_MACF */
107
108 #include <mach/vm_param.h>
109
110 #ifndef ROUNDUP64
111 #define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
112 #endif
113
114 #ifndef ADVANCE64
115 #define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
116 #endif
117
118 /*
119 * Maximum number of FDs that can be passed in an mbuf
120 */
121 #define UIPC_MAX_CMSG_FD 512
122
123 ZONE_DEFINE_TYPE(unp_zone, "unpzone", struct unpcb, ZC_NONE);
124 static unp_gen_t unp_gencnt;
125 static u_int unp_count;
126
127 static LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);
128 static LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");
129 static LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);
130
131 static LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);
132 static LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);
133 static LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);
134
135 static u_int disconnect_in_progress;
136
137 static struct unp_head unp_shead, unp_dhead;
138 static int unp_defer;
139 static thread_call_t unp_gc_tcall;
140 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);
141
142 SYSCTL_DECL(_net_local);
143
144 static int unp_rights; /* file descriptors in flight */
145 static int unp_disposed; /* discarded file descriptors */
146
147 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
148
149 #define ULEF_CONNECTION 0x01
150 uint32_t unp_log_enable_flags = 0;
151
152 SYSCTL_UINT(_net_local, OID_AUTO, log, CTLFLAG_RD | CTLFLAG_LOCKED,
153 &unp_log_enable_flags, 0, "");
154
155 SYSCTL_UINT(_net_local, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
156 &unp_count, 0, "");
157
158 /*
159 * mDNSResponder tracing. When enabled, endpoints connected to
160 * /var/run/mDNSResponder will be traced; during each send on
161 * the traced socket, we log the PID and process name of the
162 * sending process. We also print out a bit of info related
163 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
164 * of mDNSResponder stays the same.
165 */
166 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"
167
168 static int unpst_tracemdns; /* enable tracing */
169
170 #define MDNS_IPC_MSG_HDR_VERSION_1 1
171
/*
 * Local mirror of the IPC message header exchanged with mDNSResponder
 * (see the tracing comment above; layout assumes ipc_msg_hdr in
 * dnssd_ipc.h stays the same).  Fields are read with ntohl() in
 * uipc_send(), so they arrive in network byte order.  Packed: layout
 * must match the wire format exactly.
 */
struct mdns_ipc_msg_hdr {
	uint32_t version;
	uint32_t datalen;
	uint32_t ipc_flags;
	uint32_t op;
	union {
		void *context;
		uint32_t u32[2];
	} __attribute__((packed));
	uint32_t reg_index;
} __attribute__((packed));
183
184 /*
185 * Unix communications domain.
186 *
187 * TODO:
188 * SEQPACKET, RDM
189 * rethink name space problems
190 * need a proper out-of-band
191 * lock pushdown
192 */
/*
 * Anonymous name handed back for sockets that are unbound or whose
 * peer has disappeared (see uipc_accept/uipc_peeraddr/uipc_sockaddr).
 */
static struct sockaddr sun_noname = {
	.sa_len = sizeof(struct sockaddr),
	.sa_family = AF_LOCAL,
	.sa_data = {
		0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0
	}
};
201
202 static ino_t unp_ino; /* prototype for fake inode numbers */
203
204 static int unp_attach(struct socket *);
205 static void unp_detach(struct unpcb *);
206 static int unp_bind(struct unpcb *, struct sockaddr *, proc_t);
207 static int unp_connect(struct socket *, struct sockaddr *, proc_t);
208 static void unp_disconnect(struct unpcb *);
209 static void unp_shutdown(struct unpcb *);
210 static void unp_drop(struct unpcb *, int);
211 static void unp_gc(thread_call_param_t arg0, thread_call_param_t arg1);
212 static void unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);
213 static void unp_mark(struct fileglob *, __unused void *);
214 static void unp_discard(struct fileglob *, void *);
215 static int unp_internalize(struct mbuf *, proc_t);
216 static int unp_listen(struct unpcb *, proc_t);
217 static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *);
218 static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so);
219
/*
 * Allocate the thread call used to run unp_gc(), the garbage collector
 * for file descriptors in flight over local sockets.  Registered below
 * to run once at boot via STARTUP().
 */
__startup_func
static void
unp_gc_setup(void)
{
	unp_gc_tcall = thread_call_allocate_with_options(unp_gc,
	    NULL, THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
}
STARTUP(THREAD_CALL, STARTUP_RANK_MIDDLE, unp_gc_setup);
229
/*
 * Acquire the lock of a connected peer socket while already holding
 * so's lock, taking the two locks in a canonical order (ascending
 * socket address) to avoid deadlock.
 *
 * On entry: so is locked.  On return: both so and conn_so are locked,
 * with a refcount taken on conn_so (socket_lock(..., 1)).
 *
 * When so sorts after conn_so, so's lock must be dropped first.  While
 * it is dropped, UNP_DONTDISCONNECT plus rw_thrcount keep the pcb from
 * being disconnected out from under us; waiters are woken once the
 * last such thread clears the flag.
 */
static void
unp_get_locks_in_order(struct socket *so, struct socket *conn_so)
{
	if (so < conn_so) {
		socket_lock(conn_so, 1);
	} else {
		struct unpcb *unp = sotounpcb(so);
		unp->unp_flags |= UNP_DONTDISCONNECT;
		unp->rw_thrcount++;
		socket_unlock(so, 0);

		/* Get the locks in the correct order */
		socket_lock(conn_so, 1);
		socket_lock(so, 0);
		unp->rw_thrcount--;
		if (unp->rw_thrcount == 0) {
			unp->unp_flags &= ~UNP_DONTDISCONNECT;
			wakeup(unp);
		}
	}
}
251
252 static int
uipc_abort(struct socket * so)253 uipc_abort(struct socket *so)
254 {
255 struct unpcb *unp = sotounpcb(so);
256
257 if (unp == 0) {
258 return EINVAL;
259 }
260 unp_drop(unp, ECONNABORTED);
261 unp_detach(unp);
262 sofree(so);
263 return 0;
264 }
265
266 static int
uipc_accept(struct socket * so,struct sockaddr ** nam)267 uipc_accept(struct socket *so, struct sockaddr **nam)
268 {
269 struct unpcb *unp = sotounpcb(so);
270
271 if (unp == 0) {
272 return EINVAL;
273 }
274
275 /*
276 * Pass back name of connected socket,
277 * if it was bound and we are still connected
278 * (our peer may have closed already!).
279 */
280 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
281 *nam = dup_sockaddr(SA(unp->unp_conn->unp_addr), 1);
282 } else {
283 if (unp_log_enable_flags & ULEF_CONNECTION) {
284 os_log(OS_LOG_DEFAULT, "%s: peer disconnected unp_gencnt %llu",
285 __func__, unp->unp_gencnt);
286 }
287 *nam = dup_sockaddr(SA(&sun_noname), 1);
288 }
289 return 0;
290 }
291
292 /*
293 * Returns: 0 Success
294 * EISCONN
295 * unp_attach:
296 */
297 static int
uipc_attach(struct socket * so,__unused int proto,__unused proc_t p)298 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)
299 {
300 struct unpcb *unp = sotounpcb(so);
301
302 if (unp != 0) {
303 return EISCONN;
304 }
305 return unp_attach(so);
306 }
307
308 static int
uipc_bind(struct socket * so,struct sockaddr * nam,proc_t p)309 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)
310 {
311 struct unpcb *unp = sotounpcb(so);
312
313 if (unp == 0) {
314 return EINVAL;
315 }
316
317 return unp_bind(unp, nam, p);
318 }
319
320 /*
321 * Returns: 0 Success
322 * EINVAL
323 * unp_connect:??? [See elsewhere in this file]
324 */
325 static int
uipc_connect(struct socket * so,struct sockaddr * nam,proc_t p)326 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)
327 {
328 struct unpcb *unp = sotounpcb(so);
329
330 if (unp == 0) {
331 return EINVAL;
332 }
333 return unp_connect(so, nam, p);
334 }
335
336 /*
337 * Returns: 0 Success
338 * EINVAL
339 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
340 * unp_connect2:EINVAL Invalid argument
341 */
342 static int
uipc_connect2(struct socket * so1,struct socket * so2)343 uipc_connect2(struct socket *so1, struct socket *so2)
344 {
345 struct unpcb *unp = sotounpcb(so1);
346
347 if (unp == 0) {
348 return EINVAL;
349 }
350
351 return unp_connect2(so1, so2);
352 }
353
354 /* control is EOPNOTSUPP */
355
356 static int
uipc_detach(struct socket * so)357 uipc_detach(struct socket *so)
358 {
359 struct unpcb *unp = sotounpcb(so);
360
361 if (unp == 0) {
362 return EINVAL;
363 }
364
365 LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
366 unp_detach(unp);
367 return 0;
368 }
369
370 static int
uipc_disconnect(struct socket * so)371 uipc_disconnect(struct socket *so)
372 {
373 struct unpcb *unp = sotounpcb(so);
374
375 if (unp == 0) {
376 return EINVAL;
377 }
378 unp_disconnect(unp);
379 return 0;
380 }
381
382 /*
383 * Returns: 0 Success
384 * EINVAL
385 */
386 static int
uipc_listen(struct socket * so,__unused proc_t p)387 uipc_listen(struct socket *so, __unused proc_t p)
388 {
389 struct unpcb *unp = sotounpcb(so);
390
391 if (unp == 0 || unp->unp_vnode == 0) {
392 return EINVAL;
393 }
394 return unp_listen(unp, p);
395 }
396
/*
 * pru_peeraddr for PF_LOCAL: return the name the peer is bound to, or
 * the anonymous name if the peer is unbound or gone.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
	so2 = unp->unp_conn != NULL ? unp->unp_conn->unp_socket : NULL;
	if (so2 != NULL) {
		/* Take both locks in canonical order before reading peer state. */
		unp_get_locks_in_order(so, so2);
	}

	/*
	 * Re-check unp_conn: unp_get_locks_in_order() may have dropped
	 * so's lock, so the connection could have changed meanwhile.
	 */
	if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {
		*nam = dup_sockaddr(SA(unp->unp_conn->unp_addr), 1);
	} else {
		*nam = dup_sockaddr(SA(&sun_noname), 1);
	}
	if (so2 != NULL) {
		socket_unlock(so2, 1);
	}
	return 0;
}
421
/*
 * pru_rcvd for PF_LOCAL: the receiver has consumed data, so relieve
 * backpressure on the sending side and wake any writers blocked on it.
 *
 * NOTE: rcv/snd below are macros aliasing this socket's receive buffer
 * and the PEER's send buffer; they are #undef'd at the end of the
 * SOCK_STREAM case.
 */
static int
uipc_rcvd(struct socket *so, __unused int flags)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL) {
		return EINVAL;
	}
#define rcv (&so->so_rcv)
#define snd (&so2->so_snd)
	switch (so->so_type) {
	case SOCK_DGRAM:
		if (unp->unp_conn == NULL) {
			break;
		}
		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/* Just wake the peer's writers; no hiwat accounting for DGRAM. */
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}
		socket_unlock(so2, 1);
		break;
	case SOCK_STREAM:
		if (unp->unp_conn == NULL) {
			break;
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);
		/*
		 * Adjust backpressure on sender
		 * and wakeup any waiting to write.
		 */
		snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
		unp->unp_mbcnt = rcv->sb_mbcnt;
		snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
		unp->unp_cc = rcv->sb_cc;
		if (sb_notify(&so2->so_snd)) {
			sowakeup(so2, &so2->so_snd, so);
		}
		socket_unlock(so2, 1);
#undef snd
#undef rcv
		break;
	default:
		panic("uipc_rcvd unknown socktype");
	}
	return 0;
}
472
473 /* pru_rcvoob is EOPNOTSUPP */
474
475 /*
476 * Returns: 0 Success
477 * EINVAL
478 * EOPNOTSUPP
479 * EPIPE
480 * ENOTCONN
481 * EISCONN
482 * unp_internalize:EINVAL
483 * unp_internalize:EBADF
484 * unp_connect:EAFNOSUPPORT Address family not supported
485 * unp_connect:EINVAL Invalid argument
486 * unp_connect:ENOTSOCK Not a socket
487 * unp_connect:ECONNREFUSED Connection refused
488 * unp_connect:EISCONN Socket is connected
489 * unp_connect:EPROTOTYPE Protocol wrong type for socket
490 * unp_connect:???
491 * sbappendaddr:ENOBUFS [5th argument, contents modified]
492 * sbappendaddr:??? [whatever a filter author chooses]
493 */
/*
 * pru_send for PF_LOCAL: append data (and any control mbufs carrying
 * internalized file descriptors) directly to the peer's receive buffer.
 * See the "Returns:" list in the comment above for the error contract.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, proc_t p)
{
	int error = 0;
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int32_t len = m_pktlen(m);

	if (unp == 0) {
		error = EINVAL;
		goto release;
	}
	if (flags & PRUS_OOB) {
		/* Out-of-band data is not supported on local sockets. */
		error = EOPNOTSUPP;
		goto release;
	}

	if (control) {
		/* release lock to avoid deadlock (4436174) */
		socket_unlock(so, 0);
		error = unp_internalize(control, p);
		socket_lock(so, 0);
		if (error) {
			goto release;
		}
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (nam) {
			/* sendto() with an address: temporary connect. */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, p);
			if (error) {
				so->so_state &= ~SS_ISCONNECTING;
				break;
			}
		} else {
			if (unp->unp_conn == 0) {
				error = ENOTCONN;
				break;
			}
		}

		so2 = unp->unp_conn->unp_socket;
		if (so != so2) {
			unp_get_locks_in_order(so, so2);
		}

		if (unp->unp_addr) {
			from = SA(unp->unp_addr);
		} else {
			from = &sun_noname;
		}
		/*
		 * sbappendaddr() will fail when the receiver runs out of
		 * space; in contrast to SOCK_STREAM, we will lose messages
		 * for the SOCK_DGRAM case when the receiver's queue overflows.
		 * SB_UNIX on the socket buffer implies that the callee will
		 * not free the control message, if any, because we would need
		 * to call unp_dispose() on it.
		 */
		if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		if (so != so2) {
			socket_unlock(so2, 1);
		}

		m = NULL;
		if (nam) {
			/* Undo the temporary connect from above. */
			unp_disconnect(unp);
		}
		break;
	}

	case SOCK_STREAM: {
		int didreceive = 0;
/* rcv aliases the PEER's receive buffer, snd this socket's send buffer */
#define rcv (&so2->so_rcv)
#define snd (&so->so_snd)
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam) {
				error = unp_connect(so, nam, p);
				if (error) {
					so->so_state &= ~SS_ISCONNECTING;
					break; /* XXX */
				}
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == 0) {
			panic("uipc_send connected but no connection? "
			    "socket state: %x socket flags: %x socket flags1: %x.",
			    so->so_state, so->so_flags, so->so_flags1);
		}

		so2 = unp->unp_conn->unp_socket;
		unp_get_locks_in_order(so, so2);

		/* Check socket state again as we might have unlocked the socket
		 * while trying to get the locks in order
		 */

		if ((so->so_state & SS_CANTSENDMORE)) {
			error = EPIPE;
			socket_unlock(so2, 1);
			break;
		}

		if (unp->unp_flags & UNP_TRACE_MDNS) {
			/* mDNSResponder tracing: log sender and opcode (see above). */
			struct mdns_ipc_msg_hdr hdr;

			if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&
			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
				os_log(OS_LOG_DEFAULT,
				    "%s[mDNSResponder] pid=%d (%s): op=0x%x",
				    __func__, proc_getpid(p), p->p_comm, ntohl(hdr.op));
			}
		}

		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure. Wake up readers.
		 * SB_UNIX flag will allow new record to be appended to the
		 * receiver's queue even when it is already full. It is
		 * possible, however, that append might fail. In that case,
		 * we will need to call unp_dispose() on the control message;
		 * the callee will not free it since SB_UNIX is set.
		 */
		didreceive = control ?
		    sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

		snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
		if ((int32_t)snd->sb_hiwat >=
		    (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
		} else {
			snd->sb_hiwat = 0;
		}
		unp->unp_conn->unp_cc = rcv->sb_cc;
		if (didreceive) {
			control = NULL;
			if (sb_notify(&so2->so_rcv)) {
				sowakeup(so2, &so2->so_rcv, so);
			}
		} else if (control != NULL && error == 0) {
			/* A socket filter took control; don't touch it */
			control = NULL;
		}

		socket_unlock(so2, 1);
		m = NULL;
#undef snd
#undef rcv
	}
	break;

	default:
		panic("uipc_send unknown socktype");
	}

	so_update_tx_data_stats(so, 1, len);

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0) {
		/* Undo unp_internalize(); drop the lock like before (deadlock). */
		socket_unlock(so, 0);
		unp_dispose(control);
		socket_lock(so, 0);
	}

release:
	if (control) {
		m_freem(control);
	}
	if (m) {
		m_freem(m);
	}
	return error;
}
707
708 static int
uipc_sense(struct socket * so,void * ub,int isstat64)709 uipc_sense(struct socket *so, void *ub, int isstat64)
710 {
711 struct unpcb *unp = sotounpcb(so);
712 struct socket *so2;
713 blksize_t blksize;
714
715 if (unp == 0) {
716 return EINVAL;
717 }
718
719 blksize = so->so_snd.sb_hiwat;
720 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
721 so2 = unp->unp_conn->unp_socket;
722 blksize += so2->so_rcv.sb_cc;
723 }
724 if (unp->unp_ino == 0) {
725 unp->unp_ino = unp_ino++;
726 }
727
728 if (isstat64 != 0) {
729 struct stat64 *sb64;
730
731 sb64 = (struct stat64 *)ub;
732 sb64->st_blksize = blksize;
733 sb64->st_dev = NODEV;
734 sb64->st_ino = (ino64_t)unp->unp_ino;
735 } else {
736 struct stat *sb;
737
738 sb = (struct stat *)ub;
739 sb->st_blksize = blksize;
740 sb->st_dev = NODEV;
741 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;
742 }
743
744 return 0;
745 }
746
747 /*
748 * Returns: 0 Success
749 * EINVAL
750 *
751 * Notes: This is not strictly correct, as unp_shutdown() also calls
752 * socantrcvmore(). These should maybe both be conditionalized
753 * on the 'how' argument in soshutdown() as called from the
754 * shutdown() system call.
755 */
756 static int
uipc_shutdown(struct socket * so)757 uipc_shutdown(struct socket *so)
758 {
759 struct unpcb *unp = sotounpcb(so);
760
761 if (unp == 0) {
762 return EINVAL;
763 }
764 socantsendmore(so);
765 unp_shutdown(unp);
766 return 0;
767 }
768
769 /*
770 * Returns: 0 Success
771 * EINVAL Invalid argument
772 */
773 static int
uipc_sockaddr(struct socket * so,struct sockaddr ** nam)774 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
775 {
776 struct unpcb *unp = sotounpcb(so);
777
778 if (unp == NULL) {
779 return EINVAL;
780 }
781 if (unp->unp_addr != NULL) {
782 *nam = dup_sockaddr(SA(unp->unp_addr), 1);
783 } else {
784 *nam = dup_sockaddr(SA(&sun_noname), 1);
785 }
786 return 0;
787 }
788
/*
 * Protocol user-request switch for PF_LOCAL sockets.  pru_control and
 * pru_rcvoob are intentionally unset (EOPNOTSUPP; see the comments
 * above uipc_detach and uipc_send).
 */
struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = uipc_abort,
	.pru_accept = uipc_accept,
	.pru_attach = uipc_attach,
	.pru_bind = uipc_bind,
	.pru_connect = uipc_connect,
	.pru_connect2 = uipc_connect2,
	.pru_detach = uipc_detach,
	.pru_disconnect = uipc_disconnect,
	.pru_listen = uipc_listen,
	.pru_peeraddr = uipc_peeraddr,
	.pru_rcvd = uipc_rcvd,
	.pru_send = uipc_send,
	.pru_sense = uipc_sense,
	.pru_shutdown = uipc_shutdown,
	.pru_sockaddr = uipc_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive,
};
808
809 int
uipc_ctloutput(struct socket * so,struct sockopt * sopt)810 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
811 {
812 struct unpcb *unp = sotounpcb(so);
813 int error = 0;
814 pid_t peerpid;
815 proc_t p;
816 task_t t __single;
817 struct socket *peerso;
818
819 switch (sopt->sopt_dir) {
820 case SOPT_GET:
821 switch (sopt->sopt_name) {
822 case LOCAL_PEERCRED:
823 if (unp->unp_flags & UNP_HAVEPC) {
824 error = sooptcopyout(sopt, &unp->unp_peercred,
825 sizeof(unp->unp_peercred));
826 } else {
827 if (so->so_type == SOCK_STREAM) {
828 error = ENOTCONN;
829 } else {
830 error = EINVAL;
831 }
832 }
833 break;
834 case LOCAL_PEERPID:
835 case LOCAL_PEEREPID:
836 if (unp->unp_conn == NULL) {
837 error = ENOTCONN;
838 break;
839 }
840 peerso = unp->unp_conn->unp_socket;
841 if (peerso == NULL) {
842 panic("peer is connected but has no socket?");
843 }
844 unp_get_locks_in_order(so, peerso);
845 if (sopt->sopt_name == LOCAL_PEEREPID &&
846 peerso->so_flags & SOF_DELEGATED) {
847 peerpid = peerso->e_pid;
848 } else {
849 peerpid = peerso->last_pid;
850 }
851 socket_unlock(peerso, 1);
852 error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));
853 break;
854 case LOCAL_PEERUUID:
855 case LOCAL_PEEREUUID:
856 if (unp->unp_conn == NULL) {
857 error = ENOTCONN;
858 break;
859 }
860 peerso = unp->unp_conn->unp_socket;
861 if (peerso == NULL) {
862 panic("peer is connected but has no socket?");
863 }
864 unp_get_locks_in_order(so, peerso);
865 if (sopt->sopt_name == LOCAL_PEEREUUID &&
866 peerso->so_flags & SOF_DELEGATED) {
867 error = sooptcopyout(sopt, &peerso->e_uuid,
868 sizeof(peerso->e_uuid));
869 } else {
870 error = sooptcopyout(sopt, &peerso->last_uuid,
871 sizeof(peerso->last_uuid));
872 }
873 socket_unlock(peerso, 1);
874 break;
875 case LOCAL_PEERTOKEN:
876 if (unp->unp_conn == NULL) {
877 error = ENOTCONN;
878 break;
879 }
880 peerso = unp->unp_conn->unp_socket;
881 if (peerso == NULL) {
882 panic("peer is connected but has no socket?");
883 }
884 unp_get_locks_in_order(so, peerso);
885 peerpid = peerso->last_pid;
886 p = proc_find(peerpid);
887 if (p != PROC_NULL) {
888 t = proc_task(p);
889 if (t != TASK_NULL) {
890 audit_token_t peertoken;
891 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
892 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {
893 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));
894 } else {
895 error = EINVAL;
896 }
897 } else {
898 error = EINVAL;
899 }
900 proc_rele(p);
901 } else {
902 error = EINVAL;
903 }
904 socket_unlock(peerso, 1);
905 break;
906 default:
907 error = EOPNOTSUPP;
908 break;
909 }
910 break;
911 case SOPT_SET:
912 default:
913 error = EOPNOTSUPP;
914 break;
915 }
916
917 return error;
918 }
919
920 /*
921 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
922 * for stream sockets, although the total for sender and receiver is
923 * actually only PIPSIZ.
924 * Datagram sockets really use the sendspace as the maximum datagram size,
925 * and don't really want to reserve the sendspace. Their recvspace should
926 * be large enough for at least one max-size datagram plus address.
927 */
928 #ifndef PIPSIZ
929 #define PIPSIZ 8192
930 #endif
931 static u_int32_t unpst_sendspace = PIPSIZ;
932 static u_int32_t unpst_recvspace = PIPSIZ;
933 static u_int32_t unpdg_sendspace = 2 * 1024; /* really max datagram size */
934 static u_int32_t unpdg_recvspace = 4 * 1024;
935
936 SYSCTL_DECL(_net_local_stream);
937 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
938 &unpst_sendspace, 0, "");
939 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
940 &unpst_recvspace, 0, "");
941 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
942 &unpst_tracemdns, 0, "");
943 SYSCTL_DECL(_net_local_dgram);
944 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
945 &unpdg_sendspace, 0, "");
946 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
947 &unpdg_recvspace, 0, "");
948
949 /*
950 * Returns: 0 Success
951 * ENOBUFS
952 * soreserve:ENOBUFS
953 */
static int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error = 0;

	/* Reserve default buffer space if the caller has not done so yet. */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			/*
			 * By default soreserve() will set the low water
			 * mark to MCLBYTES which is too high given our
			 * default sendspace. Override it here to something
			 * sensible.
			 */
			so->so_snd.sb_lowat = 1;
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error) {
			return error;
		}
	}
	unp = zalloc_flags(unp_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);

	lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

	/* Publish the new pcb on the global stream or datagram list. */
	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?
	    &unp_dhead : &unp_shead, unp, unp_link);
	lck_rw_done(&unp_list_mtx);
	so->so_pcb = (caddr_t)unp;
	/*
	 * Mark AF_UNIX socket buffers accordingly so that:
	 *
	 * a. In the SOCK_STREAM case, socket buffer append won't fail due to
	 *    the lack of space; this essentially loosens the sbspace() check,
	 *    since there is disconnect between sosend() and uipc_send() with
	 *    respect to flow control that might result in our dropping the
	 *    data in uipc_send(). By setting this, we allow for slightly
	 *    more records to be appended to the receiving socket to avoid
	 *    losing data (which we can't afford in the SOCK_STREAM case).
	 *    Flow control still takes place since we adjust the sender's
	 *    hiwat during each send. This doesn't affect the SOCK_DGRAM
	 *    case and append would still fail when the queue overflows.
	 *
	 * b. In the presence of control messages containing internalized
	 *    file descriptors, the append routines will not free them since
	 *    we'd need to undo the work first via unp_dispose().
	 */
	so->so_rcv.sb_flags |= SB_UNIX;
	so->so_snd.sb_flags |= SB_UNIX;
	return 0;
}
1019
/*
 * Tear down a pcb: unlink it from the global list, release any bound
 * vnode, disconnect from a connected peer, and drop every datagram
 * socket still holding a reference to us.  Called with the socket
 * locked; the lock may be dropped and retaken internally.
 */
static void
unp_detach(struct unpcb *unp)
{
	int so_locked = 1;

	lck_rw_lock_exclusive(&unp_list_mtx);
	LIST_REMOVE(unp, unp_link);
	--unp_count;
	++unp_gencnt;
	lck_rw_done(&unp_list_mtx);
	if (unp->unp_vnode) {
		struct vnode *tvp = NULL;
		socket_unlock(unp->unp_socket, 0);

		/* Holding unp_connect_lock will avoid a race between
		 * a thread closing the listening socket and a thread
		 * connecting to it.
		 */
		lck_mtx_lock(&unp_connect_lock);
		socket_lock(unp->unp_socket, 0);
		/* Re-check: unp_vnode may have been cleared while unlocked. */
		if (unp->unp_vnode) {
			tvp = unp->unp_vnode;
			unp->unp_vnode->v_socket = NULL;
			unp->unp_vnode = NULL;
		}
		lck_mtx_unlock(&unp_connect_lock);
		if (tvp != NULL) {
			vnode_rele(tvp); /* drop the usecount */
		}
	}
	if (unp->unp_conn) {
		unp_disconnect(unp);
	}
	while (unp->unp_refs.lh_first) {
		struct unpcb *unp2 = NULL;

		/* This datagram socket is connected to one or more
		 * sockets. In order to avoid a race condition between removing
		 * this reference and closing the connected socket, we need
		 * to check disconnect_in_progress
		 */
		if (so_locked == 1) {
			socket_unlock(unp->unp_socket, 0);
			so_locked = 0;
		}
		/* Wait until no other thread is mid-disconnect, then claim it. */
		lck_mtx_lock(&unp_disconnect_lock);
		while (disconnect_in_progress != 0) {
			(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
			    PSOCK, "disconnect", NULL);
		}
		disconnect_in_progress = 1;
		lck_mtx_unlock(&unp_disconnect_lock);

		/* Now we are sure that any unpcb socket disconnect is not happening */
		if (unp->unp_refs.lh_first != NULL) {
			unp2 = unp->unp_refs.lh_first;
			socket_lock(unp2->unp_socket, 1);
		}

		/* Release the claim and wake any waiters. */
		lck_mtx_lock(&unp_disconnect_lock);
		disconnect_in_progress = 0;
		wakeup(&disconnect_in_progress);
		lck_mtx_unlock(&unp_disconnect_lock);

		if (unp2 != NULL) {
			/* We already locked this socket and have a reference on it */
			unp_drop(unp2, ECONNRESET);
			socket_unlock(unp2->unp_socket, 1);
		}
	}

	if (so_locked == 0) {
		socket_lock(unp->unp_socket, 0);
		so_locked = 1;
	}
	soisdisconnected(unp->unp_socket);
	/* makes sure we're getting dealloced */
	unp->unp_socket->so_flags |= SOF_PCBCLEARING;
}
1099
1100 /*
1101 * Returns: 0 Success
1102 * EAFNOSUPPORT
1103 * EINVAL
1104 * EADDRINUSE
1105 * namei:??? [anything namei can return]
1106 * vnode_authorize:??? [anything vnode_authorize can return]
1107 *
1108 * Notes: p at this point is the current process, as this function is
1109 * only called by sobind().
1110 */
static int
unp_bind(
	struct unpcb *unp,
	struct sockaddr *nam,
	proc_t p)
{
	struct sockaddr_un *soun = SUN(nam);
	struct vnode *vp __single, *dvp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error, namelen;
	struct nameidata nd;
	struct socket *so = unp->unp_socket;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	/*
	 * Check if the socket is already bound to an address
	 */
	if (unp->unp_vnode != NULL) {
		return EINVAL;
	}
	/*
	 * Check if the socket may have been shut down
	 */
	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
	    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
		return EINVAL;
	}

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (namelen >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}
	/* Copy the (unterminated) path into a local buffer and NUL-terminate it */
	char *path = UNP_FORGE_PATH(soun, namelen);
	bcopy(path, buf, namelen);
	buf[namelen] = 0;

	/*
	 * Drop the socket lock while doing filesystem work; namei() and
	 * vn_create() may block, and holding the socket lock across them
	 * could deadlock.  Any state checked above must be re-validated
	 * after the lock is re-taken below.
	 */
	socket_unlock(so, 0);

	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* A non-NULL vp means a node already exists at that path */
	if (vp != NULL) {
		/*
		 * need to do this before the vnode_put of dvp
		 * since we may have to release an fs_nodelock
		 */
		nameidone(&nd);

		vnode_put(dvp);
		vnode_put(vp);

		socket_lock(so, 0);
		return EADDRINUSE;
	}

	/* Create the filesystem node as a VSOCK honoring the process umask */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VSOCK);
	VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd.fd_cmask));

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* CONFIG_MACF */
#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_bind(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);

	if (error == 0)
#endif /* MAC_SOCKET_SUBSET */
	/* authorize before creating */
	error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

	if (!error) {
		/* create the socket */
		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
	}

	nameidone(&nd);
	vnode_put(dvp);

	if (error) {
		socket_lock(so, 0);
		return error;
	}

	socket_lock(so, 0);

	/*
	 * Re-check for a concurrent bind that may have happened while
	 * the socket lock was dropped for the filesystem operations.
	 */
	if (unp->unp_vnode != NULL) {
		vnode_put(vp); /* drop the iocount */
		return EINVAL;
	}

	error = vnode_ref(vp); /* gain a longterm reference */
	if (error) {
		vnode_put(vp); /* drop the iocount */
		return error;
	}

	/* Cross-link the vnode and the socket; the usecount from
	 * vnode_ref() keeps vp alive until unp_detach().
	 */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = SUN(dup_sockaddr(nam, 1));
	vnode_put(vp); /* drop the iocount */

	return 0;
}
1237
1238
1239 /*
1240 * Returns: 0 Success
1241 * EAFNOSUPPORT Address family not supported
1242 * EINVAL Invalid argument
1243 * ENOTSOCK Not a socket
1244 * ECONNREFUSED Connection refused
1245 * EPROTOTYPE Protocol wrong type for socket
1246 * EISCONN Socket is connected
1247 * unp_connect2:EPROTOTYPE Protocol wrong type for socket
1248 * unp_connect2:EINVAL Invalid argument
1249 * namei:??? [anything namei can return]
1250 * vnode_authorize:???? [anything vnode_authorize can return]
1251 *
1252 * Notes: p at this point is the current process, as this function is
1253 * only called by sosend(), sendfile(), and soconnectlock().
1254 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
{
	struct sockaddr_un *soun = SUN(nam);
	struct vnode *vp;
	struct socket *so2, *so3, *list_so = NULL;
	struct unpcb *unp, *unp2, *unp3;
	vfs_context_t ctx = vfs_context_current();
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];

	if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {
		return EAFNOSUPPORT;
	}

	unp = sotounpcb(so);
	so2 = so3 = NULL;

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		return EINVAL;
	}
	/*
	 * Note: sun_path is not a zero terminated "C" string
	 */
	if (len >= SOCK_MAXADDRLEN) {
		return EINVAL;
	}

	soisconnecting(so);

	/* Copy the (unterminated) path to a local buffer and NUL-terminate */
	char *path = UNP_FORGE_PATH(soun, len);
	bcopy(path, buf, len);
	buf[len] = 0;

	/* Drop the socket lock across the (possibly blocking) path lookup */
	socket_unlock(so, 0);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(buf), ctx);
	error = namei(&nd);
	if (error) {
		socket_lock(so, 0);
		return error;
	}
	nameidone(&nd);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		socket_lock(so, 0);
		goto out;
	}

#if CONFIG_MACF_SOCKET_SUBSET
	error = mac_vnode_check_uipc_connect(ctx, vp, so);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}
#endif /* MAC_SOCKET_SUBSET */

	/* Connecting requires write permission on the socket node */
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);
	if (error) {
		socket_lock(so, 0);
		goto out;
	}

	/* unp_connect_lock keeps vp->v_socket stable while we look it up */
	lck_mtx_lock(&unp_connect_lock);

	if (vp->v_socket == 0) {
		lck_mtx_unlock(&unp_connect_lock);
		error = ECONNREFUSED;
		socket_lock(so, 0);
		goto out;
	}

	socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */
	so2 = vp->v_socket;
	lck_mtx_unlock(&unp_connect_lock);


	if (so2->so_pcb == NULL) {
		error = ECONNREFUSED;
		if (so != so2) {
			socket_unlock(so2, 1);
			socket_lock(so, 0);
		} else {
			/* Release the reference held for the listen socket */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
		goto out;
	}

	/*
	 * Acquire both socket locks in address order to avoid deadlock
	 * against another thread locking the same pair in the opposite
	 * order.  so == so2 (self-connect) needs no second lock.
	 */
	if (so < so2) {
		socket_unlock(so2, 0);
		socket_lock(so, 0);
		socket_lock(so2, 0);
	} else if (so > so2) {
		socket_lock(so, 0);
	}
	/*
	 * Check if socket was connected while we were trying to
	 * get the socket locks in order.
	 * XXX - probably shouldn't return an error for SOCK_DGRAM
	 */
	if ((so->so_state & SS_ISCONNECTED) != 0) {
		error = EISCONN;
		goto decref_out;
	}

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto decref_out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Release the incoming socket but keep a reference */
		socket_unlock(so, 0);

		/* Spawn the server-side socket off the listener's queue */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, nam)) == 0) {
			error = ECONNREFUSED;
			if (so != so2) {
				socket_unlock(so2, 1);
				socket_lock(so, 0);
			} else {
				socket_lock(so, 0);
				/* Release the reference held for
				 * listen socket.
				 */
				VERIFY(so2->so_usecount > 0);
				so2->so_usecount--;
			}
			goto out;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr) {
			unp3->unp_addr = SUN(dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1));
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));

		/* Here we need to have both so and so2 locks and so2
		 * is already locked. Lock ordering is required.
		 */
		if (so < so2) {
			socket_unlock(so2, 0);
			socket_lock(so, 0);
			socket_lock(so2, 0);
		} else {
			socket_lock(so, 0);
		}

		/* Check again if the socket state changed when its lock was released */
		if ((so->so_state & SS_ISCONNECTED) != 0) {
			error = EISCONN;
			socket_unlock(so2, 1);
			/* Discard the never-used server-side socket */
			socket_lock(so3, 0);
			sofreelastref(so3, 1);
			goto out;
		}
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		/* Hold the reference on listening socket until the end */
		socket_unlock(so2, 0);
		list_so = so2;

		/* Lock ordering doesn't matter because so3 was just created */
		socket_lock(so3, 1);
		so2 = so3;

		/*
		 * Enable tracing for mDNSResponder endpoints. (The use
		 * of sizeof instead of strlen below takes the null
		 * terminating character into account.)
		 */
		if (unpst_tracemdns &&
		    !strbufcmp(soun->sun_path, MDNSRESPONDER_PATH)) {
			unp->unp_flags |= UNP_TRACE_MDNS;
			unp2->unp_flags |= UNP_TRACE_MDNS;
		}
	}

	/* so2 is either the datagram peer or the freshly created so3 here */
	error = unp_connect2(so, so2);

decref_out:
	if (so2 != NULL) {
		if (so != so2) {
			socket_unlock(so2, 1);
		} else {
			/* Release the extra reference held for the listen socket.
			 * This is possible only for SOCK_DGRAM sockets. We refuse
			 * connecting to the same socket for SOCK_STREAM sockets.
			 */
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		}
	}

	if (list_so != NULL) {
		socket_lock(list_so, 0);
		socket_unlock(list_so, 1);
	}

out:
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	vnode_put(vp);
	return error;
}
1485
1486 /*
1487 * Returns: 0 Success
1488 * EPROTOTYPE Protocol wrong type for socket
1489 * EINVAL Invalid argument
1490 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	if (so2->so_type != so->so_type) {
		return EPROTOTYPE;
	}

	unp2 = sotounpcb(so2);

	/* Caller must already hold both PCB locks (see unp_connect()) */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Verify both sockets are still opened */
	if (unp == 0 || unp2 == 0) {
		return EINVAL;
	}

	/* Point the client at the peer and account for the new reference */
	unp->unp_conn = unp2;
	so2->so_usecount++;

	switch (so->so_type) {
	case SOCK_DGRAM:
		/* Datagram peers track all senders on unp_refs for teardown */
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);

		if (so != so2) {
			/* Avoid lock order reversals due to drop/acquire in soisconnected. */
			/* Keep an extra reference on so2 that will be dropped
			 * soon after getting the locks in order
			 */
			socket_unlock(so2, 0);
			soisconnected(so);
			unp_get_locks_in_order(so, so2);
			VERIFY(so2->so_usecount > 0);
			so2->so_usecount--;
		} else {
			soisconnected(so);
		}

		break;

	case SOCK_STREAM:
		/* This takes care of socketpair */
		if (!(unp->unp_flags & UNP_HAVEPC) &&
		    !(unp2->unp_flags & UNP_HAVEPC)) {
			cru2x(kauth_cred_get(), &unp->unp_peercred);
			unp->unp_flags |= UNP_HAVEPC;

			cru2x(kauth_cred_get(), &unp2->unp_peercred);
			unp2->unp_flags |= UNP_HAVEPC;
		}
		/* Stream connections are symmetric: link the peer back too */
		unp2->unp_conn = unp;
		so->so_usecount++;

		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
		socket_unlock(so, 0);
		soisconnected(so2);

		/* Keep an extra reference on so2, that will be dropped soon after
		 * getting the locks in order again.
		 */
		socket_unlock(so2, 0);

		socket_lock(so, 0);
		soisconnected(so);

		unp_get_locks_in_order(so, so2);
		/* Decrement the extra reference left before */
		VERIFY(so2->so_usecount > 0);
		so2->so_usecount--;
		break;

	default:
		panic("unknown socket type %d in unp_connect2", so->so_type);
	}
	/* Both locks are held again on the way out, as the caller expects */
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return 0;
}
1572
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = NULL;
	struct socket *so2 = NULL, *so;
	struct socket *waitso;
	int so_locked = 1, strdisconn = 0;

	so = unp->unp_socket;
	if (unp->unp_conn == NULL) {
		return;
	}
	/*
	 * Serialize disconnects globally: only one disconnect may be in
	 * progress at a time (see also the unp_refs drain in unp_detach).
	 * The socket lock is dropped while sleeping to avoid deadlock.
	 */
	lck_mtx_lock(&unp_disconnect_lock);
	while (disconnect_in_progress != 0) {
		if (so_locked == 1) {
			socket_unlock(so, 0);
			so_locked = 0;
		}
		(void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,
		    PSOCK, "disconnect", NULL);
	}
	disconnect_in_progress = 1;
	lck_mtx_unlock(&unp_disconnect_lock);

	if (so_locked == 0) {
		socket_lock(so, 0);
		so_locked = 1;
	}

	unp2 = unp->unp_conn;

	/* The peer may have gone away while the socket lock was dropped */
	if (unp2 == 0 || unp2->unp_socket == NULL) {
		goto out;
	}
	so2 = unp2->unp_socket;

try_again:
	/* Take both socket locks in address order to avoid deadlock */
	if (so == so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		waitso = so;
	} else if (so < so2) {
		if (so_locked == 0) {
			socket_lock(so, 0);
		}
		socket_lock(so2, 1);
		waitso = so2;
	} else {
		if (so_locked == 1) {
			socket_unlock(so, 0);
		}
		socket_lock(so2, 1);
		socket_lock(so, 0);
		waitso = so;
	}
	so_locked = 1;

	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

	/* Check for the UNP_DONTDISCONNECT flag, if it
	 * is set, release both sockets and go to sleep
	 */

	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		so_locked = 0;

		/* PDROP releases unp_mtx while sleeping; retry from scratch */
		(void)msleep(waitso->so_pcb, &unp->unp_mtx,
		    PSOCK | PDROP, "unpdisconnect", NULL);
		goto try_again;
	}

	if (unp->unp_conn == NULL) {
		panic("unp_conn became NULL after sleep");
	}

	/* Sever the forward link and drop the reference taken in unp_connect2 */
	unp->unp_conn = NULL;
	VERIFY(so2->so_usecount > 0);
	so2->so_usecount--;

	if (unp->unp_flags & UNP_TRACE_MDNS) {
		unp->unp_flags &= ~UNP_TRACE_MDNS;
	}

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		/* Datagram: one-way link only — remove from the peer's ref list */
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		if (so != so2) {
			socket_unlock(so2, 1);
		}
		break;

	case SOCK_STREAM:
		/* Stream: symmetric link — sever the back pointer as well */
		unp2->unp_conn = NULL;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;

		/*
		 * Set the socket state correctly but do a wakeup later when
		 * we release all locks except the socket lock, this will avoid
		 * a deadlock.
		 */
		unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);
		unp2->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

		if (unp2->unp_flags & UNP_TRACE_MDNS) {
			unp2->unp_flags &= ~UNP_TRACE_MDNS;
		}

		strdisconn = 1;
		break;
	default:
		panic("unknown socket type %d", so->so_type);
	}
out:
	/* Allow other blocked disconnects to proceed */
	lck_mtx_lock(&unp_disconnect_lock);
	disconnect_in_progress = 0;
	wakeup(&disconnect_in_progress);
	lck_mtx_unlock(&unp_disconnect_lock);

	if (strdisconn) {
		/* Deferred wakeups: deliver soisdisconnected with only one lock held */
		socket_unlock(so, 0);
		soisdisconnected(so2);
		socket_unlock(so2, 1);

		socket_lock(so, 0);
		soisdisconnected(so);
	}
	LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
	return;
}
1712
1713 /*
1714 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.
1715 * The unpcb_compat data structure is passed to user space and must not change.
1716 */
1717 static void
unpcb_to_compat(struct unpcb * up,struct unpcb_compat * cp)1718 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)
1719 {
1720 #if defined(__LP64__)
1721 cp->unp_link.le_next = (u_int32_t)
1722 VM_KERNEL_ADDRHASH(up->unp_link.le_next);
1723 cp->unp_link.le_prev = (u_int32_t)
1724 VM_KERNEL_ADDRHASH(up->unp_link.le_prev);
1725 #else
1726 cp->unp_link.le_next = (struct unpcb_compat *)
1727 VM_KERNEL_ADDRHASH(up->unp_link.le_next);
1728 cp->unp_link.le_prev = (struct unpcb_compat **)
1729 VM_KERNEL_ADDRHASH(up->unp_link.le_prev);
1730 #endif
1731 cp->unp_socket = (_UNPCB_PTR(struct socket *))
1732 VM_KERNEL_ADDRHASH(up->unp_socket);
1733 cp->unp_vnode = (_UNPCB_PTR(struct vnode *))
1734 VM_KERNEL_ADDRHASH(up->unp_vnode);
1735 cp->unp_ino = up->unp_ino;
1736 cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))
1737 VM_KERNEL_ADDRHASH(up->unp_conn);
1738 cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_refs.lh_first);
1739 #if defined(__LP64__)
1740 cp->unp_reflink.le_next =
1741 (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_reflink.le_next);
1742 cp->unp_reflink.le_prev =
1743 (u_int32_t)VM_KERNEL_ADDRHASH(up->unp_reflink.le_prev);
1744 #else
1745 cp->unp_reflink.le_next =
1746 (struct unpcb_compat *)VM_KERNEL_ADDRHASH(up->unp_reflink.le_next);
1747 cp->unp_reflink.le_prev =
1748 (struct unpcb_compat **)VM_KERNEL_ADDRHASH(up->unp_reflink.le_prev);
1749 #endif
1750 cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))
1751 VM_KERNEL_ADDRHASH(up->unp_addr);
1752 cp->unp_cc = up->unp_cc;
1753 cp->unp_mbcnt = up->unp_mbcnt;
1754 cp->unp_gencnt = up->unp_gencnt;
1755 }
1756
/*
 * Sysctl handler exporting all UNIX-domain sockets of one type
 * (arg1 selects the SOCK_DGRAM or SOCK_STREAM head) as a stream of
 * struct xunpcb records bracketed by two xunpgen generation records,
 * letting the caller detect list changes that occurred mid-dump.
 */
static int
unp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list __bidi_indexable;
	size_t unp_list_len;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: report a padded estimate (n + n/8 slack for growth) */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    sizeof(struct xunpcb);
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	/* Leading generation record: snapshot of gen/count at dump start */
	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Collect only PCBs that existed at snapshot time (gencnt filter) */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;          /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;

			bzero(&xu, sizeof(xu));
			xu.xu_len = sizeof(xu);
			xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))
			    VM_KERNEL_ADDRHASH(unp);
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_au.xuu_addr;
				SOCKADDR_COPY(unp->unp_addr, dst,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_cau.xuu_caddr;
				SOCKADDR_COPY(unp->unp_conn->unp_addr, dst,
				    unp->unp_conn->unp_addr->sun_len);
			}
			unpcb_to_compat(unp, &xu.xu_unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof(xu));
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
1876
/*
 * Socket-type selectors passed as the opaque arg1 of the pcblist
 * sysctl handlers; forged into pointers because SYSCTL_PROC carries
 * arg1 as a caddr_t.
 */
const caddr_t SYSCTL_SOCK_DGRAM_ARG = __unsafe_forge_single(caddr_t, SOCK_DGRAM);
const caddr_t SYSCTL_SOCK_STREAM_ARG = __unsafe_forge_single(caddr_t, SOCK_STREAM);

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
1888
1889 #if XNU_TARGET_OS_OSX
1890
/*
 * 64-bit variant of unp_pcblist: same two-generation-record protocol,
 * but emits struct xunpcb64 with explicit 64-bit (hashed) pointer
 * fields instead of the legacy unpcb_compat layout.  macOS only.
 */
static int
unp_pcblist64 SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;

	lck_rw_lock_shared(&unp_list_mtx);
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: padded estimate so the caller can allocate */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) *
		    (sizeof(struct xunpcb64));
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		lck_rw_done(&unp_list_mtx);
		return EPERM;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	/* Leading generation record */
	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error) {
		lck_rw_done(&unp_list_mtx);
		return error;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		lck_rw_done(&unp_list_mtx);
		return 0;
	}

	size_t unp_list_len = n;
	unp_list = kalloc_type(struct unpcb *, unp_list_len, Z_WAITOK);
	if (unp_list == 0) {
		lck_rw_done(&unp_list_mtx);
		return ENOMEM;
	}

	/* Collect only PCBs that existed at snapshot time */
	for (unp = head->lh_first, i = 0; unp && i < n;
	    unp = unp->unp_link.le_next) {
		if (unp->unp_gencnt <= gencnt) {
			unp_list[i++] = unp;
		}
	}
	n = i;          /* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb64 xu;
			size_t xu_len = sizeof(struct xunpcb64);

			bzero(&xu, xu_len);
			xu.xu_len = (u_int32_t)xu_len;
			/* All kernel pointers are hashed before export */
			xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRHASH(unp);
			xu.xunp_link.le_next = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_link.le_next);
			xu.xunp_link.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_link.le_prev);
			xu.xunp_socket = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_socket);
			xu.xunp_vnode = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_vnode);
			xu.xunp_ino = unp->unp_ino;
			xu.xunp_conn = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_conn);
			xu.xunp_refs = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_refs.lh_first);
			xu.xunp_reflink.le_next = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_reflink.le_next);
			xu.xunp_reflink.le_prev = (u_int64_t)
			    VM_KERNEL_ADDRHASH(unp->unp_reflink.le_prev);
			xu.xunp_cc = unp->unp_cc;
			xu.xunp_mbcnt = unp->unp_mbcnt;
			xu.xunp_gencnt = unp->unp_gencnt;

			if (unp->unp_socket) {
				sotoxsocket64(unp->unp_socket, &xu.xu_socket);
			}

			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_au.xuu_addr;
				SOCKADDR_COPY(unp->unp_addr, dst,
				    unp->unp_addr->sun_len);
			}
			if (unp->unp_conn && unp->unp_conn->unp_addr) {
				struct sockaddr_un *dst __single = &xu.xu_cau.xuu_caddr;
				SOCKADDR_COPY(unp->unp_conn->unp_addr,
				    dst,
				    unp->unp_conn->unp_addr->sun_len);
			}

			error = SYSCTL_OUT(req, &xu, xu_len);
		}
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
	kfree_type(struct unpcb *, unp_list_len, unp_list);
	lck_rw_done(&unp_list_mtx);
	return error;
}
2034
/* Register the 64-bit pcblist handlers under net.local.{dgram,stream} */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local datagram sockets 64 bit");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist64, "S,xunpcb64",
    "List of active local stream sockets 64 bit");
2043
2044 #endif /* XNU_TARGET_OS_OSX */
2045
/*
 * "_n" variant of the pcblist sysctl: each PCB is emitted as a
 * composite record (xunpcb_n + xsocket_n + two xsockbuf_n +
 * xsockstat_n) serialized back-to-back in one scratch buffer,
 * still bracketed by the usual pair of xunpgen generation records.
 */
static int
unp_pcblist_n SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp,arg2)
	int error = 0;
	int i, n;
	struct unpcb *unp;
	unp_gen_t gencnt;
	struct xunpgen xug;
	struct unp_head *head;
	size_t item_size = 0;
	uint8_t *__sized_by(item_size) buf = NULL;

	/* One scratch buffer holds all five 64-bit-aligned sub-records */
	const size_t size = ROUNDUP64(sizeof(struct xunpcb_n)) +
	    ROUNDUP64(sizeof(struct xsocket_n)) +
	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
	    ROUNDUP64(sizeof(struct xsockstat_n));
	buf = kalloc_data(size, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	item_size = size;

	lck_rw_lock_shared(&unp_list_mtx);

	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		/* Size probe: padded estimate so the caller can allocate */
		n = unp_count;
		req->oldidx = 2 * sizeof(xug) + (n + n / 8) * item_size;
		goto done;
	}

	/* This sysctl is read-only */
	if (req->newptr != USER_ADDR_NULL) {
		error = EPERM;
		goto done;
	}

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	/* Leading generation record */
	bzero(&xug, sizeof(xug));
	xug.xug_len = sizeof(xug);
	xug.xug_count = n;
	xug.xug_gen = gencnt;
	xug.xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xug, sizeof(xug));
	if (error != 0) {
		goto done;
	}

	/*
	 * We are done if there is no pcb
	 */
	if (n == 0) {
		goto done;
	}

	/* Walk the list directly (no intermediate array, unlike unp_pcblist) */
	for (i = 0, unp = head->lh_first;
	    i < n && unp != NULL;
	    i++, unp = unp->unp_link.le_next) {
		/* Carve the five sub-records out of the shared scratch buffer */
		struct xunpcb_n *xu = (struct xunpcb_n *)buf;
		struct xsocket_n *xso = (struct xsocket_n *)
		    ADVANCE64(xu, sizeof(*xu));
		struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)
		    ADVANCE64(xso, sizeof(*xso));
		struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)
		    ADVANCE64(xsbrcv, sizeof(*xsbrcv));
		struct xsockstat_n *xsostats = (struct xsockstat_n *)
		    ADVANCE64(xsbsnd, sizeof(*xsbsnd));

		/* Skip PCBs created after the snapshot */
		if (unp->unp_gencnt > gencnt) {
			continue;
		}

		bzero(buf, item_size);

		xu->xunp_len = sizeof(struct xunpcb_n);
		xu->xunp_kind = XSO_UNPCB;
		/* Kernel pointers are hashed before export */
		xu->xunp_unpp = (uint64_t)VM_KERNEL_ADDRHASH(unp);
		xu->xunp_vnode = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_vnode);
		xu->xunp_ino = unp->unp_ino;
		xu->xunp_conn = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_conn);
		xu->xunp_refs = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_refs.lh_first);
		xu->xunp_reflink = (uint64_t)VM_KERNEL_ADDRHASH(unp->unp_reflink.le_next);
		xu->xunp_cc = unp->unp_cc;
		xu->xunp_mbcnt = unp->unp_mbcnt;
		xu->xunp_flags = unp->unp_flags;
		xu->xunp_gencnt = unp->unp_gencnt;

		if (unp->unp_addr) {
			struct sockaddr_un *dst __single = &xu->xu_au.xuu_addr;
			SOCKADDR_COPY(unp->unp_addr, dst,
			    unp->unp_addr->sun_len);
		}
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			struct sockaddr_un *dst __single = &xu->xu_cau.xuu_caddr;
			SOCKADDR_COPY(unp->unp_conn->unp_addr, dst,
			    unp->unp_conn->unp_addr->sun_len);
		}
		sotoxsocket_n(unp->unp_socket, xso);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_rcv : NULL, xsbrcv);
		sbtoxsockbuf_n(unp->unp_socket ?
		    &unp->unp_socket->so_snd : NULL, xsbsnd);
		sbtoxsockstat_n(unp->unp_socket, xsostats);

		error = SYSCTL_OUT(req, buf, item_size);
		if (error != 0) {
			break;
		}
	}
	if (error == 0) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		bzero(&xug, sizeof(xug));
		xug.xug_len = sizeof(xug);
		xug.xug_gen = unp_gencnt;
		xug.xug_sogen = so_gencnt;
		xug.xug_count = unp_count;
		error = SYSCTL_OUT(req, &xug, sizeof(xug));
	}
done:
	lck_rw_done(&unp_list_mtx);
	kfree_data_sized_by(buf, item_size);
	return error;
}
2182
/* Register the "_n" composite-record pcblist handlers */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_DGRAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist_n,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    SYSCTL_SOCK_STREAM_ARG, 0, unp_pcblist_n, "S,xunpcb_n",
    "List of active local stream sockets");
2191
2192 static void
unp_shutdown(struct unpcb * unp)2193 unp_shutdown(struct unpcb *unp)
2194 {
2195 struct socket *so = unp->unp_socket;
2196 struct socket *so2;
2197 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {
2198 so2 = unp->unp_conn->unp_socket;
2199 unp_get_locks_in_order(so, so2);
2200 socantrcvmore(so2);
2201 socket_unlock(so2, 1);
2202 }
2203 }
2204
2205 static void
unp_drop(struct unpcb * unp,int errno)2206 unp_drop(struct unpcb *unp, int errno)
2207 {
2208 struct socket *so = unp->unp_socket;
2209
2210 so->so_error = (u_short)errno;
2211 unp_disconnect(unp);
2212 }
2213
2214 /*
2215 * fg_insertuipc_mark
2216 *
2217 * Description: Mark fileglob for insertion onto message queue if needed
2218 * Also takes fileglob reference
2219 *
2220 * Parameters: fg Fileglob pointer to insert
2221 *
2222 * Returns: true, if the fileglob needs to be inserted onto msg queue
2223 *
2224 * Locks: Takes and drops fg_lock, potentially many times
2225 */
static boolean_t
fg_insertuipc_mark(struct fileglob * fg)
{
	boolean_t insert = FALSE;

	lck_mtx_lock_spin(&fg->fg_lock);
	/*
	 * If a removal from the message queue is still in flight,
	 * wait for it to finish so insert and remove cannot interleave.
	 * The spin lock must be converted to a full mutex before sleeping.
	 */
	while (fg->fg_lflags & FG_RMMSGQ) {
		lck_mtx_convert_spin(&fg->fg_lock);

		fg->fg_lflags |= FG_WRMMSGQ;
		msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);
	}

	/* Take a fileglob reference for the in-transit message */
	os_ref_retain_raw(&fg->fg_count, &f_refgrp);
	fg->fg_msgcount++;
	/*
	 * First in-flight message for this fileglob: mark it so the
	 * caller links it onto the global message queue via fg_insertuipc().
	 */
	if (fg->fg_msgcount == 1) {
		fg->fg_lflags |= FG_INSMSGQ;
		insert = TRUE;
	}
	lck_mtx_unlock(&fg->fg_lock);
	return insert;
}
2248
2249 /*
2250 * fg_insertuipc
2251 *
2252 * Description: Insert marked fileglob onto message queue
2253 *
2254 * Parameters: fg Fileglob pointer to insert
2255 *
2256 * Returns: void
2257 *
2258 * Locks: Takes and drops fg_lock & uipc_lock
2259 * DO NOT call this function with proc_fdlock held as unp_gc()
2260 * can potentially try to acquire proc_fdlock, which can result
2261 * in a deadlock.
2262 */
2263 static void
fg_insertuipc(struct fileglob * fg)2264 fg_insertuipc(struct fileglob * fg)
2265 {
2266 if (fg->fg_lflags & FG_INSMSGQ) {
2267 lck_mtx_lock(&uipc_lock);
2268 LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);
2269 lck_mtx_unlock(&uipc_lock);
2270 lck_mtx_lock(&fg->fg_lock);
2271 fg->fg_lflags &= ~FG_INSMSGQ;
2272 if (fg->fg_lflags & FG_WINSMSGQ) {
2273 fg->fg_lflags &= ~FG_WINSMSGQ;
2274 wakeup(&fg->fg_lflags);
2275 }
2276 lck_mtx_unlock(&fg->fg_lock);
2277 }
2278 }
2279
2280 /*
2281 * fg_removeuipc_mark
2282 *
2283 * Description: Mark the fileglob for removal from message queue if needed
2284 * Also releases fileglob message queue reference
2285 *
2286 * Parameters: fg Fileglob pointer to remove
2287 *
2288 * Returns: true, if the fileglob needs to be removed from msg queue
2289 *
2290 * Locks: Takes and drops fg_lock, potentially many times
2291 */
2292 static boolean_t
fg_removeuipc_mark(struct fileglob * fg)2293 fg_removeuipc_mark(struct fileglob * fg)
2294 {
2295 boolean_t remove = FALSE;
2296
2297 lck_mtx_lock_spin(&fg->fg_lock);
2298 while (fg->fg_lflags & FG_INSMSGQ) {
2299 lck_mtx_convert_spin(&fg->fg_lock);
2300
2301 fg->fg_lflags |= FG_WINSMSGQ;
2302 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);
2303 }
2304 fg->fg_msgcount--;
2305 if (fg->fg_msgcount == 0) {
2306 fg->fg_lflags |= FG_RMMSGQ;
2307 remove = TRUE;
2308 }
2309 lck_mtx_unlock(&fg->fg_lock);
2310 return remove;
2311 }
2312
2313 /*
2314 * fg_removeuipc
2315 *
2316 * Description: Remove marked fileglob from message queue
2317 *
2318 * Parameters: fg Fileglob pointer to remove
2319 *
2320 * Returns: void
2321 *
2322 * Locks: Takes and drops fg_lock & uipc_lock
2323 * DO NOT call this function with proc_fdlock held as unp_gc()
2324 * can potentially try to acquire proc_fdlock, which can result
2325 * in a deadlock.
2326 */
2327 static void
fg_removeuipc(struct fileglob * fg)2328 fg_removeuipc(struct fileglob * fg)
2329 {
2330 if (fg->fg_lflags & FG_RMMSGQ) {
2331 lck_mtx_lock(&uipc_lock);
2332 LIST_REMOVE(fg, f_msglist);
2333 lck_mtx_unlock(&uipc_lock);
2334 lck_mtx_lock(&fg->fg_lock);
2335 fg->fg_lflags &= ~FG_RMMSGQ;
2336 if (fg->fg_lflags & FG_WRMMSGQ) {
2337 fg->fg_lflags &= ~FG_WRMMSGQ;
2338 wakeup(&fg->fg_lflags);
2339 }
2340 lck_mtx_unlock(&fg->fg_lock);
2341 }
2342 }
2343
/*
 * Externalize an SCM_RIGHTS control message: convert the in-transit
 * fileglob pointers carried in `rights` back into file descriptors in
 * the receiving (current) process's fd table.
 *
 * Returns:	0		Success
 *		EMSGSIZE	The new fd's will not fit
 *		ENOMEM		Cannot alloc the temporary fd array
 */
int
unp_externalize(struct mbuf *rights)
{
	proc_t p = current_proc();
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	/* While in transit the cmsg payload holds fileglob pointers, not ints. */
	struct fileglob **rp = (struct fileglob **)(cm + 1);
	const int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	int *fds __bidi_indexable;
	int error = 0;

	fds = kalloc_data(newfds * sizeof(int), Z_WAITOK);
	if (fds == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Step 1:
	 * Allocate all the fds, and if it doesn't fit,
	 * then fail and discard everything.
	 */
	proc_fdlock(p);

	if (fdt_available_locked(p, newfds)) {
		for (int i = 0; i < newfds; i++) {
			error = fdalloc(p, 0, &fds[i]);
			if (error) {
				/* Roll back the fds handed out so far. */
				while (i-- > 0) {
					fdrelse(p, fds[i]);
				}
				break;
			}
		}
	} else {
		error = EMSGSIZE;
	}

	proc_fdunlock(p);

	if (error) {
		goto out;
	}

	/*
	 * Step 2:
	 * At this point we are committed, and can't fail anymore.
	 * Allocate all the fileprocs, and remove the files
	 * from the queue.
	 *
	 * Until we call procfdtbl_releasefd(), fds are in flux
	 * and can't be closed.
	 */
	for (int i = 0; i < newfds; i++) {
		struct fileproc *fp = NULL;

		fp = fileproc_alloc_init();
		fp->fp_glob = rp[i];
		/* Drop the in-flight message-queue accounting for this file. */
		if (fg_removeuipc_mark(rp[i])) {
			fg_removeuipc(rp[i]);
		}

		proc_fdlock(p);
		procfdtbl_releasefd(p, fds[i], fp);
		proc_fdunlock(p);
	}

	/*
	 * Step 3:
	 * Return the fds into `cm`.
	 * Handle the fact ints and pointers do not have the same size.
	 */
	int *fds_out = (int *)(cm + 1);
	memcpy(fds_out, fds, newfds * sizeof(int));
	if (sizeof(struct fileglob *) != sizeof(int)) {
		/* Zero the tail left over from the wider pointer array. */
		bzero(fds_out + newfds,
		    newfds * (sizeof(struct fileglob *) - sizeof(int)));
	}
	OSAddAtomic(-newfds, &unp_rights);

out:
	if (error) {
		/* On failure, every right carried in the message is discarded. */
		for (int i = 0; i < newfds; i++) {
			unp_discard(rp[i], p);
		}
		bzero(rp, newfds * sizeof(struct fileglob *));
	}

	kfree_data(fds, newfds * sizeof(int));
	return error;
}
2439
2440 void
unp_init(void)2441 unp_init(void)
2442 {
2443 _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));
2444 LIST_INIT(&unp_dhead);
2445 LIST_INIT(&unp_shead);
2446 }
2447
#ifndef MIN
/* Fallback definition, used only when no prior MIN is in scope. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
2451
/*
 * Internalize an SCM_RIGHTS control message: convert the sender's file
 * descriptors into fileglob pointers for transit, taking a message
 * reference on each file.
 *
 * Returns:	0	Success
 *		EINVAL	Malformed control message, or fd not sendable
 *		EBADF	Bad file descriptor
 */
static int
unp_internalize(struct mbuf *control, proc_t p)
{
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int *fds;
	struct fileglob **rp;
	struct fileproc *fp;
	int i, error;
	int oldfds;
	/* One bit per fd: set if the fileglob must be queued on unp_msghead. */
	uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

	/* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {
		return EINVAL;
	}
	oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);
	bzero(fg_ins, sizeof(fg_ins));

	proc_fdlock(p);
	fds = (int *)(cm + 1);

	/* First pass: validate every fd before mutating anything. */
	for (i = 0; i < oldfds; i++) {
		struct fileproc *tmpfp;
		if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {
			proc_fdunlock(p);
			return EBADF;
		} else if (!fg_sendable(tmpfp->fp_glob)) {
			proc_fdunlock(p);
			return EINVAL;
		} else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {
			/* Guarded fds may not be passed over socket IPC. */
			error = fp_guard_exception(p,
			    fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);
			proc_fdunlock(p);
			return error;
		}
	}
	rp = (struct fileglob **)(cm + 1);

	/* On K64 we need to walk backwards because a fileglob * is twice the size of an fd
	 * and doing them in-order would result in stomping over unprocessed fd's
	 */
	for (i = (oldfds - 1); i >= 0; i--) {
		fp = fp_get_noref_locked(p, fds[i]);
		if (fg_insertuipc_mark(fp->fp_glob)) {
			fg_ins[i / 8] |= 0x80 >> (i % 8);
		}
		rp[i] = fp->fp_glob;
	}
	proc_fdunlock(p);

	/*
	 * Queue the newly-marked fileglobs outside of proc_fdlock (see the
	 * deadlock note on fg_insertuipc) and account for the rights.
	 */
	for (i = 0; i < oldfds; i++) {
		if (fg_ins[i / 8] & (0x80 >> (i % 8))) {
			VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);
			fg_insertuipc(rp[i]);
		}
		(void) OSAddAtomic(1, &unp_rights);
	}

	return 0;
}
2518
/*
 * Garbage collector for file descriptors reachable only through
 * SCM_RIGHTS messages in transit on UNIX domain sockets (in-flight fd
 * cycles). Classic mark-and-sweep over unp_msghead: mark everything
 * externally accessible, re-scanning (FDEFER) until a fixed point,
 * then sweep the unreachable remainder by flushing their receive
 * buffers and dropping the references held by the dead messages.
 *
 * Runs as a thread call (unp_gc_tcall, scheduled from unp_unlock).
 */
static void
unp_gc(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg0, arg1)
	struct fileglob *fg;
	struct socket *so;
	/*
	 * NOTE(review): extra_ref is 'static', so concurrent invocations
	 * would share the pointer — presumably serialized by the single
	 * thread call; confirm.
	 */
	static struct fileglob **__indexable extra_ref;
	struct fileglob **fpp;
	int nunref, i;
	static struct proc *UNP_FG_NOPROC = __unsafe_forge_single(struct proc *, FG_NOPROC);

restart:
	lck_mtx_lock(&uipc_lock);
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);
	}
	do {
		LIST_FOREACH(fg, &unp_msghead, f_msglist) {
			lck_mtx_lock(&fg->fg_lock);
			/*
			 * If the file is not open, skip it
			 */
			if (os_ref_get_count_raw(&fg->fg_count) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * If we already marked it as 'defer' in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fg->fg_flag & FDEFER) {
				os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fg->fg_flag & FMARK) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (os_ref_get_count_raw(&fg->fg_count) ==
				    fg->fg_msgcount) {
					lck_mtx_unlock(&fg->fg_lock);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				os_atomic_or(&fg->fg_flag, FMARK, relaxed);
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||
			    (so = (struct socket *)fg_get_data(fg)) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			if (so->so_proto->pr_domain != localdomain ||
			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
				lck_mtx_unlock(&fg->fg_lock);
				continue;
			}
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 *
			 * In case a file is passed onto itself we need to
			 * release the file lock.
			 */
			lck_mtx_unlock(&fg->fg_lock);
			/*
			 * It's safe to lock the socket after dropping fg_lock
			 * because the socket isn't going away at this point.
			 *
			 * If we couldn't lock the socket or the socket buffer,
			 * then it's because someone holding one of these
			 * locks is stuck in unp_{internalize,externalize}().
			 * Yield to that process and restart the garbage
			 * collection.
			 */
			if (!socket_try_lock(so)) {
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			so->so_usecount++;
			/*
			 * Lock the receive socket buffer so that we can
			 * iterate over its mbuf list.
			 */
			if (sblock(&so->so_rcv, SBL_NOINTR | SBL_IGNDEFUNCT)) {
				socket_unlock(so, 1);
				lck_mtx_unlock(&uipc_lock);
				goto restart;
			}
			VERIFY(so->so_rcv.sb_flags & SB_LOCK);
			/*
			 * Drop the socket lock while scanning; the sb lock
			 * keeps the receive queue stable for unp_mark.
			 */
			socket_unlock(so, 0);
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
			socket_lock(so, 0);
			sbunlock(&so->so_rcv, TRUE);
			/*
			 * Unlock and release the reference acquired above.
			 */
			socket_unlock(so, 1);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor. Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow. After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference. This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, [email protected]
	 */
	size_t extra_ref_size = nfiles;
	if (extra_ref_size == 0) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	extra_ref = kalloc_type(struct fileglob *, extra_ref_size, Z_WAITOK);
	if (extra_ref == NULL) {
		lck_mtx_unlock(&uipc_lock);
		return;
	}
	nunref = 0;
	fpp = extra_ref;
	LIST_FOREACH(fg, &unp_msghead, f_msglist) {
		lck_mtx_lock(&fg->fg_lock);
		/*
		 * If it's not open, skip it
		 */
		if (os_ref_get_count_raw(&fg->fg_count) == 0) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fg->fg_flag & FMARK) {
			lck_mtx_unlock(&fg->fg_lock);
			continue;
		}
		if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {
			os_ref_retain_raw(&fg->fg_count, &f_refgrp);
			*fpp++ = fg;
			nunref++;
		}
		lck_mtx_unlock(&fg->fg_lock);
	}
	lck_mtx_unlock(&uipc_lock);

	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct fileglob *tfg;

		tfg = *fpp;

		if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET) {
			so = (struct socket *)fg_get_data(tfg);

			if (so) {
				socket_lock(so, 0);
				/* Flush the rcv queue; discards in-flight rights. */
				sorflush(so);
				socket_unlock(so, 0);
			}
		}
	}
	/* Drop our extra references — last close tears the sockets down. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		fg_drop(UNP_FG_NOPROC, *fpp);
	}

	kfree_type(struct fileglob *, extra_ref_size, extra_ref);
}
2722
2723 void
unp_dispose(struct mbuf * m)2724 unp_dispose(struct mbuf *m)
2725 {
2726 if (m) {
2727 unp_scan(m, unp_discard, NULL);
2728 }
2729 }
2730
2731 /*
2732 * Returns: 0 Success
2733 */
2734 static int
unp_listen(struct unpcb * unp,proc_t p)2735 unp_listen(struct unpcb *unp, proc_t p)
2736 {
2737 kauth_cred_t safecred __single = kauth_cred_proc_ref(p);
2738 cru2x(safecred, &unp->unp_peercred);
2739 kauth_cred_unref(&safecred);
2740 unp->unp_flags |= UNP_HAVEPCCACHED;
2741 return 0;
2742 }
2743
2744 static void
unp_scan(struct mbuf * m0,void (* op)(struct fileglob *,void * arg),void * arg)2745 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)
2746 {
2747 struct mbuf *m;
2748 struct fileglob **rp;
2749 struct cmsghdr *cm;
2750 int i;
2751 int qfds;
2752
2753 while (m0) {
2754 for (m = m0; m; m = m->m_next) {
2755 if (m->m_type == MT_CONTROL &&
2756 (size_t)m->m_len >= sizeof(*cm)) {
2757 cm = mtod(m, struct cmsghdr *);
2758 if (cm->cmsg_level != SOL_SOCKET ||
2759 cm->cmsg_type != SCM_RIGHTS) {
2760 continue;
2761 }
2762 qfds = (cm->cmsg_len - sizeof(*cm)) /
2763 sizeof(int);
2764 rp = (struct fileglob **)(cm + 1);
2765 for (i = 0; i < qfds; i++) {
2766 (*op)(*rp++, arg);
2767 }
2768 break; /* XXX, but saves time */
2769 }
2770 }
2771 m0 = m0->m_act;
2772 }
2773 }
2774
/*
 * unp_scan() callback for the garbage collector: atomically mark a
 * fileglob as externally reachable (FMARK) and defer it (FDEFER) so
 * the next mark pass follows the rights it may itself carry.
 * Already-marked fileglobs are left untouched.
 */
static void
unp_mark(struct fileglob *fg, __unused void *arg)
{
	uint32_t oflags, nflags;

	os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {
		if (oflags & FMARK) {
			/* Already marked: nothing more to defer. */
			os_atomic_rmw_loop_give_up(return );
		}
		nflags = oflags | FMARK | FDEFER;
	});

	/*
	 * NOTE(review): unp_defer is incremented non-atomically here;
	 * presumably serialized by uipc_lock held across unp_gc's mark
	 * phase — confirm.
	 */
	unp_defer++;
}
2789
2790 static void
unp_discard(struct fileglob * fg,void * p)2791 unp_discard(struct fileglob *fg, void *p)
2792 {
2793 if (p == NULL) {
2794 p = current_proc(); /* XXX */
2795 }
2796 (void) OSAddAtomic(1, &unp_disposed);
2797 if (fg_removeuipc_mark(fg)) {
2798 VERIFY(fg->fg_lflags & FG_RMMSGQ);
2799 fg_removeuipc(fg);
2800 }
2801 (void) OSAddAtomic(-1, &unp_rights);
2802
2803 (void) fg_drop(p, fg);
2804 }
2805
2806 int
unp_lock(struct socket * so,int refcount,void * lr)2807 unp_lock(struct socket *so, int refcount, void * lr)
2808 {
2809 void * lr_saved __single;
2810 if (lr == 0) {
2811 lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
2812 } else {
2813 lr_saved = lr;
2814 }
2815
2816 if (so->so_pcb) {
2817 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
2818 } else {
2819 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x",
2820 so, lr_saved, so->so_usecount);
2821 }
2822
2823 if (so->so_usecount < 0) {
2824 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x",
2825 so, so->so_pcb, lr_saved, so->so_usecount);
2826 }
2827
2828 if (refcount) {
2829 VERIFY(so->so_usecount > 0);
2830 so->so_usecount++;
2831 }
2832 so->lock_lr[so->next_lock_lr] = lr_saved;
2833 so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;
2834 return 0;
2835 }
2836
/*
 * Socket unlock function for UNIX domain sockets.
 *
 * Drops the use-count reference when requested, records the caller for
 * lock debugging, and — when the last reference goes away on a socket
 * whose pcb is being cleared — frees the socket, its cached address
 * and the pcb, then schedules the in-flight fd garbage collector.
 */
int
unp_unlock(struct socket *so, int refcount, void * lr)
{
	void * lr_saved __single;
	lck_mtx_t * mutex_held = NULL;
	struct unpcb *unp __single = sotounpcb(so);

	if (lr == 0) {
		lr_saved = __unsafe_forge_single(void*, __builtin_return_address(0));
	} else {
		lr_saved = lr;
	}

	if (refcount) {
		so->so_usecount--;
	}

	if (so->so_usecount < 0) {
		panic("unp_unlock: so=%p usecount=%x", so, so->so_usecount);
	}
	if (so->so_pcb == NULL) {
		panic("unp_unlock: so=%p NO PCB usecount=%x", so, so->so_usecount);
	} else {
		mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
	/* Record the caller in the unlock history ring for debugging. */
	so->unlock_lr[so->next_unlock_lr] = lr_saved;
	so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {
		/* Last reference and pcb is detaching: tear everything down. */
		sofreelastref(so, 1);

		if (unp->unp_addr != NULL) {
			free_sockaddr(unp->unp_addr);
		}

		lck_mtx_unlock(mutex_held);

		lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);
		zfree(unp_zone, unp);
		/* A pcb went away: let unp_gc reap any in-flight fd cycles. */
		thread_call_enter(unp_gc_tcall);
	} else {
		lck_mtx_unlock(mutex_held);
	}

	return 0;
}
2884
2885 lck_mtx_t *
unp_getlock(struct socket * so,__unused int flags)2886 unp_getlock(struct socket *so, __unused int flags)
2887 {
2888 struct unpcb *unp = (struct unpcb *)so->so_pcb;
2889
2890
2891 if (so->so_pcb) {
2892 if (so->so_usecount < 0) {
2893 panic("unp_getlock: so=%p usecount=%x", so, so->so_usecount);
2894 }
2895 return &unp->unp_mtx;
2896 } else {
2897 panic("unp_getlock: so=%p NULL so_pcb", so);
2898 return so->so_proto->pr_domain->dom_mtx;
2899 }
2900 }
2901